From 4a2e634ed8e6277d9ceb8e04b764ca59cf8e4a04 Mon Sep 17 00:00:00 2001
From: doko
Date: Thu, 24 Jul 2014 21:58:23 +0000
Subject: * Update to SVN 20140724 (r212995) from the gcc-4_9-branch.
  * Update the Linaro support to the 4.9-2014.07 release.

git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7526 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
 debian/changelog                         |     5 +-
 debian/patches/gcc-linaro-doc.diff       |   121 +-
 debian/patches/gcc-linaro.diff           | 36844 +++++++++++++++++++++--------
 debian/patches/gcc-multiarch-linaro.diff |   147 +
 debian/patches/svn-updates.diff          |  1951 +-
 debian/rules.patch                       |     7 +-
 6 files changed, 29225 insertions(+), 9850 deletions(-)
 create mode 100644 debian/patches/gcc-multiarch-linaro.diff

diff --git a/debian/changelog b/debian/changelog
index f0906d6..47c7ab6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,11 +1,14 @@
 gcc-4.9 (4.9.1-2) UNRELEASED; urgency=medium
 
+  * Update to SVN 20140724 (r212995) from the gcc-4_9-branch.
+  * Fix installing test logs and summaries.
   * Warn about ppc ELFv2 ABI issues, which will change in GCC 4.10.
   * Don't gzip the xz compressed testsuite logs and summaries.
   * Build libphobos on armel and armhf. Closes: #755390.
+  * Update the Linaro support to the 4.9-2014.07 release.
 
- -- Matthias Klose Thu, 17 Jul 2014 14:56:55 +0200
+ -- Matthias Klose Thu, 24 Jul 2014 16:47:07 +0200
 
 gcc-4.9 (4.9.1-1) unstable; urgency=medium
 
diff --git a/debian/patches/gcc-linaro-doc.diff b/debian/patches/gcc-linaro-doc.diff
index f4599a8..ee2ed39 100644
--- a/debian/patches/gcc-linaro-doc.diff
+++ b/debian/patches/gcc-linaro-doc.diff
@@ -1,2 +1,121 @@
-# DP: Changes for the Linaro 4.9-2014.06 release (documentation).
+# DP: Changes for the Linaro 4.9-2014.07 release (documentation).
 
+--- a/src/gcc/doc/extend.texi
++++ b/src/gcc/doc/extend.texi
+@@ -9109,6 +9109,8 @@
+ instructions, but allow the compiler to schedule those calls.
+ 
+ @menu
++* AArch64 Built-in Functions::
++* AArch64 intrinsics::
+ * Alpha Built-in Functions::
+ * Altera Nios II Built-in Functions::
+ * ARC Built-in Functions::
+@@ -9116,6 +9118,7 @@
+ * ARM iWMMXt Built-in Functions::
+ * ARM NEON Intrinsics::
+ * ARM ACLE Intrinsics::
++* ARM Floating Point Status and Control Intrinsics::
+ * AVR Built-in Functions::
+ * Blackfin Built-in Functions::
+ * FR-V Built-in Functions::
+@@ -9141,6 +9144,23 @@
+ * TILEPro Built-in Functions::
+ @end menu
+ 
++@node AArch64 Built-in Functions
++@subsection AArch64 Built-in Functions
++
++These built-in functions are available for the AArch64 family of
++processors.
++@smallexample
++unsigned int __builtin_aarch64_get_fpcr ()
++void __builtin_aarch64_set_fpcr (unsigned int)
++unsigned int __builtin_aarch64_get_fpsr ()
++void __builtin_aarch64_set_fpsr (unsigned int)
++@end smallexample
++
++@node AArch64 intrinsics
++@subsection ACLE Intrinsics for AArch64
++
++@include aarch64-acle-intrinsics.texi
++
+ @node Alpha Built-in Functions
+ @subsection Alpha Built-in Functions
+ 
+@@ -9917,6 +9937,17 @@
+ 
+ @include arm-acle-intrinsics.texi
+ 
++@node ARM Floating Point Status and Control Intrinsics
++@subsection ARM Floating Point Status and Control Intrinsics
++
++These built-in functions are available for the ARM family of
++processors with floating-point unit.
++ ++@smallexample ++unsigned int __builtin_arm_get_fpscr () ++void __builtin_arm_set_fpscr (unsigned int) ++@end smallexample ++ + @node AVR Built-in Functions + @subsection AVR Built-in Functions + +--- a/src/gcc/doc/aarch64-acle-intrinsics.texi ++++ b/src/gcc/doc/aarch64-acle-intrinsics.texi +@@ -0,0 +1,55 @@ ++@c Copyright (C) 2014 Free Software Foundation, Inc. ++@c This is part of the GCC manual. ++@c For copying conditions, see the file gcc.texi. ++ ++@subsubsection CRC32 intrinsics ++ ++These intrinsics are available when the CRC32 architecture extension is ++specified, e.g. when the @option{-march=armv8-a+crc} switch is used, or when ++the target processor specified with @option{-mcpu} supports it. ++ ++@itemize @bullet ++@item uint32_t __crc32b (uint32_t, uint8_t) ++@*@emph{Form of expected instruction(s):} @code{crc32b @var{w0}, @var{w1}, @var{w2}} ++@end itemize ++ ++ ++@itemize @bullet ++@item uint32_t __crc32h (uint32_t, uint16_t) ++@*@emph{Form of expected instruction(s):} @code{crc32h @var{w0}, @var{w1}, @var{w2}} ++@end itemize ++ ++ ++@itemize @bullet ++@item uint32_t __crc32w (uint32_t, uint32_t) ++@*@emph{Form of expected instruction(s):} @code{crc32w @var{w0}, @var{w1}, @var{w2}} ++@end itemize ++ ++ ++@itemize @bullet ++@item uint32_t __crc32d (uint32_t, uint64_t) ++@*@emph{Form of expected instruction(s):} @code{crc32x @var{w0}, @var{w1}, @var{x2}} ++@end itemize ++ ++@itemize @bullet ++@item uint32_t __crc32cb (uint32_t, uint8_t) ++@*@emph{Form of expected instruction(s):} @code{crc32cb @var{w0}, @var{w1}, @var{w2}} ++@end itemize ++ ++ ++@itemize @bullet ++@item uint32_t __crc32ch (uint32_t, uint16_t) ++@*@emph{Form of expected instruction(s):} @code{crc32ch @var{w0}, @var{w1}, @var{w2}} ++@end itemize ++ ++ ++@itemize @bullet ++@item uint32_t __crc32cw (uint32_t, uint32_t) ++@*@emph{Form of expected instruction(s):} @code{crc32cw @var{w0}, @var{w1}, @var{w2}} ++@end itemize ++ ++ ++@itemize @bullet ++@item uint32_t __crc32cd (uint32_t, uint64_t) ++@*@emph{Form of expected instruction(s):} @code{crc32cx @var{w0}, @var{w1}, @var{x2}} ++@end itemize diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff index 1686fa4..28457c5 100644 --- a/debian/patches/gcc-linaro.diff +++ b/debian/patches/gcc-linaro.diff @@ -1,12 +1,16 @@ -# DP: Changes for the Linaro 4.9-2014.06 release. +# DP: Changes for the Linaro 4.9-2014.07 release. -LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ - svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@212009 \ +LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@212635 \ + svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@212977 \ | filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/ --- a/src/libitm/ChangeLog.linaro +++ b/src/libitm/ChangeLog.linaro -@@ -0,0 +1,24 @@ +@@ -0,0 +1,28 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -187,7 +191,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ +} // namespace GTM --- a/src/libgomp/ChangeLog.linaro +++ b/src/libgomp/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -205,7 +213,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. 
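A minimal usage sketch for the interfaces documented in the extend.texi and
aarch64-acle-intrinsics.texi hunks earlier in this commit (an illustration
only, not part of the patch; it assumes an AArch64 compiler carrying these
changes and, for the CRC32 intrinsics, -march=armv8-a+crc; the function
names are invented for the example):

  #include <stdint.h>
  #include <arm_acle.h>   /* Declares the __crc32* intrinsics documented above.  */

  /* Accumulate a CRC32 over a buffer one byte at a time; each call is
     expected to expand to a crc32b instruction.  */
  uint32_t
  buf_crc32 (uint32_t crc, const uint8_t *buf, unsigned int len)
  {
    while (len--)
      crc = __crc32b (crc, *buf++);
    return crc;
  }

  /* Read-modify-write the FP control register via the new builtins.  */
  void
  fpcr_set_bits (unsigned int mask)
  {
    unsigned int fpcr = __builtin_aarch64_get_fpcr ();
    __builtin_aarch64_set_fpcr (fpcr | mask);
  }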
--- a/src/libquadmath/ChangeLog.linaro +++ b/src/libquadmath/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -223,7 +235,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libsanitizer/ChangeLog.linaro +++ b/src/libsanitizer/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -241,7 +257,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/zlib/ChangeLog.linaro +++ b/src/zlib/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -259,7 +279,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libstdc++-v3/ChangeLog.linaro +++ b/src/libstdc++-v3/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -277,7 +301,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/intl/ChangeLog.linaro +++ b/src/intl/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -295,7 +323,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/ChangeLog.linaro +++ b/src/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -313,7 +345,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/boehm-gc/ChangeLog.linaro +++ b/src/boehm-gc/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -331,7 +367,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/include/ChangeLog.linaro +++ b/src/include/ChangeLog.linaro -@@ -0,0 +1,22 @@ +@@ -0,0 +1,26 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -388,7 +428,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ do { \ --- a/src/libiberty/ChangeLog.linaro +++ b/src/libiberty/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -406,7 +450,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/lto-plugin/ChangeLog.linaro +++ b/src/lto-plugin/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. 
@@ -424,7 +472,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/contrib/regression/ChangeLog.linaro +++ b/src/contrib/regression/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -442,7 +494,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/contrib/ChangeLog.linaro +++ b/src/contrib/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -460,7 +516,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/contrib/reghunt/ChangeLog.linaro +++ b/src/contrib/reghunt/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -478,7 +538,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libatomic/ChangeLog.linaro +++ b/src/libatomic/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -496,7 +560,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/config/ChangeLog.linaro +++ b/src/config/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -514,7 +582,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libbacktrace/ChangeLog.linaro +++ b/src/libbacktrace/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -532,7 +604,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libjava/libltdl/ChangeLog.linaro +++ b/src/libjava/libltdl/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -550,7 +626,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libjava/ChangeLog.linaro +++ b/src/libjava/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -568,7 +648,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libjava/classpath/ChangeLog.linaro +++ b/src/libjava/classpath/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -586,7 +670,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. 
--- a/src/gnattools/ChangeLog.linaro +++ b/src/gnattools/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -604,7 +692,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/maintainer-scripts/ChangeLog.linaro +++ b/src/maintainer-scripts/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -622,7 +714,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libgcc/ChangeLog.linaro +++ b/src/libgcc/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -640,7 +736,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libgcc/config/libbid/ChangeLog.linaro +++ b/src/libgcc/config/libbid/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -658,7 +758,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libdecnumber/ChangeLog.linaro +++ b/src/libdecnumber/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -677,10 +781,14 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ --- a/src/gcc/LINARO-VERSION +++ b/src/gcc/LINARO-VERSION @@ -0,0 +1 @@ -+4.9-2014.06-1 ++4.9-2014.07-1~dev --- a/src/gcc/c-family/ChangeLog.linaro +++ b/src/gcc/c-family/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -698,7 +806,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/java/ChangeLog.linaro +++ b/src/gcc/java/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -714,9 +826,268 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. 
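The c/c-parser.c and c/c-typeck.c hunks that follow thread each initializer
element's own location into process_init_element (PR c/60114), so that a
diagnostic for an element points at that element rather than at wherever
input_location happens to be once the initializer has been consumed. A
sketch of the kind of code affected (illustration only; the specific warning
flag is an assumption based on the PR, not something these hunks add):

  /* With -Wconversion, the warning about implicitly converting -1 to an
     unsigned type should be reported at the element inside the braces,
     not at a later point such as the closing brace.  */
  struct s { unsigned int u; };
  struct s v = { -1 };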
+--- a/src/gcc/c/c-parser.c ++++ b/src/gcc/c/c-parser.c +@@ -4210,7 +4210,8 @@ + init.original_type = NULL; + c_parser_error (parser, "expected identifier"); + c_parser_skip_until_found (parser, CPP_COMMA, NULL); +- process_init_element (init, false, braced_init_obstack); ++ process_init_element (input_location, init, false, ++ braced_init_obstack); + return; + } + } +@@ -4342,7 +4343,8 @@ + init.original_type = NULL; + c_parser_error (parser, "expected %<=%>"); + c_parser_skip_until_found (parser, CPP_COMMA, NULL); +- process_init_element (init, false, braced_init_obstack); ++ process_init_element (input_location, init, false, ++ braced_init_obstack); + return; + } + } +@@ -4363,11 +4365,12 @@ + { + struct c_expr init; + gcc_assert (!after || c_dialect_objc ()); ++ location_t loc = c_parser_peek_token (parser)->location; ++ + if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after) + init = c_parser_braced_init (parser, NULL_TREE, true); + else + { +- location_t loc = c_parser_peek_token (parser)->location; + init = c_parser_expr_no_commas (parser, after); + if (init.value != NULL_TREE + && TREE_CODE (init.value) != STRING_CST +@@ -4374,7 +4377,7 @@ + && TREE_CODE (init.value) != COMPOUND_LITERAL_EXPR) + init = convert_lvalue_to_rvalue (loc, init, true, true); + } +- process_init_element (init, false, braced_init_obstack); ++ process_init_element (loc, init, false, braced_init_obstack); + } + + /* Parse a compound statement (possibly a function body) (C90 6.6.2, +--- a/src/gcc/c/c-typeck.c ++++ b/src/gcc/c/c-typeck.c +@@ -102,8 +102,8 @@ + static char *print_spelling (char *); + static void warning_init (int, const char *); + static tree digest_init (location_t, tree, tree, tree, bool, bool, int); +-static void output_init_element (tree, tree, bool, tree, tree, int, bool, +- struct obstack *); ++static void output_init_element (location_t, tree, tree, bool, tree, tree, int, ++ bool, struct obstack *); + static void output_pending_init_elements (int, struct obstack *); + static int set_designator (int, struct obstack *); + static void push_range_stack (tree, struct obstack *); +@@ -7183,13 +7183,15 @@ + if ((TREE_CODE (constructor_type) == RECORD_TYPE + || TREE_CODE (constructor_type) == UNION_TYPE) + && constructor_fields == 0) +- process_init_element (pop_init_level (1, braced_init_obstack), ++ process_init_element (input_location, ++ pop_init_level (1, braced_init_obstack), + true, braced_init_obstack); + else if (TREE_CODE (constructor_type) == ARRAY_TYPE + && constructor_max_index + && tree_int_cst_lt (constructor_max_index, + constructor_index)) +- process_init_element (pop_init_level (1, braced_init_obstack), ++ process_init_element (input_location, ++ pop_init_level (1, braced_init_obstack), + true, braced_init_obstack); + else + break; +@@ -7389,10 +7391,9 @@ + /* When we come to an explicit close brace, + pop any inner levels that didn't have explicit braces. */ + while (constructor_stack->implicit) +- { +- process_init_element (pop_init_level (1, braced_init_obstack), +- true, braced_init_obstack); +- } ++ process_init_element (input_location, ++ pop_init_level (1, braced_init_obstack), ++ true, braced_init_obstack); + gcc_assert (!constructor_range_stack); + } + +@@ -7570,10 +7571,9 @@ + /* Designator list starts at the level of closest explicit + braces. 
*/ + while (constructor_stack->implicit) +- { +- process_init_element (pop_init_level (1, braced_init_obstack), +- true, braced_init_obstack); +- } ++ process_init_element (input_location, ++ pop_init_level (1, braced_init_obstack), ++ true, braced_init_obstack); + constructor_designated = 1; + return 0; + } +@@ -8193,9 +8193,9 @@ + existing initializer. */ + + static void +-output_init_element (tree value, tree origtype, bool strict_string, tree type, +- tree field, int pending, bool implicit, +- struct obstack * braced_init_obstack) ++output_init_element (location_t loc, tree value, tree origtype, ++ bool strict_string, tree type, tree field, int pending, ++ bool implicit, struct obstack * braced_init_obstack) + { + tree semantic_type = NULL_TREE; + bool maybe_const = true; +@@ -8293,8 +8293,8 @@ + + if (semantic_type) + value = build1 (EXCESS_PRECISION_EXPR, semantic_type, value); +- value = digest_init (input_location, type, value, origtype, npc, +- strict_string, require_constant_value); ++ value = digest_init (loc, type, value, origtype, npc, strict_string, ++ require_constant_value); + if (value == error_mark_node) + { + constructor_erroneous = 1; +@@ -8421,8 +8421,8 @@ + { + if (tree_int_cst_equal (elt->purpose, + constructor_unfilled_index)) +- output_init_element (elt->value, elt->origtype, true, +- TREE_TYPE (constructor_type), ++ output_init_element (input_location, elt->value, elt->origtype, ++ true, TREE_TYPE (constructor_type), + constructor_unfilled_index, 0, false, + braced_init_obstack); + else if (tree_int_cst_lt (constructor_unfilled_index, +@@ -8476,8 +8476,8 @@ + if (tree_int_cst_equal (elt_bitpos, ctor_unfilled_bitpos)) + { + constructor_unfilled_fields = elt->purpose; +- output_init_element (elt->value, elt->origtype, true, +- TREE_TYPE (elt->purpose), ++ output_init_element (input_location, elt->value, elt->origtype, ++ true, TREE_TYPE (elt->purpose), + elt->purpose, 0, false, + braced_init_obstack); + } +@@ -8550,7 +8550,7 @@ + existing initializer. 
*/ + + void +-process_init_element (struct c_expr value, bool implicit, ++process_init_element (location_t loc, struct c_expr value, bool implicit, + struct obstack * braced_init_obstack) + { + tree orig_value = value.value; +@@ -8594,7 +8594,7 @@ + if ((TREE_CODE (constructor_type) == RECORD_TYPE + || TREE_CODE (constructor_type) == UNION_TYPE) + && constructor_fields == 0) +- process_init_element (pop_init_level (1, braced_init_obstack), ++ process_init_element (loc, pop_init_level (1, braced_init_obstack), + true, braced_init_obstack); + else if ((TREE_CODE (constructor_type) == ARRAY_TYPE + || TREE_CODE (constructor_type) == VECTOR_TYPE) +@@ -8601,7 +8601,7 @@ + && constructor_max_index + && tree_int_cst_lt (constructor_max_index, + constructor_index)) +- process_init_element (pop_init_level (1, braced_init_obstack), ++ process_init_element (loc, pop_init_level (1, braced_init_obstack), + true, braced_init_obstack); + else + break; +@@ -8679,7 +8679,7 @@ + if (value.value) + { + push_member_name (constructor_fields); +- output_init_element (value.value, value.original_type, ++ output_init_element (loc, value.value, value.original_type, + strict_string, fieldtype, + constructor_fields, 1, implicit, + braced_init_obstack); +@@ -8771,7 +8771,7 @@ + if (value.value) + { + push_member_name (constructor_fields); +- output_init_element (value.value, value.original_type, ++ output_init_element (loc, value.value, value.original_type, + strict_string, fieldtype, + constructor_fields, 1, implicit, + braced_init_obstack); +@@ -8823,7 +8823,7 @@ + if (value.value) + { + push_array_bounds (tree_to_uhwi (constructor_index)); +- output_init_element (value.value, value.original_type, ++ output_init_element (loc, value.value, value.original_type, + strict_string, elttype, + constructor_index, 1, implicit, + braced_init_obstack); +@@ -8858,7 +8858,7 @@ + { + if (TREE_CODE (value.value) == VECTOR_CST) + elttype = TYPE_MAIN_VARIANT (constructor_type); +- output_init_element (value.value, value.original_type, ++ output_init_element (loc, value.value, value.original_type, + strict_string, elttype, + constructor_index, 1, implicit, + braced_init_obstack); +@@ -8887,7 +8887,7 @@ + else + { + if (value.value) +- output_init_element (value.value, value.original_type, ++ output_init_element (loc, value.value, value.original_type, + strict_string, constructor_type, + NULL_TREE, 1, implicit, + braced_init_obstack); +@@ -8906,8 +8906,8 @@ + while (constructor_stack != range_stack->stack) + { + gcc_assert (constructor_stack->implicit); +- process_init_element (pop_init_level (1, +- braced_init_obstack), ++ process_init_element (loc, ++ pop_init_level (1, braced_init_obstack), + true, braced_init_obstack); + } + for (p = range_stack; +@@ -8915,7 +8915,8 @@ + p = p->prev) + { + gcc_assert (constructor_stack->implicit); +- process_init_element (pop_init_level (1, braced_init_obstack), ++ process_init_element (loc, ++ pop_init_level (1, braced_init_obstack), + true, braced_init_obstack); + } + +--- a/src/gcc/c/c-tree.h ++++ b/src/gcc/c/c-tree.h +@@ -612,7 +612,8 @@ + extern struct c_expr pop_init_level (int, struct obstack *); + extern void set_init_index (tree, tree, struct obstack *); + extern void set_init_label (tree, struct obstack *); +-extern void process_init_element (struct c_expr, bool, struct obstack *); ++extern void process_init_element (location_t, struct c_expr, bool, ++ struct obstack *); + extern tree build_compound_literal (location_t, tree, tree, bool); + extern void check_compound_literal_type 
(location_t, struct c_type_name *); + extern tree c_start_case (location_t, location_t, tree); --- a/src/gcc/c/ChangeLog.linaro +++ b/src/gcc/c/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -773,7 +1144,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ #if HAVE_DLFCN_H --- a/src/gcc/objc/ChangeLog.linaro +++ b/src/gcc/objc/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -791,7 +1166,632 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/ChangeLog.linaro +++ b/src/gcc/ChangeLog.linaro -@@ -0,0 +1,836 @@ +@@ -0,0 +1,2648 @@ ++2014-07-20 Yvan Roux ++ ++ Revert: ++ 2014-07-16 Yvan Roux ++ ++ Backport from trunk r211129. ++ 2014-06-02 Ramana Radhakrishnan ++ ++ PR target/61154 ++ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define. ++ * config/arm/arm.md (mov64 splitter): Replace const_double_operand ++ with immediate_operand. ++ ++2014-07-19 Yvan Roux ++ ++ * LINARO-VERSION: Bump version. ++ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ * LINARO-VERSION: Update. ++ ++2014-07-17 Yvan Roux ++ ++ Backport from trunk r211887, r211899. ++ 2014-06-23 James Greenhalgh ++ ++ * config/aarch64/aarch64.md (addsi3_aarch64): Set "simd" attr to ++ "yes" where needed. ++ ++ 2014-06-23 James Greenhalgh ++ ++ * config/aarch64/aarch64.md (*addsi3_aarch64): Add alternative in ++ vector registers. ++ ++2014-07-17 Yvan Roux ++ ++ Backport from trunk r211440. ++ 2014-06-11 Kyrylo Tkachov ++ ++ * config.gcc (aarch64*-*-*): Add arm_acle.h to extra headers. ++ * Makefile.in (TEXI_GCC_FILES): Add aarch64-acle-intrinsics.texi to ++ dependencies. ++ * config/aarch64/aarch64-builtins.c (AARCH64_CRC32_BUILTINS): Define. ++ (aarch64_crc_builtin_datum): New struct. ++ (aarch64_crc_builtin_data): New. ++ (aarch64_init_crc32_builtins): New function. ++ (aarch64_init_builtins): Initialise CRC32 builtins when appropriate. ++ (aarch64_crc32_expand_builtin): New. ++ (aarch64_expand_builtin): Add CRC32 builtin expansion case. ++ * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define ++ __ARM_FEATURE_CRC32 when appropriate. ++ (TARGET_CRC32): Define. ++ * config/aarch64/aarch64.md (UNSPEC_CRC32B, UNSPEC_CRC32H, ++ UNSPEC_CRC32W, UNSPEC_CRC32X, UNSPEC_CRC32CB, UNSPEC_CRC32CH, ++ UNSPEC_CRC32CW, UNSPEC_CRC32CX): New unspec values. ++ (aarch64_): New pattern. ++ * config/aarch64/arm_acle.h: New file. ++ * config/aarch64/iterators.md (CRC): New int iterator. ++ (crc_variant, crc_mode): New int attributes. ++ * doc/aarch64-acle-intrinsics.texi: New file. ++ * doc/extend.texi (aarch64): Document aarch64 ACLE intrinsics. ++ Include aarch64-acle-intrinsics.texi. ++ ++2014-07-17 Yvan Roux ++ ++ Backport from trunk r211174. ++ 2014-06-03 Alan Lawrence ++ ++ * config/aarch64/aarch64-simd.md (aarch64_rev): ++ New pattern. ++ * config/aarch64/aarch64.c (aarch64_evpc_rev): New function. ++ (aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev. ++ * config/aarch64/iterators.md (REVERSE): New iterator. ++ (UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements. ++ (rev_op): New int_attribute. 
++ * config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8, ++ vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8, ++ vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8, ++ vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8, ++ vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16, ++ vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8, ++ vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32): ++ Replace temporary __asm__ with __builtin_shuffle. ++ ++2014-07-17 Yvan Roux ++ ++ Backport from trunk r210216, r210218, r210219. ++ 2014-05-08 Ramana Radhakrishnan ++ ++ * config/arm/arm_neon.h: Update comment. ++ * config/arm/neon-docgen.ml: Delete. ++ * config/arm/neon-gen.ml: Delete. ++ * doc/arm-neon-intrinsics.texi: Update comment. ++ ++ 2014-05-08 Ramana Radhakrishnan ++ ++ * config/arm/arm_neon_builtins.def (vadd, vsub): Only define the v2sf ++ and v4sf versions. ++ (vand, vorr, veor, vorn, vbic): Remove. ++ * config/arm/neon.md (neon_vadd, neon_vsub, neon_vadd_unspec): Adjust ++ iterator. ++ (neon_vsub_unspec): Likewise. ++ (neon_vorr, neon_vand, neon_vbic, neon_veor, neon_vorn): Remove. ++ ++ 2014-05-08 Ramana Radhakrishnan ++ ++ * config/arm/arm_neon.h (vadd_s8): GNU C implementation ++ (vadd_s16): Likewise. ++ (vadd_s32): Likewise. ++ (vadd_f32): Likewise. ++ (vadd_u8): Likewise. ++ (vadd_u16): Likewise. ++ (vadd_u32): Likewise. ++ (vadd_s64): Likewise. ++ (vadd_u64): Likewise. ++ (vaddq_s8): Likewise. ++ (vaddq_s16): Likewise. ++ (vaddq_s32): Likewise. ++ (vaddq_s64): Likewise. ++ (vaddq_f32): Likewise. ++ (vaddq_u8): Likewise. ++ (vaddq_u16): Likewise. ++ (vaddq_u32): Likewise. ++ (vaddq_u64): Likewise. ++ (vmul_s8): Likewise. ++ (vmul_s16): Likewise. ++ (vmul_s32): Likewise. ++ (vmul_f32): Likewise. ++ (vmul_u8): Likewise. ++ (vmul_u16): Likewise. ++ (vmul_u32): Likewise. ++ (vmul_p8): Likewise. ++ (vmulq_s8): Likewise. ++ (vmulq_s16): Likewise. ++ (vmulq_s32): Likewise. ++ (vmulq_f32): Likewise. ++ (vmulq_u8): Likewise. ++ (vmulq_u16): Likewise. ++ (vmulq_u32): Likewise. ++ (vsub_s8): Likewise. ++ (vsub_s16): Likewise. ++ (vsub_s32): Likewise. ++ (vsub_f32): Likewise. ++ (vsub_u8): Likewise. ++ (vsub_u16): Likewise. ++ (vsub_u32): Likewise. ++ (vsub_s64): Likewise. ++ (vsub_u64): Likewise. ++ (vsubq_s8): Likewise. ++ (vsubq_s16): Likewise. ++ (vsubq_s32): Likewise. ++ (vsubq_s64): Likewise. ++ (vsubq_f32): Likewise. ++ (vsubq_u8): Likewise. ++ (vsubq_u16): Likewise. ++ (vsubq_u32): Likewise. ++ (vsubq_u64): Likewise. ++ (vand_s8): Likewise. ++ (vand_s16): Likewise. ++ (vand_s32): Likewise. ++ (vand_u8): Likewise. ++ (vand_u16): Likewise. ++ (vand_u32): Likewise. ++ (vand_s64): Likewise. ++ (vand_u64): Likewise. ++ (vandq_s8): Likewise. ++ (vandq_s16): Likewise. ++ (vandq_s32): Likewise. ++ (vandq_s64): Likewise. ++ (vandq_u8): Likewise. ++ (vandq_u16): Likewise. ++ (vandq_u32): Likewise. ++ (vandq_u64): Likewise. ++ (vorr_s8): Likewise. ++ (vorr_s16): Likewise. ++ (vorr_s32): Likewise. ++ (vorr_u8): Likewise. ++ (vorr_u16): Likewise. ++ (vorr_u32): Likewise. ++ (vorr_s64): Likewise. ++ (vorr_u64): Likewise. ++ (vorrq_s8): Likewise. ++ (vorrq_s16): Likewise. ++ (vorrq_s32): Likewise. ++ (vorrq_s64): Likewise. ++ (vorrq_u8): Likewise. ++ (vorrq_u16): Likewise. ++ (vorrq_u32): Likewise. ++ (vorrq_u64): Likewise. ++ (veor_s8): Likewise. ++ (veor_s16): Likewise. ++ (veor_s32): Likewise. ++ (veor_u8): Likewise. ++ (veor_u16): Likewise. ++ (veor_u32): Likewise. ++ (veor_s64): Likewise. 
++ (veor_u64): Likewise. ++ (veorq_s8): Likewise. ++ (veorq_s16): Likewise. ++ (veorq_s32): Likewise. ++ (veorq_s64): Likewise. ++ (veorq_u8): Likewise. ++ (veorq_u16): Likewise. ++ (veorq_u32): Likewise. ++ (veorq_u64): Likewise. ++ (vbic_s8): Likewise. ++ (vbic_s16): Likewise. ++ (vbic_s32): Likewise. ++ (vbic_u8): Likewise. ++ (vbic_u16): Likewise. ++ (vbic_u32): Likewise. ++ (vbic_s64): Likewise. ++ (vbic_u64): Likewise. ++ (vbicq_s8): Likewise. ++ (vbicq_s16): Likewise. ++ (vbicq_s32): Likewise. ++ (vbicq_s64): Likewise. ++ (vbicq_u8): Likewise. ++ (vbicq_u16): Likewise. ++ (vbicq_u32): Likewise. ++ (vbicq_u64): Likewise. ++ (vorn_s8): Likewise. ++ (vorn_s16): Likewise. ++ (vorn_s32): Likewise. ++ (vorn_u8): Likewise. ++ (vorn_u16): Likewise. ++ (vorn_u32): Likewise. ++ (vorn_s64): Likewise. ++ (vorn_u64): Likewise. ++ (vornq_s8): Likewise. ++ (vornq_s16): Likewise. ++ (vornq_s32): Likewise. ++ (vornq_s64): Likewise. ++ (vornq_u8): Likewise. ++ (vornq_u16): Likewise. ++ (vornq_u32): Likewise. ++ (vornq_u64): Likewise. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210151. ++ 2014-05-07 Alan Lawrence ++ ++ * config/aarch64/arm_neon.h (vtrn1_f32, vtrn1_p8, vtrn1_p16, vtrn1_s8, ++ vtrn1_s16, vtrn1_s32, vtrn1_u8, vtrn1_u16, vtrn1_u32, vtrn1q_f32, ++ vtrn1q_f64, vtrn1q_p8, vtrn1q_p16, vtrn1q_s8, vtrn1q_s16, vtrn1q_s32, ++ vtrn1q_s64, vtrn1q_u8, vtrn1q_u16, vtrn1q_u32, vtrn1q_u64, vtrn2_f32, ++ vtrn2_p8, vtrn2_p16, vtrn2_s8, vtrn2_s16, vtrn2_s32, vtrn2_u8, ++ vtrn2_u16, vtrn2_u32, vtrn2q_f32, vtrn2q_f64, vtrn2q_p8, vtrn2q_p16, ++ vtrn2q_s8, vtrn2q_s16, vtrn2q_s32, vtrn2q_s64, vtrn2q_u8, vtrn2q_u16, ++ vtrn2q_u32, vtrn2q_u64): Replace temporary asm with __builtin_shuffle. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r209794. ++ 2014-04-25 Marek Polacek ++ ++ PR c/60114 ++ * c-parser.c (c_parser_initelt): Pass input_location to ++ process_init_element. ++ (c_parser_initval): Pass loc to process_init_element. ++ * c-tree.h (process_init_element): Adjust declaration. ++ * c-typeck.c (push_init_level): Pass input_location to ++ process_init_element. ++ (pop_init_level): Likewise. ++ (set_designator): Likewise. ++ (output_init_element): Add location_t parameter. Pass loc to ++ digest_init. ++ (output_pending_init_elements): Pass input_location to ++ output_init_element. ++ (process_init_element): Add location_t parameter. Pass loc to ++ output_init_element. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r211771. ++ 2014-06-18 Kyrylo Tkachov ++ ++ * genattrtab.c (n_bypassed): New variable. ++ (process_bypasses): Initialise n_bypassed. ++ Count number of bypassed reservations. ++ (make_automaton_attrs): Allocate space for bypassed reservations ++ rather than number of bypasses. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210861. ++ 2014-05-23 Jiong Wang ++ ++ * config/aarch64/predicates.md (aarch64_call_insn_operand): New ++ predicate. ++ * config/aarch64/constraints.md ("Ucs", "Usf"): New constraints. ++ * config/aarch64/aarch64.md (*sibcall_insn, *sibcall_value_insn): ++ Adjust for tailcalling through registers. ++ * config/aarch64/aarch64.h (enum reg_class): New caller save ++ register class. ++ (REG_CLASS_NAMES): Likewise. ++ (REG_CLASS_CONTENTS): Likewise. ++ * config/aarch64/aarch64.c (aarch64_function_ok_for_sibcall): ++ Allow tailcalling without decls. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r211314. ++ 2014-06-06 James Greenhalgh ++ ++ * config/aarch64/aarch64-protos.h (aarch64_expand_movmem): New. 
++ * config/aarch64/aarch64.c (aarch64_move_pointer): New.
++ (aarch64_progress_pointer): Likewise.
++ (aarch64_copy_one_part_and_move_pointers): Likewise.
++ (aarch64_expand_movmen): Likewise.
++ * config/aarch64/aarch64.h (MOVE_RATIO): Set low.
++ * config/aarch64/aarch64.md (movmem): New.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211185, 211186.
++ 2014-06-03 Alan Lawrence
++
++ * gcc/config/aarch64/aarch64-builtins.c
++ (aarch64_types_binop_uus_qualifiers,
++ aarch64_types_shift_to_unsigned_qualifiers,
++ aarch64_types_unsigned_shiftacc_qualifiers): Define.
++ * gcc/config/aarch64/aarch64-simd-builtins.def (uqshl, uqrshl, uqadd,
++ uqsub, usqadd, usra_n, ursra_n, uqshrn_n, uqrshrn_n, usri_n, usli_n,
++ sqshlu_n, uqshl_n): Update qualifiers.
++ * gcc/config/aarch64/arm_neon.h (vqadd_u8, vqadd_u16, vqadd_u32,
++ vqadd_u64, vqaddq_u8, vqaddq_u16, vqaddq_u32, vqaddq_u64, vqsub_u8,
++ vqsub_u16, vqsub_u32, vqsub_u64, vqsubq_u8, vqsubq_u16, vqsubq_u32,
++ vqsubq_u64, vqaddb_u8, vqaddh_u16, vqadds_u32, vqaddd_u64, vqrshl_u8,
++ vqrshl_u16, vqrshl_u32, vqrshl_u64, vqrshlq_u8, vqrshlq_u16,
++ vqrshlq_u32, vqrshlq_u64, vqrshlb_u8, vqrshlh_u16, vqrshls_u32,
++ vqrshld_u64, vqrshrn_n_u16, vqrshrn_n_u32, vqrshrn_n_u64,
++ vqrshrnh_n_u16, vqrshrns_n_u32, vqrshrnd_n_u64, vqshl_u8, vqshl_u16,
++ vqshl_u32, vqshl_u64, vqshlq_u8, vqshlq_u16, vqshlq_u32, vqshlq_u64,
++ vqshlb_u8, vqshlh_u16, vqshls_u32, vqshld_u64, vqshl_n_u8, vqshl_n_u16,
++ vqshl_n_u32, vqshl_n_u64, vqshlq_n_u8, vqshlq_n_u16, vqshlq_n_u32,
++ vqshlq_n_u64, vqshlb_n_u8, vqshlh_n_u16, vqshls_n_u32, vqshld_n_u64,
++ vqshlu_n_s8, vqshlu_n_s16, vqshlu_n_s32, vqshlu_n_s64, vqshluq_n_s8,
++ vqshluq_n_s16, vqshluq_n_s32, vqshluq_n_s64, vqshlub_n_s8,
++ vqshluh_n_s16, vqshlus_n_s32, vqshlud_n_s64, vqshrn_n_u16,
++ vqshrn_n_u32, vqshrn_n_u64, vqshrnh_n_u16, vqshrns_n_u32,
++ vqshrnd_n_u64, vqsubb_u8, vqsubh_u16, vqsubs_u32, vqsubd_u64,
++ vrsra_n_u8, vrsra_n_u16, vrsra_n_u32, vrsra_n_u64, vrsraq_n_u8,
++ vrsraq_n_u16, vrsraq_n_u32, vrsraq_n_u64, vrsrad_n_u64, vsli_n_u8,
++ vsli_n_u16, vsli_n_u32,vsli_n_u64, vsliq_n_u8, vsliq_n_u16,
++ vsliq_n_u32, vsliq_n_u64, vslid_n_u64, vsqadd_u8, vsqadd_u16,
++ vsqadd_u32, vsqadd_u64, vsqaddq_u8, vsqaddq_u16, vsqaddq_u32,
++ vsqaddq_u64, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, vsqaddd_u64,
++ vsra_n_u8, vsra_n_u16, vsra_n_u32, vsra_n_u64, vsraq_n_u8,
++ vsraq_n_u16, vsraq_n_u32, vsraq_n_u64, vsrad_n_u64, vsri_n_u8,
++ vsri_n_u16, vsri_n_u32, vsri_n_u64, vsriq_n_u8, vsriq_n_u16,
++ vsriq_n_u32, vsriq_n_u64, vsrid_n_u64): Remove casts.
++
++ 2014-06-03 Alan Lawrence
++
++ * gcc/config/aarch64/aarch64-builtins.c
++ (aarch64_types_binop_ssu_qualifiers): New static data.
++ (TYPES_BINOP_SSU): Define.
++ * gcc/config/aarch64/aarch64-simd-builtins.def (suqadd, ushl, urshl,
++ urshr_n, ushll_n): Use appropriate unsigned qualifiers.
++ * gcc/config/aarch64/arm_neon.h (vrshl_u8, vrshl_u16, vrshl_u32,
++ vrshl_u64, vrshlq_u8, vrshlq_u16, vrshlq_u32, vrshlq_u64, vrshld_u64,
++ vrshr_n_u8, vrshr_n_u16, vrshr_n_u32, vrshr_n_u64, vrshrq_n_u8,
++ vrshrq_n_u16, vrshrq_n_u32, vrshrq_n_u64, vrshrd_n_u64, vshll_n_u8,
++ vshll_n_u16, vshll_n_u32, vuqadd_s8, vuqadd_s16, vuqadd_s32,
++ vuqadd_s64, vuqaddq_s8, vuqaddq_s16, vuqaddq_s32, vuqaddq_s64,
++ vuqaddb_s8, vuqaddh_s16, vuqadds_s32, vuqaddd_s64): Add signedness
++ suffix to builtin function name, remove cast.
++ (vshl_s8, vshl_s16, vshl_s32, vshl_s64, vshl_u8, vshl_u16, vshl_u32,
++ vshl_u64, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_s64, vshlq_u8,
++ vshlq_u16, vshlq_u32, vshlq_u64, vshld_s64, vshld_u64): Remove cast.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211408, 211416.
++ 2014-06-10 Marcus Shawcroft
++
++ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Fix
++ REG_CFA_RESTORE mode.
++
++ 2014-06-10 Jiong Wang
++
++ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs)
++ (aarch64_save_or_restore_callee_save_registers): Fix layout.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211418.
++ 2014-06-10 Kyrylo Tkachov
++
++ * config/aarch64/aarch64-simd.md (move_lo_quad_):
++ Change second alternative type to f_mcr.
++ * config/aarch64/aarch64.md (*movsi_aarch64): Change 11th
++ and 12th alternatives' types to f_mcr and f_mrc.
++ (*movdi_aarch64): Same for 12th and 13th alternatives.
++ (*movsf_aarch64): Change 9th alternatives' type to mov_reg.
++ (aarch64_movtilow_tilow): Change type to fmov.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211371.
++ 2014-06-09 Ramana Radhakrishnan
++
++ * config/arm/arm-modes.def: Remove XFmode.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211268.
++ 2014-06-05 Marcus Shawcroft
++
++ * config/aarch64/aarch64.c (aarch64_expand_prologue): Update stack
++ layout comment.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211129.
++ 2014-06-02 Ramana Radhakrishnan
++
++ PR target/61154
++ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define.
++ * config/arm/arm.md (mov64 splitter): Replace const_double_operand
++ with immediate_operand.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211073.
++ 2014-05-30 Kyrylo Tkachov
++
++ * config/arm/thumb2.md (*thumb2_movhi_insn): Set type of movw
++ to mov_imm.
++ * config/arm/vfp.md (*thumb2_movsi_vfp): Likewise.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211050.
++ 2014-05-29 Richard Earnshaw
++ Richard Sandiford
++
++ * arm/iterators.md (shiftable_ops): New code iterator.
++ (t2_binop0, arith_shift_insn): New code attributes.
++ * arm/predicates.md (shift_nomul_operator): New predicate.
++ * arm/arm.md (insn_enabled): Delete.
++ (enabled): Remove insn_enabled test.
++ (*arith_shiftsi): Delete. Replace with ...
++ (*_multsi): ... new pattern.
++ (*_shiftsi): ... new pattern.
++ * config/arm/arm.c (arm_print_operand): Handle operand format 'b'.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r210996.
++ 2014-05-27 Andrew Pinski
++
++ * config/aarch64/aarch64.md (stack_protect_set_):
++ Use for the register in assembly template.
++ (stack_protect_test): Use the mode of operands[0] for the
++ result.
++ (stack_protect_test_): Use for the register
++ in assembly template.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r210967.
++ 2014-05-27 Kyrylo Tkachov
++
++ * config/arm/neon.md (neon_bswap): New pattern.
++ * config/arm/arm.c (neon_itype): Add NEON_BSWAP.
++ (arm_init_neon_builtins): Handle NEON_BSWAP.
++ Define required type nodes.
++ (arm_expand_neon_builtin): Handle NEON_BSWAP.
++ (arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins.
++ * config/arm/arm_neon_builtins.def (bswap): Define builtins.
++ * config/arm/iterators.md (VDQHSD): New mode iterator.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r210471.
++ 2014-05-15 Kyrylo Tkachov
++
++ * config/arm/arm.c (arm_option_override): Use the SCHED_PRESSURE_MODEL
++ enum name for PARAM_SCHED_PRESSURE_ALGORITHM.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r210369.
++ 2014-05-13 Kyrylo Tkachov ++ ++ * config/arm/arm.c (neon_itype): Remove NEON_RESULTPAIR. ++ (arm_init_neon_builtins): Remove handling of NEON_RESULTPAIR. ++ Remove associated type declarations and initialisations. ++ (arm_expand_neon_builtin): Likewise. ++ (neon_emit_pair_result_insn): Delete. ++ * config/arm/arm_neon_builtins (vtrn, vzip, vuzp): Delete. ++ * config/arm/neon.md (neon_vtrn): Delete. ++ (neon_vzip): Likewise. ++ (neon_vuzp): Likewise. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r211058, 211177. ++ 2014-05-29 Alan Lawrence ++ ++ * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers, ++ TYPES_BINOPV): New static data. ++ * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin. ++ * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi): ++ New patterns. ++ * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match ++ patterns for EXT. ++ (aarch64_evpc_ext): New function. ++ ++ * config/aarch64/iterators.md (UNSPEC_EXT): New enum element. ++ ++ * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16, ++ vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32, ++ vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8, ++ vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32, ++ vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi. ++ ++ 2014-06-03 Alan Lawrence ++ ++ * config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle ++ location == 0. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r209797. ++ 2014-04-25 Kyrylo Tkachov ++ ++ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): ++ Use HOST_WIDE_INT_C for mask literal. ++ (aarch_rev16_shleft_mask_imm_p): Likewise. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r211148. ++ 2014-06-02 Andrew Pinski ++ ++ * config/aarch64/aarch64-linux.h (GLIBC_DYNAMIC_LINKER): ++ /lib/ld-linux32-aarch64.so.1 is used for ILP32. ++ (LINUX_TARGET_LINK_SPEC): Update linker script for ILP32. ++ file whose name depends on -mabi= and -mbig-endian. ++ * config/aarch64/t-aarch64-linux (MULTILIB_OSDIRNAMES): Handle LP64 ++ better and handle ilp32 too. ++ (MULTILIB_OPTIONS): Delete. ++ (MULTILIB_DIRNAMES): Delete. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210828, r211103. ++ 2014-05-31 Kugan Vivekanandarajah ++ ++ * config/arm/arm.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New define. ++ (arm_builtins) : Add ARM_BUILTIN_GET_FPSCR and ARM_BUILTIN_SET_FPSCR. ++ (bdesc_2arg) : Add description for builtins __builtins_arm_set_fpscr ++ and __builtins_arm_get_fpscr. ++ (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and ++ __builtins_arm_get_fpscr. ++ (arm_expand_builtin) : Expand builtins __builtins_arm_set_fpscr and ++ __builtins_arm_ldfpscr. ++ (arm_atomic_assign_expand_fenv): New function. ++ * config/arm/vfp.md (set_fpscr): New pattern. ++ (get_fpscr) : Likewise. ++ * config/arm/unspecs.md (unspecv): Add VUNSPEC_GET_FPSCR and ++ VUNSPEC_SET_FPSCR. ++ * doc/extend.texi (AARCH64 Built-in Functions) : Document ++ __builtins_arm_set_fpscr, __builtins_arm_get_fpscr. ++ ++ 2014-05-23 Kugan Vivekanandarajah ++ ++ * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New ++ define. ++ * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv): ++ New function declaration. ++ * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add ++ AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR. ++ AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR. 
++ (aarch64_init_builtins) : Initialize builtins ++ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. ++ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. ++ (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr ++ __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr, ++ and __builtins_aarch64_set_fpsr. ++ (aarch64_atomic_assign_expand_fenv): New function. ++ * config/aarch64/aarch64.md (set_fpcr): New pattern. ++ (get_fpcr) : Likewise. ++ (set_fpsr) : Likewise. ++ (get_fpsr) : Likewise. ++ (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR ++ and UNSPECV_SET_FPSR. ++ * doc/extend.texi (AARCH64 Built-in Functions) : Document ++ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. ++ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210355. ++ 2014-05-13 Ian Bolton ++ ++ * config/aarch64/aarch64-protos.h ++ (aarch64_hard_regno_caller_save_mode): New prototype. ++ * config/aarch64/aarch64.c (aarch64_hard_regno_caller_save_mode): ++ New function. ++ * config/aarch64/aarch64.h (HARD_REGNO_CALLER_SAVE_MODE): New macro. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r209943. ++ 2014-04-30 Alan Lawrence ++ ++ * config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8, ++ vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32, ++ vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32, ++ vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32, ++ vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8, ++ vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16, ++ vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16, ++ vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle. ++ ++2014-06-26 Yvan Roux ++ ++ * LINARO-VERSION: Bump version. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -1628,6651 +2628,14955 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. + * LINARO-VERSION: New file. + * configure.ac: Add Linaro version string. ---- a/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c -@@ -0,0 +1,65 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline --save-temps" } */ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r211771. ++ 2014-06-18 Kyrylo Tkachov ++ ++ * genattrtab.c (n_bypassed): New variable. ++ (process_bypasses): Initialise n_bypassed. ++ Count number of bypassed reservations. ++ (make_automaton_attrs): Allocate space for bypassed reservations ++ rather than number of bypasses. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210861. ++ 2014-05-23 Jiong Wang ++ ++ * config/aarch64/predicates.md (aarch64_call_insn_operand): New ++ predicate. ++ * config/aarch64/constraints.md ("Ucs", "Usf"): New constraints. ++ * config/aarch64/aarch64.md (*sibcall_insn, *sibcall_value_insn): ++ Adjust for tailcalling through registers. ++ * config/aarch64/aarch64.h (enum reg_class): New caller save ++ register class. ++ (REG_CLASS_NAMES): Likewise. ++ (REG_CLASS_CONTENTS): Likewise. ++ * config/aarch64/aarch64.c (aarch64_function_ok_for_sibcall): ++ Allow tailcalling without decls. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r211314. ++ 2014-06-06 James Greenhalgh ++ ++ * config/aarch64/aarch64-protos.h (aarch64_expand_movmem): New. ++ * config/aarch64/aarch64.c (aarch64_move_pointer): New. 
++ (aarch64_progress_pointer): Likewise.
++ (aarch64_copy_one_part_and_move_pointers): Likewise.
++ (aarch64_expand_movmen): Likewise.
++ * config/aarch64/aarch64.h (MOVE_RATIO): Set low.
++ * config/aarch64/aarch64.md (movmem): New.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211185, 211186.
++ 2014-06-03 Alan Lawrence
++
++ * gcc/config/aarch64/aarch64-builtins.c
++ (aarch64_types_binop_uus_qualifiers,
++ aarch64_types_shift_to_unsigned_qualifiers,
++ aarch64_types_unsigned_shiftacc_qualifiers): Define.
++ * gcc/config/aarch64/aarch64-simd-builtins.def (uqshl, uqrshl, uqadd,
++ uqsub, usqadd, usra_n, ursra_n, uqshrn_n, uqrshrn_n, usri_n, usli_n,
++ sqshlu_n, uqshl_n): Update qualifiers.
++ * gcc/config/aarch64/arm_neon.h (vqadd_u8, vqadd_u16, vqadd_u32,
++ vqadd_u64, vqaddq_u8, vqaddq_u16, vqaddq_u32, vqaddq_u64, vqsub_u8,
++ vqsub_u16, vqsub_u32, vqsub_u64, vqsubq_u8, vqsubq_u16, vqsubq_u32,
++ vqsubq_u64, vqaddb_u8, vqaddh_u16, vqadds_u32, vqaddd_u64, vqrshl_u8,
++ vqrshl_u16, vqrshl_u32, vqrshl_u64, vqrshlq_u8, vqrshlq_u16,
++ vqrshlq_u32, vqrshlq_u64, vqrshlb_u8, vqrshlh_u16, vqrshls_u32,
++ vqrshld_u64, vqrshrn_n_u16, vqrshrn_n_u32, vqrshrn_n_u64,
++ vqrshrnh_n_u16, vqrshrns_n_u32, vqrshrnd_n_u64, vqshl_u8, vqshl_u16,
++ vqshl_u32, vqshl_u64, vqshlq_u8, vqshlq_u16, vqshlq_u32, vqshlq_u64,
++ vqshlb_u8, vqshlh_u16, vqshls_u32, vqshld_u64, vqshl_n_u8, vqshl_n_u16,
++ vqshl_n_u32, vqshl_n_u64, vqshlq_n_u8, vqshlq_n_u16, vqshlq_n_u32,
++ vqshlq_n_u64, vqshlb_n_u8, vqshlh_n_u16, vqshls_n_u32, vqshld_n_u64,
++ vqshlu_n_s8, vqshlu_n_s16, vqshlu_n_s32, vqshlu_n_s64, vqshluq_n_s8,
++ vqshluq_n_s16, vqshluq_n_s32, vqshluq_n_s64, vqshlub_n_s8,
++ vqshluh_n_s16, vqshlus_n_s32, vqshlud_n_s64, vqshrn_n_u16,
++ vqshrn_n_u32, vqshrn_n_u64, vqshrnh_n_u16, vqshrns_n_u32,
++ vqshrnd_n_u64, vqsubb_u8, vqsubh_u16, vqsubs_u32, vqsubd_u64,
++ vrsra_n_u8, vrsra_n_u16, vrsra_n_u32, vrsra_n_u64, vrsraq_n_u8,
++ vrsraq_n_u16, vrsraq_n_u32, vrsraq_n_u64, vrsrad_n_u64, vsli_n_u8,
++ vsli_n_u16, vsli_n_u32,vsli_n_u64, vsliq_n_u8, vsliq_n_u16,
++ vsliq_n_u32, vsliq_n_u64, vslid_n_u64, vsqadd_u8, vsqadd_u16,
++ vsqadd_u32, vsqadd_u64, vsqaddq_u8, vsqaddq_u16, vsqaddq_u32,
++ vsqaddq_u64, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, vsqaddd_u64,
++ vsra_n_u8, vsra_n_u16, vsra_n_u32, vsra_n_u64, vsraq_n_u8,
++ vsraq_n_u16, vsraq_n_u32, vsraq_n_u64, vsrad_n_u64, vsri_n_u8,
++ vsri_n_u16, vsri_n_u32, vsri_n_u64, vsriq_n_u8, vsriq_n_u16,
++ vsriq_n_u32, vsriq_n_u64, vsrid_n_u64): Remove casts.
++
++ 2014-06-03 Alan Lawrence
++
++ * gcc/config/aarch64/aarch64-builtins.c
++ (aarch64_types_binop_ssu_qualifiers): New static data.
++ (TYPES_BINOP_SSU): Define.
++ * gcc/config/aarch64/aarch64-simd-builtins.def (suqadd, ushl, urshl,
++ urshr_n, ushll_n): Use appropriate unsigned qualifiers.
++ * gcc/config/aarch64/arm_neon.h (vrshl_u8, vrshl_u16, vrshl_u32,
++ vrshl_u64, vrshlq_u8, vrshlq_u16, vrshlq_u32, vrshlq_u64, vrshld_u64,
++ vrshr_n_u8, vrshr_n_u16, vrshr_n_u32, vrshr_n_u64, vrshrq_n_u8,
++ vrshrq_n_u16, vrshrq_n_u32, vrshrq_n_u64, vrshrd_n_u64, vshll_n_u8,
++ vshll_n_u16, vshll_n_u32, vuqadd_s8, vuqadd_s16, vuqadd_s32,
++ vuqadd_s64, vuqaddq_s8, vuqaddq_s16, vuqaddq_s32, vuqaddq_s64,
++ vuqaddb_s8, vuqaddh_s16, vuqadds_s32, vuqaddd_s64): Add signedness
++ suffix to builtin function name, remove cast.
++ (vshl_s8, vshl_s16, vshl_s32, vshl_s64, vshl_u8, vshl_u16, vshl_u32,
++ vshl_u64, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_s64, vshlq_u8,
++ vshlq_u16, vshlq_u32, vshlq_u64, vshld_s64, vshld_u64): Remove cast.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211408, 211416.
++ 2014-06-10 Marcus Shawcroft
++
++ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Fix
++ REG_CFA_RESTORE mode.
++
++ 2014-06-10 Jiong Wang
++
++ * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs)
++ (aarch64_save_or_restore_callee_save_registers): Fix layout.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211418.
++ 2014-06-10 Kyrylo Tkachov
++
++ * config/aarch64/aarch64-simd.md (move_lo_quad_):
++ Change second alternative type to f_mcr.
++ * config/aarch64/aarch64.md (*movsi_aarch64): Change 11th
++ and 12th alternatives' types to f_mcr and f_mrc.
++ (*movdi_aarch64): Same for 12th and 13th alternatives.
++ (*movsf_aarch64): Change 9th alternatives' type to mov_reg.
++ (aarch64_movtilow_tilow): Change type to fmov.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211371.
++ 2014-06-09 Ramana Radhakrishnan
++
++ * config/arm/arm-modes.def: Remove XFmode.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211268.
++ 2014-06-05 Marcus Shawcroft
++
++ * config/aarch64/aarch64.c (aarch64_expand_prologue): Update stack
++ layout comment.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211129.
++ 2014-06-02 Ramana Radhakrishnan
++
++ PR target/61154
++ * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define.
++ * config/arm/arm.md (mov64 splitter): Replace const_double_operand
++ with immediate_operand.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211073.
++ 2014-05-30 Kyrylo Tkachov
++
++ * config/arm/thumb2.md (*thumb2_movhi_insn): Set type of movw
++ to mov_imm.
++ * config/arm/vfp.md (*thumb2_movsi_vfp): Likewise.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r211050.
++ 2014-05-29 Richard Earnshaw
++ Richard Sandiford
++
++ * arm/iterators.md (shiftable_ops): New code iterator.
++ (t2_binop0, arith_shift_insn): New code attributes.
++ * arm/predicates.md (shift_nomul_operator): New predicate.
++ * arm/arm.md (insn_enabled): Delete.
++ (enabled): Remove insn_enabled test.
++ (*arith_shiftsi): Delete. Replace with ...
++ (*_multsi): ... new pattern.
++ (*_shiftsi): ... new pattern.
++ * config/arm/arm.c (arm_print_operand): Handle operand format 'b'.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r210996.
++ 2014-05-27 Andrew Pinski
++
++ * config/aarch64/aarch64.md (stack_protect_set_):
++ Use for the register in assembly template.
++ (stack_protect_test): Use the mode of operands[0] for the
++ result.
++ (stack_protect_test_): Use for the register
++ in assembly template.
++
++2014-07-16 Yvan Roux
++
++ Backport from trunk r210967.
++ 2014-05-27 Kyrylo Tkachov
++
++ * config/arm/neon.md (neon_bswap): New pattern.
++ * config/arm/arm.c (neon_itype): Add NEON_BSWAP.
++ (arm_init_neon_builtins): Handle NEON_BSWAP.
++ Define required type nodes.
++ (arm_expand_neon_builtin): Handle NEON_BSWAP. ++ (arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins. ++ * config/arm/arm_neon_builtins.def (bswap): Define builtins. ++ * config/arm/iterators.md (VDQHSD): New mode iterator. + -+int main () -+{ -+ s64int a64 = 0xdeadbeef00000000ll; -+ s64int b64 = 0x000000004f4f0112ll; -+ s64int c64 = 0xdeadbeef000f0000ll; ++2014-07-16 Yvan Roux + -+ u32int c32 = 0x01124f4f; -+ s32int d32 = 0xabbaface; ++ Backport from trunk r210471. ++ 2014-05-15 Kyrylo Tkachov + -+ s64int z = iordi_di_notdi (a64, b64); -+ if (z != 0xffffffffb0b0feedll) -+ abort (); ++ * config/arm/arm.c (arm_option_override): Use the SCHED_PRESSURE_MODEL ++ enum name for PARAM_SCHED_PRESSURE_ALGORITHM. + -+ z = iordi_di_notzesidi (a64, c32); -+ if (z != 0xfffffffffeedb0b0ll) -+ abort (); ++2014-07-16 Yvan Roux + -+ z = iordi_notdi_zesidi (c64, c32); -+ if (z != 0x21524110fff2ffffll) -+ abort (); ++ Backport from trunk r210369. ++ 2014-05-13 Kyrylo Tkachov + -+ z = iordi_di_notsesidi (a64, d32); -+ if (z != 0xdeadbeef54450531ll) -+ abort (); ++ * config/arm/arm.c (neon_itype): Remove NEON_RESULTPAIR. ++ (arm_init_neon_builtins): Remove handling of NEON_RESULTPAIR. ++ Remove associated type declarations and initialisations. ++ (arm_expand_neon_builtin): Likewise. ++ (neon_emit_pair_result_insn): Delete. ++ * config/arm/arm_neon_builtins (vtrn, vzip, vuzp): Delete. ++ * config/arm/neon.md (neon_vtrn): Delete. ++ (neon_vzip): Likewise. ++ (neon_vuzp): Likewise. + -+ return 0; -+} ++2014-07-16 Yvan Roux + -+/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */ ++ Backport from trunk r211058, 211177. ++ 2014-05-29 Alan Lawrence + -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipp16' ARM Neon intrinsic. */ ++ * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers, ++ TYPES_BINOPV): New static data. ++ * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin. ++ * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi): ++ New patterns. ++ * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match ++ patterns for EXT. ++ (aarch64_evpc_ext): New function. + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/aarch64/iterators.md (UNSPEC_EXT): New enum element. + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipp16.x" ++ * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16, ++ vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32, ++ vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8, ++ vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32, ++ vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi. + -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips32' ARM Neon intrinsic. 
*/ ++ 2014-06-03 Alan Lawrence + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle ++ location == 0. + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips32.x" ++2014-07-16 Yvan Roux + -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQp8' ARM Neon intrinsic. */ ++ Backport from trunk r209797. ++ 2014-04-25 Kyrylo Tkachov + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): ++ Use HOST_WIDE_INT_C for mask literal. ++ (aarch_rev16_shleft_mask_imm_p): Likewise. + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqp8.x" ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r211148. ++ 2014-06-02 Andrew Pinski ++ ++ * config/aarch64/aarch64-linux.h (GLIBC_DYNAMIC_LINKER): ++ /lib/ld-linux32-aarch64.so.1 is used for ILP32. ++ (LINUX_TARGET_LINK_SPEC): Update linker script for ILP32. ++ file whose name depends on -mabi= and -mbig-endian. ++ * config/aarch64/t-aarch64-linux (MULTILIB_OSDIRNAMES): Handle LP64 ++ better and handle ilp32 too. ++ (MULTILIB_OPTIONS): Delete. ++ (MULTILIB_DIRNAMES): Delete. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210828, r211103. ++ 2014-05-31 Kugan Vivekanandarajah ++ ++ * config/arm/arm.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New define. ++ (arm_builtins) : Add ARM_BUILTIN_GET_FPSCR and ARM_BUILTIN_SET_FPSCR. ++ (bdesc_2arg) : Add description for builtins __builtins_arm_set_fpscr ++ and __builtins_arm_get_fpscr. ++ (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and ++ __builtins_arm_get_fpscr. ++ (arm_expand_builtin) : Expand builtins __builtins_arm_set_fpscr and ++ __builtins_arm_ldfpscr. ++ (arm_atomic_assign_expand_fenv): New function. ++ * config/arm/vfp.md (set_fpscr): New pattern. ++ (get_fpscr) : Likewise. ++ * config/arm/unspecs.md (unspecv): Add VUNSPEC_GET_FPSCR and ++ VUNSPEC_SET_FPSCR. ++ * doc/extend.texi (AARCH64 Built-in Functions) : Document ++ __builtins_arm_set_fpscr, __builtins_arm_get_fpscr. ++ ++ 2014-05-23 Kugan Vivekanandarajah ++ ++ * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New ++ define. ++ * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv): ++ New function declaration. ++ * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add ++ AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR. ++ AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR. ++ (aarch64_init_builtins) : Initialize builtins ++ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. ++ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. ++ (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr ++ __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr, ++ and __builtins_aarch64_set_fpsr. ++ (aarch64_atomic_assign_expand_fenv): New function. ++ * config/aarch64/aarch64.md (set_fpcr): New pattern. ++ (get_fpcr) : Likewise. ++ (set_fpsr) : Likewise. ++ (get_fpsr) : Likewise. 
++ (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR ++ and UNSPECV_SET_FPSR. ++ * doc/extend.texi (AARCH64 Built-in Functions) : Document ++ __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. ++ __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210355. ++ 2014-05-13 Ian Bolton ++ ++ * config/aarch64/aarch64-protos.h ++ (aarch64_hard_regno_caller_save_mode): New prototype. ++ * config/aarch64/aarch64.c (aarch64_hard_regno_caller_save_mode): ++ New function. ++ * config/aarch64/aarch64.h (HARD_REGNO_CALLER_SAVE_MODE): New macro. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r209943. ++ 2014-04-30 Alan Lawrence ++ ++ * config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8, ++ vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32, ++ vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32, ++ vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32, ++ vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8, ++ vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16, ++ vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16, ++ vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle. ++ ++2014-06-26 Yvan Roux + -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu32' ARM Neon intrinsic. */ ++ * LINARO-VERSION: Bump version. + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++2014-06-25 Yvan Roux + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu32.x" ++ GCC Linaro 4.9-2014.06-1 released. ++ * LINARO-VERSION: Update. + -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips16' ARM Neon intrinsic. */ ++2014-06-24 Yvan Roux + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ Revert: ++ 2014-05-23 Yvan Roux + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips16.x" ++ Backport from trunk r209643. ++ 2014-04-22 Ramana Radhakrishnan + -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu16' ARM Neon intrinsic. */ ++ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. 
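# Note (editorial): the r209943 entry above, like the earlier r211058/211177
# vext backport, replaces hand-written __asm__ bodies in arm_neon.h with
# __builtin_shuffle, which the optimizers can see through. A rough sketch of
# the shape such a definition takes (illustrative only, not the actual
# arm_neon.h text; my_uzp1_s8 is a hypothetical name):
#
#   #include <arm_neon.h>
#
#   /* uzp1: keep the even-indexed lanes of the concatenation {a, b}.  */
#   int8x8_t
#   my_uzp1_s8 (int8x8_t a, int8x8_t b)
#   {
#     return __builtin_shuffle (a, b,
#                               (uint8x8_t) { 0, 2, 4, 6, 8, 10, 12, 14 });
#   }
#
# On AArch64 a constant shuffle like this is expected to be matched back to
# a single permute instruction by the aarch64_evpc_* recognizers named above.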
+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++2014-06-13 Yvan Roux + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu16.x" ++ Backport from trunk r210493, 210494, 210495, 210496, 210497, 210498, ++ 210499, 210500, 210501, 210502, 210503, 210504, 210505, 210506, 210507, ++ 210508, 210509, 210510, 210512, 211205, 211206. ++ 2014-05-16 James Greenhalgh + -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQf32' ARM Neon intrinsic. */ ++ * config/aarch64/aarch64-protos.h (scale_addr_mode_cost): New. ++ (cpu_addrcost_table): Use it. ++ * config/aarch64/aarch64.c (generic_addrcost_table): Initialize it. ++ (aarch64_address_cost): Rewrite using aarch64_classify_address, ++ move it. + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ 2014-05-16 James Greenhalgh + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqf32.x" ++ * config/aarch64/aarch64.c (cortexa57_addrcost_table): New. ++ (cortexa57_vector_cost): Likewise. ++ (cortexa57_tunings): Use them. + -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs8' ARM Neon intrinsic. */ ++ 2014-05-16 James Greenhalgh + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/aarch64/aarch64.c (aarch64_rtx_costs_wrapper): New. ++ (TARGET_RTX_COSTS): Call it. + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs8.x" ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp -+++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp -@@ -0,0 +1,35 @@ -+# Copyright (C) 1997-2014 Free Software Foundation, Inc. ++ * config/aarch64/aarch64.c (aarch64_build_constant): Conditionally ++ emit instructions, return number of instructions which would ++ be emitted. ++ (aarch64_add_constant): Update call to aarch64_build_constant. ++ (aarch64_output_mi_thunk): Likewise. ++ (aarch64_rtx_costs): Estimate cost of a CONST_INT, cost ++ a CONST_DOUBLE. + -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. 
-+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+# GCC testsuite that uses the `dg.exp' driver. ++ * config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename ++ to... ++ (aarch64_strip_extend): ...this, don't strip shifts, check RTX is ++ well formed. ++ (aarch64_rtx_mult_cost): New. ++ (aarch64_rtx_costs): Use it, refactor as appropriate. + -+# Exit immediately if this isn't an ARM target. -+if ![istarget arm*-*-*] then { -+ return -+} ++ 2014-05-16 James Greenhalgh + -+# Load support procs. -+load_lib gcc-dg.exp ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Set default costs. + -+# Initialize `dg'. -+dg-init ++ 2014-05-16 James Greenhalgh ++ Philip Tomsich + -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" "" ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costing ++ for SET RTX. + -+# All done. -+dg-finish ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu8' ARM Neon intrinsic. */ ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Use address ++ costs when costing loads and stores to memory. + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu8.x" ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipp8' ARM Neon intrinsic. */ ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve cost for ++ logical operations. + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipp8.x" ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost ++ ZERO_EXTEND and SIGN_EXTEND better. + -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQp16' ARM Neon intrinsic. */ ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for ++ rotates and shifts. 
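# Note (editorial): the "rotates and shifts" costing above matters because
# GCC canonicalizes the usual C rotate idiom to a rotate rtx, and the rtx
# cost then helps decide whether a single ror-class instruction is used.
# For instance:
#
#   /* Recognized as a rotate right; expected to become one ror on
#      AArch64/ARM (well-defined here for n in 1..31).  */
#   unsigned int
#   rotr32 (unsigned int x, unsigned int n)
#   {
#     return (x >> n) | (x << (32 - n));
#   }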
+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqp16.x" ++ 2014-03-16 James Greenhalgh ++ Philipp Tomsich + -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs32' ARM Neon intrinsic. */ ++ * config/aarch64/aarch64.c (aarch64_rtx_arith_op_extract_p): New. ++ (aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT. + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs32.x" ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for ++ DIV/MOD. + -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQs16' ARM Neon intrinsic. */ ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost comparison ++ operators. + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqs16.x" ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu32' ARM Neon intrinsic. */ ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost FMA, ++ FLOAT_EXTEND, FLOAT_TRUNCATE, ABS, SMAX, and SMIN. + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ 2014-05-16 James Greenhalgh ++ Philipp Tomsich + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu32.x" ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost TRUNCATE. + -+/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzips8' ARM Neon intrinsic. */ ++ 2014-05-16 James Greenhalgh + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost SYMBOL_REF, ++ HIGH, LO_SUM. 
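# Note (editorial): the aarch64_rtx_costs changes listed above all implement
# the hook behind TARGET_RTX_COSTS. As a schematic reminder of the hook's
# contract in this GCC series (a stand-in, not the aarch64 implementation):
#
#   static bool
#   example_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
#                      int opno ATTRIBUTE_UNUSED, int *cost, bool speed)
#   {
#     /* Write an estimate for X into *cost.  Returning true makes the
#        estimate final; returning false lets the caller recurse into
#        X's operands and add their costs.  */
#     if (code == MULT)
#       {
#         *cost = COSTS_N_INSNS (speed ? 4 : 1);  /* made-up numbers */
#         return true;
#       }
#     return false;  /* fall back to generic costing */
#   }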
+ -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzips8.x" ++ 2014-05-16 James Greenhalgh + -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipf32' ARM Neon intrinsic. */ ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle the case ++ where we were unable to cost an RTX. + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ 2014-05-16 James Greenhalgh + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipf32.x" ++ * config/aarch64/aarch64.c (aarch64_rtx_mult_cost): Fix FNMUL case. + -+/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipQu16' ARM Neon intrinsic. */ ++ 2014-06-03 Andrew Pinski + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++ * config/aarch64/aarch64.c (aarch64_if_then_else_costs): New function. ++ (aarch64_rtx_costs): Use aarch64_if_then_else_costs. + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipqu16.x" ++ 2014-06-03 Andrew Pinski + -+/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c -@@ -0,0 +1,12 @@ -+/* Test the `vzipu8' ARM Neon intrinsic. */ ++ * config/aarch64/aarch64.c (aarch64_if_then_else_costs): Allow non ++ comparisons for OP0. + -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -O1 -fno-inline" } */ -+/* { dg-add-options arm_neon } */ ++2014-06-13 Yvan Roux + -+#include "arm_neon.h" -+#include "../../aarch64/simd/vzipu8.x" ++ * LINARO-VERSION: Bump version. + -+/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/arm/tail-long-call.c -+++ b/src/gcc/testsuite/gcc.target/arm/tail-long-call.c -@@ -0,0 +1,12 @@ -+/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" "-mthumb" } { "" } } */ -+/* { dg-options "-O2 -march=armv5te -marm" } */ -+/* { dg-final { scan-assembler "bx" } } */ -+/* { dg-final { scan-assembler-not "blx" } } */ ++2014-06-12 Yvan Roux + -+int lcal (int) __attribute__ ((long_call)); ++ GCC Linaro 4.9-2014.06 released. ++ * LINARO-VERSION: Update. + -+int -+dec (int a) -+{ -+ return lcal (a); -+} ---- a/src/gcc/testsuite/gcc.target/arm/rev16.c -+++ b/src/gcc/testsuite/gcc.target/arm/rev16.c -@@ -0,0 +1,35 @@ -+/* { dg-options "-O2" } */ -+/* { dg-do run } */ ++2014-06-04 Yvan Roux + -+extern void abort (void); ++ Backport from trunk r211211. 
++ 2014-06-04 Bin Cheng + -+typedef unsigned int __u32; ++ * config/aarch64/aarch64.c (aarch64_classify_address) ++ (aarch64_legitimize_reload_address): Support full addressing modes ++ for vector modes. ++ * config/aarch64/aarch64.md (mov, movmisalign) ++ (*aarch64_simd_mov, *aarch64_simd_mov): Relax predicates. + -+__u32 -+__rev16_32_alt (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) -+ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); -+} ++2014-05-25 Yvan Roux + -+__u32 -+__rev16_32 (__u32 x) -+{ -+ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) -+ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); -+} ++ Backport from trunk r209906. ++ 2014-04-29 Alan Lawrence + -+int -+main (void) -+{ -+ volatile __u32 in32 = 0x12345678; -+ volatile __u32 expected32 = 0x34127856; ++ * config/aarch64/arm_neon.h (vzip1_f32, vzip1_p8, vzip1_p16, vzip1_s8, ++ vzip1_s16, vzip1_s32, vzip1_u8, vzip1_u16, vzip1_u32, vzip1q_f32, ++ vzip1q_f64, vzip1q_p8, vzip1q_p16, vzip1q_s8, vzip1q_s16, vzip1q_s32, ++ vzip1q_s64, vzip1q_u8, vzip1q_u16, vzip1q_u32, vzip1q_u64, vzip2_f32, ++ vzip2_p8, vzip2_p16, vzip2_s8, vzip2_s16, vzip2_s32, vzip2_u8, ++ vzip2_u16, vzip2_u32, vzip2q_f32, vzip2q_f64, vzip2q_p8, vzip2q_p16, ++ vzip2q_s8, vzip2q_s16, vzip2q_s32, vzip2q_s64, vzip2q_u8, vzip2q_u16, ++ vzip2q_u32, vzip2q_u64): Replace inline __asm__ with __builtin_shuffle. + -+ if (__rev16_32 (in32) != expected32) -+ abort (); ++2014-05-25 Yvan Roux + -+ if (__rev16_32_alt (in32) != expected32) -+ abort (); ++ Backport from trunk r209897. ++ 2014-04-29 James Greenhalgh + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c -@@ -0,0 +1,65 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fno-inline --save-temps" } */ ++ * calls.c (initialize_argument_information): Always treat ++ PUSH_ARGS_REVERSED as 1, simplify code accordingly. ++ (expand_call): Likewise. ++ (emit_library_call_calue_1): Likewise. ++ * expr.c (PUSH_ARGS_REVERSED): Do not define. ++ (emit_push_insn): Always treat PUSH_ARGS_REVERSED as 1, simplify ++ code accordingly. + -+extern void abort (void); ++2014-05-25 Yvan Roux + -+typedef long long s64int; -+typedef int s32int; -+typedef unsigned long long u64int; -+typedef unsigned int u32int; ++ Backport from trunk r209880. ++ 2014-04-28 James Greenhalgh + -+s64int -+anddi_di_notdi (s64int a, s64int b) -+{ -+ return (a & ~b); -+} ++ * config/aarch64/aarch64-builtins.c ++ (aarch64_types_storestruct_lane_qualifiers): New. ++ (TYPES_STORESTRUCT_LANE): Likewise. ++ * config/aarch64/aarch64-simd-builtins.def (st2_lane): New. ++ (st3_lane): Likewise. ++ (st4_lane): Likewise. ++ * config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane): New. ++ (vec_store_lanesci_lane): Likewise. ++ (vec_store_lanesxi_lane): Likewise. ++ (aarch64_st2_lane): Likewise. ++ (aarch64_st3_lane): Likewise. ++ (aarch64_st4_lane): Likewise. ++ * config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE. ++ * config/aarch64/arm_neon.h ++ (__ST2_LANE_FUNC): Rewrite using builtins, update use points to ++ use new macro arguments. ++ (__ST3_LANE_FUNC): Likewise. ++ (__ST4_LANE_FUNC): Likewise. ++ * config/aarch64/iterators.md (V_TWO_ELEM): New. ++ (V_THREE_ELEM): Likewise. ++ (V_FOUR_ELEM): Likewise. + -+s64int -+anddi_di_notzesidi (s64int a, u32int b) -+{ -+ return (a & ~(u64int) b); -+} ++2014-05-25 Yvan Roux + -+s64int -+anddi_notdi_zesidi (s64int a, u32int b) -+{ -+ return (~a & (u64int) b); -+} ++ Backport from trunk r209878. 
++ 2014-04-28 James Greenhalgh + -+s64int -+anddi_di_notsesidi (s64int a, s32int b) -+{ -+ return (a & ~(s64int) b); -+} ++ * config/aarch64/aarch64-protos.h (aarch64_modes_tieable_p): New. ++ * config/aarch64/aarch64.c ++ (aarch64_cannot_change_mode_class): Weaken conditions. ++ (aarch64_modes_tieable_p): New. ++ * config/aarch64/aarch64.h (MODES_TIEABLE_P): Use it. + -+int main () -+{ -+ s64int a64 = 0xdeadbeef0000ffffll; -+ s64int b64 = 0x000000005f470112ll; -+ s64int c64 = 0xdeadbeef300f0000ll; ++2014-05-25 Yvan Roux + -+ u32int c32 = 0x01124f4f; -+ s32int d32 = 0xabbaface; ++ Backport from trunk r209808. ++ 2014-04-25 Jiong Wang + -+ s64int z = anddi_di_notdi (c64, b64); -+ if (z != 0xdeadbeef20080000ll) -+ abort (); ++ * config/arm/predicates.md (call_insn_operand): Add long_call check. ++ * config/arm/arm.md (sibcall, sibcall_value): Force the address to ++ reg for long_call. ++ * config/arm/arm.c (arm_function_ok_for_sibcall): Remove long_call ++ restriction. + -+ z = anddi_di_notzesidi (a64, c32); -+ if (z != 0xdeadbeef0000b0b0ll) -+ abort (); ++2014-05-25 Yvan Roux + -+ z = anddi_notdi_zesidi (c64, c32); -+ if (z != 0x0000000001104f4fll) -+ abort (); ++ Backport from trunk r209806. ++ 2014-04-25 Kyrylo Tkachov + -+ z = anddi_di_notsesidi (a64, d32); -+ if (z != 0x0000000000000531ll) -+ abort (); ++ * config/arm/arm.c (arm_cortex_a8_tune): Initialise ++ T16-related fields. + -+ return 0; -+} ++2014-05-25 Yvan Roux + -+/* { dg-final { scan-assembler-times "bic\t" 6 } } */ ++ Backport from trunk r209742, 209749. ++ 2014-04-24 Alan Lawrence + -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c -@@ -0,0 +1,343 @@ -+/* Test vdup_lane intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "-O1 --save-temps" } */ ++ * config/aarch64/aarch64.c (aarch64_evpc_tbl): Enable for bigendian. + -+#include ++ 2014-04-24 Tejas Belagod + -+#define force_simd(V1) asm volatile ("" \ -+ : "=w"(V1) \ -+ : "w"(V1) \ -+ : /* No clobbers */) ++ * config/aarch64/aarch64.c (aarch64_evpc_tbl): Reverse order of elements ++ for big-endian. + -+extern void abort (void); ++2014-05-23 Yvan Roux + -+float32_t __attribute__ ((noinline)) -+wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a) -+{ -+ return vdups_lane_f32 (a, 0); -+} -+ -+float32_t __attribute__ ((noinline)) -+wrap_vdups_lane_f32_1 (float32x2_t a) -+{ -+ return vdups_lane_f32 (a, 1); -+} ++ Backport from trunk r209736. ++ 2014-04-24 Kyrylo Tkachov + -+int __attribute__ ((noinline)) -+test_vdups_lane_f32 () -+{ -+ float32x2_t a; -+ float32_t b; -+ float32_t c[2] = { 0.0, 1.0 }; ++ * config/aarch64/aarch64-builtins.c ++ (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16, ++ BUILT_IN_BSWAP32, BUILT_IN_BSWAP64. ++ * config/aarch64/aarch64-simd.md (bswap): New pattern. ++ * config/aarch64/aarch64-simd-builtins.def: Define vector bswap ++ builtins. ++ * config/aarch64/iterator.md (VDQHSD): New mode iterator. ++ (Vrevsuff): New mode attribute. + -+ a = vld1_f32 (c); -+ b = wrap_vdups_lane_f32_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_f32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} ++2014-05-23 Yvan Roux + -+float64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a) -+{ -+ return vdupd_lane_f64 (a, 0); -+} ++ Backport from trunk r209712. 
++ 2014-04-23 Venkataramanan Kumar + -+int __attribute__ ((noinline)) -+test_vdupd_lane_f64 () -+{ -+ float64x1_t a; -+ float64_t b; -+ float64_t c[1] = { 0.0 }; -+ a = vld1_f64 (c); -+ b = wrap_vdupd_lane_f64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} ++ * config/aarch64/aarch64.md (stack_protect_set, stack_protect_test) ++ (stack_protect_set_, stack_protect_test_): Add ++ machine descriptions for Stack Smashing Protector. + -+int8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a) -+{ -+ int8_t result = vdupb_lane_s8 (a, 0); -+ force_simd (result); -+ return result; -+} ++2014-05-23 Yvan Roux + -+int8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_s8_1 (int8x8_t a) -+{ -+ int8_t result = vdupb_lane_s8 (a, 1); -+ force_simd (result); -+ return result; -+} ++ Backport from trunk r209711. ++ 2014-04-23 Richard Earnshaw + -+int __attribute__ ((noinline)) -+test_vdupb_lane_s8 () -+{ -+ int8x8_t a; -+ int8_t b; -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; ++ * aarch64.md (_rol3): New pattern. ++ (_rolsi3_uxtw): Likewise. ++ * aarch64.c (aarch64_strip_shift): Handle ROTATE and ROTATERT. + -+ a = vld1_s8 (c); -+ b = wrap_vdupb_lane_s8_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdupb_lane_s8_1 (a); -+ if (c[1] != b) -+ return 1; ++2014-05-23 Yvan Roux + -+ return 0; -+} ++ Backport from trunk r209710. ++ 2014-04-23 James Greenhalgh + -+uint8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a) -+{ -+ uint8_t result = vdupb_lane_u8 (a, 0); -+ force_simd (result); -+ return result; -+} ++ * config/arm/arm.c (arm_cortex_a57_tune): Initialize all fields. ++ (arm_cortex_a12_tune): Likewise. + -+uint8_t __attribute__ ((noinline)) -+wrap_vdupb_lane_u8_1 (uint8x8_t a) -+{ -+ uint8_t result = vdupb_lane_u8 (a, 1); -+ force_simd (result); -+ return result; -+} ++2014-05-23 Yvan Roux + -+int __attribute__ ((noinline)) -+test_vdupb_lane_u8 () -+{ -+ uint8x8_t a; -+ uint8_t b; -+ uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; ++ Backport from trunk r209706. ++ 2014-04-23 Kyrylo Tkachov + -+ a = vld1_u8 (c); -+ b = wrap_vdupb_lane_u8_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdupb_lane_u8_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle BSWAP. + -+int16_t __attribute__ ((noinline)) -+wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a) -+{ -+ int16_t result = vduph_lane_s16 (a, 0); -+ force_simd (result); -+ return result; -+} ++2014-05-23 Yvan Roux + -+int16_t __attribute__ ((noinline)) -+wrap_vduph_lane_s16_1 (int16x4_t a) -+{ -+ int16_t result = vduph_lane_s16 (a, 1); -+ force_simd (result); -+ return result; -+} ++ Backport from trunk r209701, 209702, 209703, 209704, 209705. ++ 2014-04-23 Kyrylo Tkachov + -+int __attribute__ ((noinline)) -+test_vduph_lane_s16 () -+{ -+ int16x4_t a; -+ int16_t b; -+ int16_t c[4] = { 0, 1, 2, 3 }; ++ * config/arm/arm.md (arm_rev16si2): New pattern. ++ (arm_rev16si2_alt): Likewise. ++ * config/arm/arm.c (arm_new_rtx_costs): Handle rev16 case. + -+ a = vld1_s16 (c); -+ b = wrap_vduph_lane_s16_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vduph_lane_s16_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} ++ 2014-04-23 Kyrylo Tkachov ++ * config/aarch64/aarch64.md (rev162): New pattern. ++ (rev162_alt): Likewise. ++ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. ++ * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. ++ (aarch_rev16_shleft_mask_imm_p): Likewise. 
++ (aarch_rev16_p_1): Likewise. ++ (aarch_rev16_p): Likewise. ++ * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. ++ (aarch_rev16_shright_mask_imm_p): Likewise. ++ (aarch_rev16_shleft_mask_imm_p): Likewise. + -+uint16_t __attribute__ ((noinline)) -+wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a) -+{ -+ uint16_t result = vduph_lane_u16 (a, 0); -+ force_simd (result); -+ return result; -+} ++ 2014-04-23 Kyrylo Tkachov + -+uint16_t __attribute__ ((noinline)) -+wrap_vduph_lane_u16_1 (uint16x4_t a) -+{ -+ uint16_t result = vduph_lane_u16 (a, 1); -+ force_simd (result); -+ return result; -+} ++ * config/arm/aarch-common-protos.h (alu_cost_table): Add rev field. ++ * config/arm/aarch-cost-tables.h (generic_extra_costs): Specify ++ rev cost. ++ (cortex_a53_extra_costs): Likewise. ++ (cortex_a57_extra_costs): Likewise. ++ * config/arm/arm.c (cortexa9_extra_costs): Likewise. ++ (cortexa7_extra_costs): Likewise. ++ (cortexa8_extra_costs): Likewise. ++ (cortexa12_extra_costs): Likewise. ++ (cortexa15_extra_costs): Likewise. ++ (v7m_extra_costs): Likewise. ++ (arm_new_rtx_costs): Handle BSWAP. + -+int __attribute__ ((noinline)) -+test_vduph_lane_u16 () -+{ -+ uint16x4_t a; -+ uint16_t b; -+ uint16_t c[4] = { 0, 1, 2, 3 }; ++ 2013-04-23 Kyrylo Tkachov + -+ a = vld1_u16 (c); -+ b = wrap_vduph_lane_u16_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vduph_lane_u16_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} ++ * config/arm/arm.c (cortexa8_extra_costs): New table. ++ (arm_cortex_a8_tune): New tuning struct. ++ * config/arm/arm-cores.def (cortex-a8): Use cortex_a8 tuning struct. + -+int32_t __attribute__ ((noinline)) -+wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a) -+{ -+ int32_t result = vdups_lane_s32 (a, 0); -+ force_simd (result); -+ return result; -+} ++ 2014-04-23 Kyrylo Tkachov + -+int32_t __attribute__ ((noinline)) -+wrap_vdups_lane_s32_1 (int32x2_t a) -+{ -+ int32_t result = vdups_lane_s32 (a, 1); -+ force_simd (result); -+ return result; -+} ++ * config/arm/arm.c (arm_new_rtx_costs): Handle FMA. + -+int __attribute__ ((noinline)) -+test_vdups_lane_s32 () -+{ -+ int32x2_t a; -+ int32_t b; -+ int32_t c[2] = { 0, 1 }; ++2014-05-23 Yvan Roux + -+ a = vld1_s32 (c); -+ b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_s32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} ++ Backport from trunk r209659. 
++ 2014-04-22 Richard Henderson + -+uint32_t __attribute__ ((noinline)) -+wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a) -+{ -+ uint32_t result = vdups_lane_u32 (a, 0); -+ force_simd (result); -+ return result; -+} -+ -+uint32_t __attribute__ ((noinline)) -+wrap_vdups_lane_u32_1 (uint32x2_t a) -+{ -+ uint32_t result = vdups_lane_u32 (a, 1); -+ force_simd (result); -+ return result; -+} -+ -+int __attribute__ ((noinline)) -+test_vdups_lane_u32 () -+{ -+ uint32x2_t a; -+ uint32_t b; -+ uint32_t c[2] = { 0, 1 }; -+ a = vld1_u32 (c); -+ b = wrap_vdups_lane_u32_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ b = wrap_vdups_lane_u32_1 (a); -+ if (c[1] != b) -+ return 1; -+ return 0; -+} -+ -+uint64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a) -+{ -+ return vdupd_lane_u64 (a, 0);; -+} -+ -+int __attribute__ ((noinline)) -+test_vdupd_lane_u64 () -+{ -+ uint64x1_t a; -+ uint64_t b; -+ uint64_t c[1] = { 0 }; -+ -+ a = vld1_u64 (c); -+ b = wrap_vdupd_lane_u64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} -+ -+int64_t __attribute__ ((noinline)) -+wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a) -+{ -+ return vdupd_lane_u64 (a, 0); -+} -+ -+int __attribute__ ((noinline)) -+test_vdupd_lane_s64 () -+{ -+ int64x1_t a; -+ int64_t b; -+ int64_t c[1] = { 0 }; -+ -+ a = vld1_s64 (c); -+ b = wrap_vdupd_lane_s64_0 (a, a); -+ if (c[0] != b) -+ return 1; -+ return 0; -+} -+ -+int -+main () -+{ -+ if (test_vdups_lane_f32 ()) -+ abort (); -+ if (test_vdupd_lane_f64 ()) -+ abort (); -+ if (test_vdupb_lane_s8 ()) -+ abort (); -+ if (test_vdupb_lane_u8 ()) -+ abort (); -+ if (test_vduph_lane_s16 ()) -+ abort (); -+ if (test_vduph_lane_u16 ()) -+ abort (); -+ if (test_vdups_lane_s32 ()) -+ abort (); -+ if (test_vdups_lane_u32 ()) -+ abort (); -+ if (test_vdupd_lane_s64 ()) -+ abort (); -+ if (test_vdupd_lane_u64 ()) -+ abort (); -+ return 0; -+} -+ -+/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */ -+/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */ -+/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */ -+ -+/* Asm check for vduph_lane_h16, vduph_lane_h16. */ -+/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */ -+/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */ -+ -+/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */ -+/* Can't generate "dup s, v[0]" for vdups_lane_s32 and vdups_lane_u32. */ -+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */ -+/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */ -+ -+/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */ -+/* Attempts to make the compiler generate vdupd are not practical. */ -+/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } } -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c -@@ -0,0 +1,54 @@ -+/* Test vqabs_s64 intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "--save-temps" } */ ++ * config/aarch64/aarch64 (addti3, subti3): New expanders. ++ (add3_compare0): Remove leading * from name. ++ (add3_carryin): Likewise. ++ (sub3_compare0): Likewise. ++ (sub3_carryin): Likewise. ++ (mulditi3): New expander. ++ (multi3): New expander. ++ (madd): Remove leading * from name. 
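# Note (editorial): the addti3/subti3/mulditi3/multi3 expanders from the
# r209659 backport above are the named patterns behind __int128 arithmetic
# on AArch64. A small C illustration of the kind of code they improve:
#
#   /* 64x64->128 multiply-accumulate; with these expanders it can lower
#      to mul/umulh plus an add-with-carry sequence instead of a libcall. */
#   unsigned __int128
#   mac128 (unsigned long long a, unsigned long long b, unsigned __int128 acc)
#   {
#     return acc + (unsigned __int128) a * b;
#   }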
+ -+#include ++2014-05-23 Yvan Roux + -+extern void abort (void); ++ Backport from trunk r209645. ++ 2014-04-22 Andrew Pinski + -+int __attribute__ ((noinline)) -+test_vqabs_s64 (int64x1_t passed, int64_t expected) -+{ -+ return vget_lane_s64 (vqabs_s64 (passed), 0) != expected; -+} ++ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): ++ Handle TLS for ILP32. ++ * config/aarch64/aarch64.md (tlsie_small): Rename to ... ++ (tlsie_small_): this and handle PTR. ++ (tlsie_small_sidi): New pattern. ++ (tlsle_small): Change to an expand to handle ILP32. ++ (tlsle_small_): New pattern. ++ (tlsdesc_small): Rename to ... ++ (tlsdesc_small_): this and handle PTR. + -+int __attribute__ ((noinline)) -+test_vqabsd_s64 (int64_t passed, int64_t expected) -+{ -+ return vqabsd_s64 (passed) != expected; -+} ++2014-05-23 Yvan Roux + -+/* { dg-final { scan-assembler-times "sqabs\\td\[0-9\]+, d\[0-9\]+" 2 } } */ ++ Backport from trunk r209643. ++ 2014-04-22 Ramana Radhakrishnan + -+int -+main (int argc, char **argv) -+{ -+ /* Basic test. */ -+ if (test_vqabs_s64 (vcreate_s64 (-1), 1)) -+ abort (); -+ if (test_vqabsd_s64 (-1, 1)) -+ abort (); ++ * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. + -+ /* Getting absolute value of min int64_t. -+ Note, exact result cannot be represented in int64_t, -+ so max int64_t is expected. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x8000000000000000, 0x7fffffffffffffff)) -+ abort (); ++2014-05-23 Yvan Roux + -+ /* Another input that gets max int64_t. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000001), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x8000000000000001, 0x7fffffffffffffff)) -+ abort (); ++ Backport from trunk r209641, 209642. ++ 2014-04-22 Alex Velenko + -+ /* Checking that large positive numbers stay the same. */ -+ if (test_vqabs_s64 (vcreate_s64 (0x7fffffffffffffff), 0x7fffffffffffffff)) -+ abort (); -+ if (test_vqabsd_s64 (0x7fffffffffffffff, 0x7fffffffffffffff)) -+ abort (); ++ * config/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed. ++ (aarch64_types_signed_unsigned_qualifiers): Qualifier added. ++ (aarch64_types_signed_poly_qualifiers): Likewise. ++ (aarch64_types_unsigned_signed_qualifiers): Likewise. ++ (aarch64_types_poly_signed_qualifiers): Likewise. ++ (TYPES_REINTERP_SS): Type macro added. ++ (TYPES_REINTERP_SU): Likewise. ++ (TYPES_REINTERP_SP): Likewise. ++ (TYPES_REINTERP_US): Likewise. ++ (TYPES_REINTERP_PS): Likewise. ++ (aarch64_fold_builtin): New expression folding added. ++ * config/aarch64/aarch64-simd-builtins.def (REINTERP): ++ Declarations removed. ++ (REINTERP_SS): Declarations added. ++ (REINTERP_US): Likewise. ++ (REINTERP_PS): Likewise. ++ (REINTERP_SU): Likewise. ++ (REINTERP_SP): Likewise. ++ * config/aarch64/arm_neon.h (vreinterpret_p8_f64): Implemented. ++ (vreinterpretq_p8_f64): Likewise. ++ (vreinterpret_p16_f64): Likewise. ++ (vreinterpretq_p16_f64): Likewise. ++ (vreinterpret_f32_f64): Likewise. ++ (vreinterpretq_f32_f64): Likewise. ++ (vreinterpret_f64_f32): Likewise. ++ (vreinterpret_f64_p8): Likewise. ++ (vreinterpret_f64_p16): Likewise. ++ (vreinterpret_f64_s8): Likewise. ++ (vreinterpret_f64_s16): Likewise. ++ (vreinterpret_f64_s32): Likewise. ++ (vreinterpret_f64_s64): Likewise. ++ (vreinterpret_f64_u8): Likewise. ++ (vreinterpret_f64_u16): Likewise. ++ (vreinterpret_f64_u32): Likewise. ++ (vreinterpret_f64_u64): Likewise. ++ (vreinterpretq_f64_f32): Likewise. ++ (vreinterpretq_f64_p8): Likewise. 
++ (vreinterpretq_f64_p16): Likewise. ++ (vreinterpretq_f64_s8): Likewise. ++ (vreinterpretq_f64_s16): Likewise. ++ (vreinterpretq_f64_s32): Likewise. ++ (vreinterpretq_f64_s64): Likewise. ++ (vreinterpretq_f64_u8): Likewise. ++ (vreinterpretq_f64_u16): Likewise. ++ (vreinterpretq_f64_u32): Likewise. ++ (vreinterpretq_f64_u64): Likewise. ++ (vreinterpret_s64_f64): Likewise. ++ (vreinterpretq_s64_f64): Likewise. ++ (vreinterpret_u64_f64): Likewise. ++ (vreinterpretq_u64_f64): Likewise. ++ (vreinterpret_s8_f64): Likewise. ++ (vreinterpretq_s8_f64): Likewise. ++ (vreinterpret_s16_f64): Likewise. ++ (vreinterpretq_s16_f64): Likewise. ++ (vreinterpret_s32_f64): Likewise. ++ (vreinterpretq_s32_f64): Likewise. ++ (vreinterpret_u8_f64): Likewise. ++ (vreinterpretq_u8_f64): Likewise. ++ (vreinterpret_u16_f64): Likewise. ++ (vreinterpretq_u16_f64): Likewise. ++ (vreinterpret_u32_f64): Likewise. ++ (vreinterpretq_u32_f64): Likewise. + -+ return 0; -+} -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c -@@ -0,0 +1,596 @@ -+/* Test vreinterpret_f64_* and vreinterpret_*_f64 intrinsics work correctly. */ -+/* { dg-do run } */ -+/* { dg-options "-O3" } */ ++ 2014-04-22 Alex Velenko + -+#include ++ * config/aarch64/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed. ++ * config/aarch64/aarch64/aarch64-simd-builtins.def (REINTERP): Removed. ++ (vreinterpret_p8_s8): Likewise. ++ * config/aarch64/aarch64/arm_neon.h (vreinterpret_p8_s8): Uses cast. ++ (vreinterpret_p8_s16): Likewise. ++ (vreinterpret_p8_s32): Likewise. ++ (vreinterpret_p8_s64): Likewise. ++ (vreinterpret_p8_f32): Likewise. ++ (vreinterpret_p8_u8): Likewise. ++ (vreinterpret_p8_u16): Likewise. ++ (vreinterpret_p8_u32): Likewise. ++ (vreinterpret_p8_u64): Likewise. ++ (vreinterpret_p8_p16): Likewise. ++ (vreinterpretq_p8_s8): Likewise. ++ (vreinterpretq_p8_s16): Likewise. ++ (vreinterpretq_p8_s32): Likewise. ++ (vreinterpretq_p8_s64): Likewise. ++ (vreinterpretq_p8_f32): Likewise. ++ (vreinterpretq_p8_u8): Likewise. ++ (vreinterpretq_p8_u16): Likewise. ++ (vreinterpretq_p8_u32): Likewise. ++ (vreinterpretq_p8_u64): Likewise. ++ (vreinterpretq_p8_p16): Likewise. ++ (vreinterpret_p16_s8): Likewise. ++ (vreinterpret_p16_s16): Likewise. ++ (vreinterpret_p16_s32): Likewise. ++ (vreinterpret_p16_s64): Likewise. ++ (vreinterpret_p16_f32): Likewise. ++ (vreinterpret_p16_u8): Likewise. ++ (vreinterpret_p16_u16): Likewise. ++ (vreinterpret_p16_u32): Likewise. ++ (vreinterpret_p16_u64): Likewise. ++ (vreinterpret_p16_p8): Likewise. ++ (vreinterpretq_p16_s8): Likewise. ++ (vreinterpretq_p16_s16): Likewise. ++ (vreinterpretq_p16_s32): Likewise. ++ (vreinterpretq_p16_s64): Likewise. ++ (vreinterpretq_p16_f32): Likewise. ++ (vreinterpretq_p16_u8): Likewise. ++ (vreinterpretq_p16_u16): Likewise. ++ (vreinterpretq_p16_u32): Likewise. ++ (vreinterpretq_p16_u64): Likewise. ++ (vreinterpretq_p16_p8): Likewise. ++ (vreinterpret_f32_s8): Likewise. ++ (vreinterpret_f32_s16): Likewise. ++ (vreinterpret_f32_s32): Likewise. ++ (vreinterpret_f32_s64): Likewise. ++ (vreinterpret_f32_u8): Likewise. ++ (vreinterpret_f32_u16): Likewise. ++ (vreinterpret_f32_u32): Likewise. ++ (vreinterpret_f32_u64): Likewise. ++ (vreinterpret_f32_p8): Likewise. ++ (vreinterpret_f32_p16): Likewise. ++ (vreinterpretq_f32_s8): Likewise. ++ (vreinterpretq_f32_s16): Likewise. ++ (vreinterpretq_f32_s32): Likewise. ++ (vreinterpretq_f32_s64): Likewise. 
++ (vreinterpretq_f32_u8): Likewise. ++ (vreinterpretq_f32_u16): Likewise. ++ (vreinterpretq_f32_u32): Likewise. ++ (vreinterpretq_f32_u64): Likewise. ++ (vreinterpretq_f32_p8): Likewise. ++ (vreinterpretq_f32_p16): Likewise. ++ (vreinterpret_s64_s8): Likewise. ++ (vreinterpret_s64_s16): Likewise. ++ (vreinterpret_s64_s32): Likewise. ++ (vreinterpret_s64_f32): Likewise. ++ (vreinterpret_s64_u8): Likewise. ++ (vreinterpret_s64_u16): Likewise. ++ (vreinterpret_s64_u32): Likewise. ++ (vreinterpret_s64_u64): Likewise. ++ (vreinterpret_s64_p8): Likewise. ++ (vreinterpret_s64_p16): Likewise. ++ (vreinterpretq_s64_s8): Likewise. ++ (vreinterpretq_s64_s16): Likewise. ++ (vreinterpretq_s64_s32): Likewise. ++ (vreinterpretq_s64_f32): Likewise. ++ (vreinterpretq_s64_u8): Likewise. ++ (vreinterpretq_s64_u16): Likewise. ++ (vreinterpretq_s64_u32): Likewise. ++ (vreinterpretq_s64_u64): Likewise. ++ (vreinterpretq_s64_p8): Likewise. ++ (vreinterpretq_s64_p16): Likewise. ++ (vreinterpret_u64_s8): Likewise. ++ (vreinterpret_u64_s16): Likewise. ++ (vreinterpret_u64_s32): Likewise. ++ (vreinterpret_u64_s64): Likewise. ++ (vreinterpret_u64_f32): Likewise. ++ (vreinterpret_u64_u8): Likewise. ++ (vreinterpret_u64_u16): Likewise. ++ (vreinterpret_u64_u32): Likewise. ++ (vreinterpret_u64_p8): Likewise. ++ (vreinterpret_u64_p16): Likewise. ++ (vreinterpretq_u64_s8): Likewise. ++ (vreinterpretq_u64_s16): Likewise. ++ (vreinterpretq_u64_s32): Likewise. ++ (vreinterpretq_u64_s64): Likewise. ++ (vreinterpretq_u64_f32): Likewise. ++ (vreinterpretq_u64_u8): Likewise. ++ (vreinterpretq_u64_u16): Likewise. ++ (vreinterpretq_u64_u32): Likewise. ++ (vreinterpretq_u64_p8): Likewise. ++ (vreinterpretq_u64_p16): Likewise. ++ (vreinterpret_s8_s16): Likewise. ++ (vreinterpret_s8_s32): Likewise. ++ (vreinterpret_s8_s64): Likewise. ++ (vreinterpret_s8_f32): Likewise. ++ (vreinterpret_s8_u8): Likewise. ++ (vreinterpret_s8_u16): Likewise. ++ (vreinterpret_s8_u32): Likewise. ++ (vreinterpret_s8_u64): Likewise. ++ (vreinterpret_s8_p8): Likewise. ++ (vreinterpret_s8_p16): Likewise. ++ (vreinterpretq_s8_s16): Likewise. ++ (vreinterpretq_s8_s32): Likewise. ++ (vreinterpretq_s8_s64): Likewise. ++ (vreinterpretq_s8_f32): Likewise. ++ (vreinterpretq_s8_u8): Likewise. ++ (vreinterpretq_s8_u16): Likewise. ++ (vreinterpretq_s8_u32): Likewise. ++ (vreinterpretq_s8_u64): Likewise. ++ (vreinterpretq_s8_p8): Likewise. ++ (vreinterpretq_s8_p16): Likewise. ++ (vreinterpret_s16_s8): Likewise. ++ (vreinterpret_s16_s32): Likewise. ++ (vreinterpret_s16_s64): Likewise. ++ (vreinterpret_s16_f32): Likewise. ++ (vreinterpret_s16_u8): Likewise. ++ (vreinterpret_s16_u16): Likewise. ++ (vreinterpret_s16_u32): Likewise. ++ (vreinterpret_s16_u64): Likewise. ++ (vreinterpret_s16_p8): Likewise. ++ (vreinterpret_s16_p16): Likewise. ++ (vreinterpretq_s16_s8): Likewise. ++ (vreinterpretq_s16_s32): Likewise. ++ (vreinterpretq_s16_s64): Likewise. ++ (vreinterpretq_s16_f32): Likewise. ++ (vreinterpretq_s16_u8): Likewise. ++ (vreinterpretq_s16_u16): Likewise. ++ (vreinterpretq_s16_u32): Likewise. ++ (vreinterpretq_s16_u64): Likewise. ++ (vreinterpretq_s16_p8): Likewise. ++ (vreinterpretq_s16_p16): Likewise. ++ (vreinterpret_s32_s8): Likewise. ++ (vreinterpret_s32_s16): Likewise. ++ (vreinterpret_s32_s64): Likewise. ++ (vreinterpret_s32_f32): Likewise. ++ (vreinterpret_s32_u8): Likewise. ++ (vreinterpret_s32_u16): Likewise. ++ (vreinterpret_s32_u32): Likewise. ++ (vreinterpret_s32_u64): Likewise. ++ (vreinterpret_s32_p8): Likewise. ++ (vreinterpret_s32_p16): Likewise. 
++ (vreinterpretq_s32_s8): Likewise. ++ (vreinterpretq_s32_s16): Likewise. ++ (vreinterpretq_s32_s64): Likewise. ++ (vreinterpretq_s32_f32): Likewise. ++ (vreinterpretq_s32_u8): Likewise. ++ (vreinterpretq_s32_u16): Likewise. ++ (vreinterpretq_s32_u32): Likewise. ++ (vreinterpretq_s32_u64): Likewise. ++ (vreinterpretq_s32_p8): Likewise. ++ (vreinterpretq_s32_p16): Likewise. ++ (vreinterpret_u8_s8): Likewise. ++ (vreinterpret_u8_s16): Likewise. ++ (vreinterpret_u8_s32): Likewise. ++ (vreinterpret_u8_s64): Likewise. ++ (vreinterpret_u8_f32): Likewise. ++ (vreinterpret_u8_u16): Likewise. ++ (vreinterpret_u8_u32): Likewise. ++ (vreinterpret_u8_u64): Likewise. ++ (vreinterpret_u8_p8): Likewise. ++ (vreinterpret_u8_p16): Likewise. ++ (vreinterpretq_u8_s8): Likewise. ++ (vreinterpretq_u8_s16): Likewise. ++ (vreinterpretq_u8_s32): Likewise. ++ (vreinterpretq_u8_s64): Likewise. ++ (vreinterpretq_u8_f32): Likewise. ++ (vreinterpretq_u8_u16): Likewise. ++ (vreinterpretq_u8_u32): Likewise. ++ (vreinterpretq_u8_u64): Likewise. ++ (vreinterpretq_u8_p8): Likewise. ++ (vreinterpretq_u8_p16): Likewise. ++ (vreinterpret_u16_s8): Likewise. ++ (vreinterpret_u16_s16): Likewise. ++ (vreinterpret_u16_s32): Likewise. ++ (vreinterpret_u16_s64): Likewise. ++ (vreinterpret_u16_f32): Likewise. ++ (vreinterpret_u16_u8): Likewise. ++ (vreinterpret_u16_u32): Likewise. ++ (vreinterpret_u16_u64): Likewise. ++ (vreinterpret_u16_p8): Likewise. ++ (vreinterpret_u16_p16): Likewise. ++ (vreinterpretq_u16_s8): Likewise. ++ (vreinterpretq_u16_s16): Likewise. ++ (vreinterpretq_u16_s32): Likewise. ++ (vreinterpretq_u16_s64): Likewise. ++ (vreinterpretq_u16_f32): Likewise. ++ (vreinterpretq_u16_u8): Likewise. ++ (vreinterpretq_u16_u32): Likewise. ++ (vreinterpretq_u16_u64): Likewise. ++ (vreinterpretq_u16_p8): Likewise. ++ (vreinterpretq_u16_p16): Likewise. ++ (vreinterpret_u32_s8): Likewise. ++ (vreinterpret_u32_s16): Likewise. ++ (vreinterpret_u32_s32): Likewise. ++ (vreinterpret_u32_s64): Likewise. ++ (vreinterpret_u32_f32): Likewise. ++ (vreinterpret_u32_u8): Likewise. ++ (vreinterpret_u32_u16): Likewise. ++ (vreinterpret_u32_u64): Likewise. ++ (vreinterpret_u32_p8): Likewise. ++ (vreinterpret_u32_p16): Likewise. ++ (vreinterpretq_u32_s8): Likewise. ++ (vreinterpretq_u32_s16): Likewise. ++ (vreinterpretq_u32_s32): Likewise. ++ (vreinterpretq_u32_s64): Likewise. ++ (vreinterpretq_u32_f32): Likewise. ++ (vreinterpretq_u32_u8): Likewise. ++ (vreinterpretq_u32_u16): Likewise. ++ (vreinterpretq_u32_u64): Likewise. ++ (vreinterpretq_u32_p8): Likewise. ++ (vreinterpretq_u32_p16): Likewise. + -+extern void abort (void); ++2014-05-23 Yvan Roux + -+#define ABS(a) __builtin_fabs (a) -+#define ISNAN(a) __builtin_isnan (a) ++ Backport from trunk r209640. ++ 2014-04-22 Alex Velenko + -+#define DOUBLE_EQUALS(a, b, epsilon) \ -+( \ -+ ((a) == (b)) \ -+ || (ISNAN (a) && ISNAN (b)) \ -+ || (ABS (a - b) < epsilon) \ -+) ++ * gcc/config/aarch64/aarch64-simd.md (aarch64_s): ++ Pattern extended. ++ * config/aarch64/aarch64-simd-builtins.def (sqneg): Iterator ++ extended. ++ (sqabs): Likewise. ++ * config/aarch64/arm_neon.h (vqneg_s64): New intrinsic. ++ (vqnegd_s64): Likewise. ++ (vqabs_s64): Likewise. ++ (vqabsd_s64): Likewise. + -+/* Pi accurate up to 16 digits. -+ Further digits are a closest binary approximation. */ -+#define PI_F64 3.14159265358979311599796346854 -+/* Hex representation in Double (IEEE754 Double precision 64-bit) is: -+ 0x400921FB54442D18. */ ++2014-05-23 Yvan Roux + -+/* E accurate up to 16 digits. 
-+ Further digits are a closest binary approximation. */ -+#define E_F64 2.71828182845904509079559829843 -+/* Hex representation in Double (IEEE754 Double precision 64-bit) is: -+ 0x4005BF0A8B145769. */ ++ Backport from trunk r209627, 209636. ++ 2014-04-22 Renlin ++ Jiong Wang + -+float32x2_t __attribute__ ((noinline)) -+wrap_vreinterpret_f32_f64 (float64x1_t __a) -+{ -+ return vreinterpret_f32_f64 (__a); -+} ++ * config/aarch64/aarch64.h (aarch64_frame): Delete "fp_lr_offset". ++ * config/aarch64/aarch64.c (aarch64_layout_frame) ++ (aarch64_initial_elimination_offset): Likewise. + -+int __attribute__ ((noinline)) -+test_vreinterpret_f32_f64 () -+{ -+ float64x1_t a; -+ float32x2_t b; -+ float64_t c[1] = { PI_F64 }; -+ /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. */ -+ float32_t d[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; -+ float32_t e[2]; -+ int i; ++ 2014-04-22 Marcus Shawcroft + -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_f32_f64 (a); -+ vst1_f32 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) -+ return 1; -+ return 0; -+}; ++ * config/aarch64/aarch64.c (aarch64_initial_elimination_offset): ++ Fix indentation. + -+int8x8_t __attribute__ ((noinline)) -+wrap_vreinterpret_s8_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s8_f64 (__a); -+} ++2014-05-23 Yvan Roux + -+int __attribute__ ((noinline)) -+test_vreinterpret_s8_f64 () -+{ -+ float64x1_t a; -+ int8x8_t b; -+ float64_t c[1] = { PI_F64 }; -+ int8_t d[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; -+ int8_t e[8]; -+ int i; ++ Backport from trunk r209618. ++ 2014-04-22 Renlin Li + -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s8_f64 (a); -+ vst1_s8 (e, b); -+ for (i = 0; i < 8; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; ++ * config/aarch64/aarch64.c (aarch64_print_operand_address): Adjust ++ the output asm format. + -+int16x4_t __attribute__ ((noinline)) -+wrap_vreinterpret_s16_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s16_f64 (__a); -+} ++2014-05-23 Yvan Roux + -+int __attribute__ ((noinline)) -+test_vreinterpret_s16_f64 () -+{ -+ float64x1_t a; -+ int16x4_t b; -+ float64_t c[1] = { PI_F64 }; -+ int16_t d[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; -+ int16_t e[4]; -+ int i; ++ Backport from trunk r209617. ++ 2014-04-22 James Greenhalgh + -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s16_f64 (a); -+ vst1_s16 (e, b); -+ for (i = 0; i < 4; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; ++ * config/aarch64/aarch64-simd.md ++ (aarch64_cmdi): Always split. ++ (*aarch64_cmdi): New. ++ (aarch64_cmtstdi): Always split. ++ (*aarch64_cmtstdi): New. + -+int32x2_t __attribute__ ((noinline)) -+wrap_vreinterpret_s32_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s32_f64 (__a); -+} ++2014-05-23 Yvan Roux + -+int __attribute__ ((noinline)) -+test_vreinterpret_s32_f64 () -+{ -+ float64x1_t a; -+ int32x2_t b; -+ float64_t c[1] = { PI_F64 }; -+ int32_t d[2] = { 0x54442D18, 0x400921FB }; -+ int32_t e[2]; -+ int i; ++ Backport from trunk r209615. ++ 2014-04-22 Ramana Radhakrishnan + -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s32_f64 (a); -+ vst1_s32 (e, b); -+ for (i = 0; i < 2; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; ++ * config/arm/arm.c (arm_hard_regno_mode_ok): Loosen ++ restrictions on core registers for DImode values in Thumb2. 
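# Note (editorial): the vreinterpret_f64_* / vreinterpret_*_f64 intrinsics
# exercised by the test above are pure bit-casts, as the PI_F64 constant and
# its 0x400921FB54442D18 bit pattern show. Condensed to one round-trip
# (pi_bits_roundtrip is a hypothetical name, not part of the testsuite):
#
#   #include <arm_neon.h>
#
#   /* Move pi's bit pattern into a float64x1_t and back; the reinterprets
#      should emit no conversion instruction.  */
#   int
#   pi_bits_roundtrip (void)
#   {
#     int64x1_t bits = vcreate_s64 (0x400921FB54442D18);
#     float64x1_t d = vreinterpret_f64_s64 (bits);
#     return vget_lane_s64 (vreinterpret_s64_f64 (d), 0)
#            == 0x400921FB54442D18;
#   }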
+ -+int64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_s64_f64 (float64x1_t __a) -+{ -+ return vreinterpret_s64_f64 (__a); -+} ++2014-05-23 Yvan Roux + -+int __attribute__ ((noinline)) -+test_vreinterpret_s64_f64 () -+{ -+ float64x1_t a; -+ int64x1_t b; -+ float64_t c[1] = { PI_F64 }; -+ int64_t d[1] = { 0x400921FB54442D18 }; -+ int64_t e[1]; -+ int i; ++ Backport from trunk r209613, r209614. ++ 2014-04-22 Ian Bolton + -+ a = vld1_f64 (c); -+ b = wrap_vreinterpret_s64_f64 (a); -+ vst1_s64 (e, b); -+ if (d[0] != e[0]) -+ return 1; -+ return 0; -+}; ++ * config/arm/arm.md (*anddi_notdi_zesidi): New pattern. ++ * config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern. + -+float32x4_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f32_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_f32_f64 (__a); -+} ++ 2014-04-22 Ian Bolton + -+int __attribute__ ((noinline)) -+test_vreinterpretq_f32_f64 () -+{ -+ float64x2_t a; -+ float32x4_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; ++ * config/arm/thumb2.md (*iordi_notdi_di): New pattern. ++ (*iordi_notzesidi_di): Likewise. ++ (*iordi_notsesidi_di): Likewise. + -+ /* Values corresponding to f32 reinterpret of -+ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. */ -+ float32_t d[4] = { 3.3702805504E12, -+ 2.1426990032196044921875E0, -+ -2.8569523269651966444143014594E-32, -+ 2.089785099029541015625E0 }; -+ float32_t e[4]; -+ int i; ++2014-05-23 Yvan Roux + -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_f32_f64 (a); -+ vst1q_f32 (e, b); -+ for (i = 0; i < 4; i++) -+ { -+ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) -+ return 1; -+ } -+ return 0; -+}; ++ Backport from trunk r209561. ++ 2014-04-22 Ian Bolton + -+int8x16_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s8_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s8_f64 (__a); -+} ++ * config/arm/arm-protos.h (tune_params): New struct members. ++ * config/arm/arm.c: Initialise tune_params per processor. ++ (thumb2_reorg): Suppress conversion from t32 to t16 when optimizing ++ for speed, based on new tune_params. + -+int __attribute__ ((noinline)) -+test_vreinterpretq_s8_f64 () -+{ -+ float64x2_t a; -+ int8x16_t b; -+ float64_t c[2] = { PI_F64, E_F64 }; -+ int8_t d[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, -+ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; -+ int8_t e[16]; -+ int i; ++2014-05-23 Yvan Roux + -+ a = vld1q_f64 (c); -+ b = wrap_vreinterpretq_s8_f64 (a); -+ vst1q_s8 (e, b); -+ for (i = 0; i < 16; i++) -+ if (d[i] != e[i]) -+ return 1; -+ return 0; -+}; ++ Backport from trunk r209559. ++ 2014-04-22 Alex Velenko + -+int16x8_t __attribute__ ((noinline)) -+wrap_vreinterpretq_s16_f64 (float64x2_t __a) -+{ -+ return vreinterpretq_s16_f64 (__a); -+} ++ * config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro ++ added. ++ * config/aarch64/aarch64-simd-builtins.def (frintn): Use added ++ macro. ++ * config/aarch64/aarch64-simd.md (): Comment ++ corrected. ++ * config/aarch64/aarch64.md (): Likewise. ++ * config/aarch64/arm_neon.h (vrnd_f64): Added. ++ (vrnda_f64): Likewise. ++ (vrndi_f64): Likewise. ++ (vrndm_f64): Likewise. ++ (vrndn_f64): Likewise. ++ (vrndp_f64): Likewise. ++ (vrndx_f64): Likewise. 
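# Note (editorial): the vrnd*_f64 intrinsics added by the r209559 backport
# above map onto the AArch64 FRINT* instructions. A quick sketch, assuming
# the usual ACLE rounding-mode semantics:
#
#   #include <arm_neon.h>
#
#   float64x1_t
#   rounding_modes (float64x1_t x)
#   {
#     float64x1_t z = vrnd_f64 (x);   /* toward zero (frintz) */
#     float64x1_t n = vrndn_f64 (x);  /* to nearest, ties to even (frintn) */
#     float64x1_t m = vrndm_f64 (x);  /* toward minus infinity (frintm) */
#     float64x1_t p = vrndp_f64 (x);  /* toward plus infinity (frintp) */
#     /* vrnda_f64 (ties away), vrndi_f64 (current mode) and vrndx_f64
#        (current mode, signalling inexact) complete the set.  */
#     return vadd_f64 (vadd_f64 (z, n), vadd_f64 (m, p));
#   }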
+
++2014-05-23  Yvan Roux
+
++	Backport from trunk r209419.
++	2014-04-15  Kyrylo Tkachov
+
++	PR rtl-optimization/60663
++	* config/arm/arm.c (arm_new_rtx_costs): Improve ASM_OPERANDS case,
++	avoid 0 cost.
+
++2014-05-23  Yvan Roux
+
++	Backport from trunk r209457.
++	2014-04-16  Andrew Pinski
+
++	* config/host-linux.c (TRY_EMPTY_VM_SPACE): Change aarch64 ilp32
++	definition.
+
++2014-05-19  Yvan Roux
+
++	* LINARO-VERSION: Bump version.
+
++2014-05-14  Yvan Roux
+
++	GCC Linaro 4.9-2014.05 released.
++	* LINARO-VERSION: Update.
+
++2014-05-13  Yvan Roux
+
++	Backport from trunk r209889.
++	2014-04-29  Zhenqiang Chen
+
++	* config/aarch64/aarch64.md (movcc): New for GPF.
+
++2014-05-13  Yvan Roux
+
++	Backport from trunk r209556.
++ 2014-04-22 Zhenqiang Chen + -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s16 (int16x4_t __a) -+{ -+ return vreinterpret_f64_s16 (__a); -+} ++ * config/arm/arm.c (arm_print_operand, thumb_exit): Make sure ++ GET_MODE_SIZE argument is enum machine_mode. + -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s16 () -+{ -+ int16x4_t a; -+ float64x1_t b; -+ int16_t c[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; ++2014-04-28 Yvan Roux + -+ a = vld1_s16 (c); -+ b = wrap_vreinterpret_f64_s16 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; ++ * LINARO-VERSION: Bump version. + -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s32 (int32x2_t __a) -+{ -+ return vreinterpret_f64_s32 (__a); -+} ++2014-04-22 Yvan Roux + -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s32 () -+{ -+ int32x2_t a; -+ float64x1_t b; -+ int32_t c[2] = { 0x54442D18, 0x400921FB }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; -+ int i; ++ GCC Linaro 4.9-2014.04 released. ++ * LINARO-VERSION: New file. ++ * configure.ac: Add Linaro version string. +--- a/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c +@@ -0,0 +1,65 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fno-inline --save-temps" } */ + -+ a = vld1_s32 (c); -+ b = wrap_vreinterpret_f64_s32 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; ++extern void abort (void); + -+float64x1_t __attribute__ ((noinline)) -+wrap_vreinterpret_f64_s64 (int64x1_t __a) ++typedef long long s64int; ++typedef int s32int; ++typedef unsigned long long u64int; ++typedef unsigned int u32int; ++ ++s64int ++iordi_di_notdi (s64int a, s64int b) +{ -+ return vreinterpret_f64_s64 (__a); ++ return (a | ~b); +} + -+int __attribute__ ((noinline)) -+test_vreinterpret_f64_s64 () ++s64int ++iordi_di_notzesidi (s64int a, u32int b) +{ -+ int64x1_t a; -+ float64x1_t b; -+ int64_t c[1] = { 0x400921FB54442D18 }; -+ float64_t d[1] = { PI_F64 }; -+ float64_t e[1]; ++ return (a | ~(u64int) b); ++} + -+ a = vld1_s64 (c); -+ b = wrap_vreinterpret_f64_s64 (a); -+ vst1_f64 (e, b); -+ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; ++s64int ++iordi_notdi_zesidi (s64int a, u32int b) ++{ ++ return (~a | (u64int) b); ++} + -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_f32 (float32x4_t __a) ++s64int ++iordi_di_notsesidi (s64int a, s32int b) +{ -+ return vreinterpretq_f64_f32 (__a); ++ return (a | ~(s64int) b); +} + -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_f32 () ++int main () +{ -+ float32x4_t a; -+ float64x2_t b; -+ /* Values corresponding to f32 reinterpret of -+ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. 
*/ -+ float32_t c[4] = { 3.3702805504E12, -+ 2.1426990032196044921875E0, -+ -2.8569523269651966444143014594E-32, -+ 2.089785099029541015625E0 }; ++ s64int a64 = 0xdeadbeef00000000ll; ++ s64int b64 = 0x000000004f4f0112ll; ++ s64int c64 = 0xdeadbeef000f0000ll; + -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; ++ u32int c32 = 0x01124f4f; ++ s32int d32 = 0xabbaface; + -+ a = vld1q_f32 (c); -+ b = wrap_vreinterpretq_f64_f32 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; ++ s64int z = iordi_di_notdi (a64, b64); ++ if (z != 0xffffffffb0b0feedll) ++ abort (); + -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s8 (int8x16_t __a) -+{ -+ return vreinterpretq_f64_s8 (__a); -+} ++ z = iordi_di_notzesidi (a64, c32); ++ if (z != 0xfffffffffeedb0b0ll) ++ abort (); + -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s8 () -+{ -+ int8x16_t a; -+ float64x2_t b; -+ int8_t c[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, -+ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; ++ z = iordi_notdi_zesidi (c64, c32); ++ if (z != 0x21524110fff2ffffll) ++ abort (); ++ ++ z = iordi_di_notsesidi (a64, d32); ++ if (z != 0xdeadbeef54450531ll) ++ abort (); + -+ a = vld1q_s8 (c); -+ b = wrap_vreinterpretq_f64_s8 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; + return 0; -+}; -+ -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s16 (int16x8_t __a) -+{ -+ return vreinterpretq_f64_s16 (__a); +} + -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s16 () -+{ -+ int16x8_t a; -+ float64x2_t b; -+ int16_t c[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, -+ 0x5769, 0x8B14, 0xBF0A, 0x4005 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; ++/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */ + -+ a = vld1q_s16 (c); -+ b = wrap_vreinterpretq_f64_s16 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzips16' ARM Neon intrinsic. 
*/ + -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s32 (int32x4_t __a) -+{ -+ return vreinterpretq_f64_s32 (__a); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s32 () -+{ -+ int32x4_t a; -+ float64x2_t b; -+ int32_t c[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzips16.x" + -+ a = vld1q_s32 (c); -+ b = wrap_vreinterpretq_f64_s32 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; ++/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vexts64' ARM Neon intrinsic. */ + -+float64x2_t __attribute__ ((noinline)) -+wrap_vreinterpretq_f64_s64 (int64x2_t __a) -+{ -+ return vreinterpretq_f64_s64 (__a); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int __attribute__ ((noinline)) -+test_vreinterpretq_f64_s64 () -+{ -+ int64x2_t a; -+ float64x2_t b; -+ int64_t c[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; -+ float64_t d[2] = { PI_F64, E_F64 }; -+ float64_t e[2]; -+ int i; ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_s64.x" + -+ a = vld1q_s64 (c); -+ b = wrap_vreinterpretq_f64_s64 (a); -+ vst1q_f64 (e, b); -+ for (i = 0; i < 2; i++) -+ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) -+ return 1; -+ return 0; -+}; ++/* Don't scan assembler for vext - it can be optimized into a move from r0. */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrns16' ARM Neon intrinsic. */ + -+int -+main (int argc, char **argv) -+{ -+ if (test_vreinterpret_f32_f64 ()) -+ abort (); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ if (test_vreinterpret_s8_f64 ()) -+ abort (); -+ if (test_vreinterpret_s16_f64 ()) -+ abort (); -+ if (test_vreinterpret_s32_f64 ()) -+ abort (); -+ if (test_vreinterpret_s64_f64 ()) -+ abort (); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrns16.x" + -+ if (test_vreinterpretq_f32_f64 ()) -+ abort (); ++/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipu16' ARM Neon intrinsic. 
*/ + -+ if (test_vreinterpretq_s8_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s16_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s32_f64 ()) -+ abort (); -+ if (test_vreinterpretq_s64_f64 ()) -+ abort (); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ if (test_vreinterpret_f64_f32 ()) -+ abort (); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipu16.x" + -+ if (test_vreinterpret_f64_s8 ()) -+ abort (); -+ if (test_vreinterpret_f64_s16 ()) -+ abort (); -+ if (test_vreinterpret_f64_s32 ()) -+ abort (); -+ if (test_vreinterpret_f64_s64 ()) -+ abort (); ++/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQs8' ARM Neon intrinsic. */ + -+ if (test_vreinterpretq_f64_f32 ()) -+ abort (); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ if (test_vreinterpretq_f64_s8 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s16 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s32 ()) -+ abort (); -+ if (test_vreinterpretq_f64_s64 ()) -+ abort (); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqs8.x" ++ ++/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQu8' ARM Neon intrinsic. */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c -@@ -0,0 +1,105 @@ -+/* Test vrnd_f64 works correctly. */ +/* { dg-do run } */ -+/* { dg-options "--save-temps" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" ++#include "../../aarch64/simd/extq_u8.x" + -+extern void abort (void); -+ -+/* Bit offset to round mode field in FPCR. */ -+#define RMODE_START 22 ++/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnQf32' ARM Neon intrinsic. */ + -+#define FPROUNDING_ZERO 3 ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+/* Set RMODE field of FPCR control register -+ to rounding mode passed. */ -+void __inline __attribute__ ((__always_inline__)) -+set_rounding_mode (uint32_t mode) -+{ -+ uint32_t r; ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnqf32.x" + -+ /* Read current FPCR. 
*/ -+ asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :); ++/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextu64' ARM Neon intrinsic. */ + -+ /* Clear rmode. */ -+ r &= ~(3 << RMODE_START); -+ /* Calculate desired FPCR. */ -+ r |= mode << RMODE_START; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ /* Write desired FPCR back. */ -+ asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_u64.x" + -+float64x1_t __attribute__ ((noinline)) -+compare_f64 (float64x1_t passed, float64_t expected) -+{ -+ return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected) -+ > __DBL_EPSILON__); -+} ++/* Don't scan assembler for vext - it can be optimized into a move from r0. */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnQs8' ARM Neon intrinsic. */ + -+void __attribute__ ((noinline)) -+run_round_tests (float64x1_t *tests, -+ float64_t expectations[][6]) -+{ -+ int i; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ for (i = 0; i < 6; i++) -+ { -+ if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i])) -+ abort (); -+ if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i])) -+ abort (); -+ if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i])) -+ abort (); -+ if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i])) -+ abort (); -+ if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i])) -+ abort (); -+ if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i])) -+ abort (); -+ if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i])) -+ abort (); -+ } -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnqs8.x" + -+int -+main (int argc, char **argv) -+{ -+ float64x1_t tests[6] = -+ { -+ vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5. */ -+ vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4. */ -+ vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6. */ -+ vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5. */ -+ vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4. */ -+ vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6. */ -+ }; ++/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnu16' ARM Neon intrinsic. */ + -+ float64_t expectations[7][6] = -+ { -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrnd - round towards zero. */ -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndx - round using FPCR mode. */ -+ { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, /* vrndp - round to plus infinity. */ -+ { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 }, /* vrndn - round ties to even. */ -+ { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity. */ -+ { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndi - round using FPCR mode. 
*/ -+ { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 }, /* vrnda - round ties away from 0. */ -+ }; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ /* Set floating point control register -+ to have predictable vrndx and vrndi behaviour. */ -+ set_rounding_mode (FPROUNDING_ZERO); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnu16.x" + -+ run_round_tests (tests, expectations); ++/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQp8' ARM Neon intrinsic. */ + -+ return 0; -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */ -+/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */ ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqp8.x" ++ ++/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_p16' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipp16.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_u8.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s32' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnQs16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzips32.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnqs16.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s16' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQs64' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzips16.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_s64.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_u32' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrns8' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipu32.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrns8.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_s8' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnQu16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipqs8.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnqu16.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_u16' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQu64' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipu16.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_u64.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u8' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipqu8.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqp16.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x -@@ -0,0 +1,29 @@ -+extern void abort (void); +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQs32' ARM Neon intrinsic. 
*/ + -+poly8x16x2_t -+test_vzipqp8 (poly8x16_t _a, poly8x16_t _b) -+{ -+ return vzipq_p8 (_a, _b); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -+ poly8_t second[] = -+ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; -+ poly8x16x2_t result = test_vzipqp8 (vld1q_p8 (first), vld1q_p8 (second)); -+ poly8x16_t res1 = result.val[0], res2 = result.val[1]; -+ poly8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; -+ poly8_t exp2[] = -+ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; -+ poly8x16_t expected1 = vld1q_p8 (exp1); -+ poly8x16_t expected2 = vld1q_p8 (exp2); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqs32.x" + -+ for (i = 0; i < 16; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextp16' ARM Neon intrinsic. */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+float32x4x2_t -+test_vzipqf32 (float32x4_t _a, float32x4_t _b) -+{ -+ return vzipq_f32 (_a, _b); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_p16.x" + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2, 3, 4}; -+ float32_t second[] = {5, 6, 7, 8}; -+ float32x4x2_t result = test_vzipqf32 (vld1q_f32 (first), vld1q_f32 (second)); -+ float32x4_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 5, 2, 6}; -+ float32_t exp2[] = {3, 7, 4, 8}; -+ float32x4_t expected1 = vld1q_f32 (exp1); -+ float32x4_t expected2 = vld1q_f32 (exp2); ++/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vexts32' ARM Neon intrinsic. */ + -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_p8' AArch64 SIMD intrinsic. 
*/ ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_s32.x" ++ ++/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzps8' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipp8.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzps8.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQu32' ARM Neon intrinsic. */ + -+poly16x8x2_t -+test_vzipqp16 (poly16x8_t _a, poly16x8_t _b) -+{ -+ return vzipq_p16 (_a, _b); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ poly16x8x2_t result = test_vzipqp16 (vld1q_p16 (first), vld1q_p16 (second)); -+ poly16x8_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ poly16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ poly16x8_t expected1 = vld1q_p16 (exp1); -+ poly16x8_t expected2 = vld1q_p16 (exp2); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqu32.x" + -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextu32' ARM Neon intrinsic. 
*/ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); -+ -+int32x4x2_t -+test_vzipqs32 (int32x4_t _a, int32x4_t _b) -+{ -+ return vzipq_s32 (_a, _b); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2, 3, 4}; -+ int32_t second[] = {5, 6, 7, 8}; -+ int32x4x2_t result = test_vzipqs32 (vld1q_s32 (first), vld1q_s32 (second)); -+ int32x4_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 5, 2, 6}; -+ int32_t exp2[] = {3, 7, 4, 8}; -+ int32x4_t expected1 = vld1q_s32 (exp1); -+ int32x4_t expected2 = vld1q_s32 (exp2); ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_u32.x" + -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQp8' ARM Neon intrinsic. */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int8x8x2_t -+test_vzips8 (int8x8_t _a, int8x8_t _b) -+{ -+ return vzip_s8 (_a, _b); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqp8.x" + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int8x8x2_t result = test_vzips8 (vld1_s8 (first), vld1_s8 (second)); -+ int8x8_t res1 = result.val[0], res2 = result.val[1]; -+ int8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ int8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ int8x8_t expected1 = vld1_s8 (exp1); -+ int8x8_t expected2 = vld1_s8 (exp2); ++/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnQp8' ARM Neon intrinsic. 
*/ + -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnqp8.x" + -+uint32x4x2_t -+test_vzipqu32 (uint32x4_t _a, uint32x4_t _b) -+{ -+ return vzipq_u32 (_a, _b); -+} ++/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp ++++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp +@@ -0,0 +1,35 @@ ++# Copyright (C) 1997-2014 Free Software Foundation, Inc. + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2, 3, 4}; -+ uint32_t second[] = {5, 6, 7, 8}; -+ uint32x4x2_t result = test_vzipqu32 (vld1q_u32 (first), vld1q_u32 (second)); -+ uint32x4_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 5, 2, 6}; -+ uint32_t exp2[] = {3, 7, 4, 8}; -+ uint32x4_t expected1 = vld1q_u32 (exp1); -+ uint32x4_t expected2 = vld1q_u32 (exp2); ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . + -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++# GCC testsuite that uses the `dg.exp' driver. + -+ return 0; ++# Exit immediately if this isn't an ARM target. ++if ![istarget arm*-*-*] then { ++ return +} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); + -+float32x2x2_t -+test_vzipf32 (float32x2_t _a, float32x2_t _b) -+{ -+ return vzip_f32 (_a, _b); -+} ++# Load support procs. ++load_lib gcc-dg.exp + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ float32_t first[] = {1, 2}; -+ float32_t second[] = {3, 4}; -+ float32x2x2_t result = test_vzipf32 (vld1_f32 (first), vld1_f32 (second)); -+ float32x2_t res1 = result.val[0], res2 = result.val[1]; -+ float32_t exp1[] = {1, 3}; -+ float32_t exp2[] = {2, 4}; -+ float32x2_t expected1 = vld1_f32 (exp1); -+ float32x2_t expected2 = vld1_f32 (exp2); ++# Initialize `dg'. ++dg-init + -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++# Main loop. ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ ++ "" "" + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++# All done. 
++dg-finish +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpp16' ARM Neon intrinsic. */ + -+int16x8x2_t -+test_vzipqs16 (int16x8_t _a, int16x8_t _b) -+{ -+ return vzipq_s16 (_a, _b); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ int16x8x2_t result = test_vzipqs16 (vld1q_s16 (first), vld1q_s16 (second)); -+ int16x8_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ int16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ int16x8_t expected1 = vld1q_s16 (exp1); -+ int16x8_t expected2 = vld1q_s16 (exp2); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpp16.x" + -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzps32' ARM Neon intrinsic. */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+uint8x8x2_t -+test_vzipu8 (uint8x8_t _a, uint8x8_t _b) -+{ -+ return vzip_u8 (_a, _b); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzps32.x" + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint8x8x2_t result = test_vzipu8 (vld1_u8 (first), vld1_u8 (second)); -+ uint8x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ uint8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ uint8x8_t expected1 = vld1_u8 (exp1); -+ uint8x8_t expected2 = vld1_u8 (exp2); ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpu32' ARM Neon intrinsic. 
*/ + -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpu32.x" + -+uint16x8x2_t -+test_vzipqu16 (uint16x8_t _a, uint16x8_t _b) -+{ -+ return vzipq_u16 (_a, _b); -+} ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQp16' ARM Neon intrinsic. */ + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; -+ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; -+ uint16x8x2_t result = test_vzipqu16 (vld1q_u16 (first), vld1q_u16 (second)); -+ uint16x8_t res1 = result.val[0], res2 = result.val[1]; -+ uint16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; -+ uint16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; -+ uint16x8_t expected1 = vld1q_u16 (exp1); -+ uint16x8_t expected2 = vld1q_u16 (exp2); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ for (i = 0; i < 8; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_p16.x" + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_p8' AArch64 SIMD intrinsic. */ ++/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQs32' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipqp8.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_s32.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_f32' AArch64 SIMD intrinsic. 
*/ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipqf32.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqp16.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQs32' ARM Neon intrinsic. */ + -+poly16x4x2_t -+test_vzipp16 (poly16x4_t _a, poly16x4_t _b) -+{ -+ return vzip_p16 (_a, _b); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ poly16_t first[] = {1, 2, 3, 4}; -+ poly16_t second[] = {5, 6, 7, 8}; -+ poly16x4x2_t result = test_vzipp16 (vld1_p16 (first), vld1_p16 (second)); -+ poly16x4_t res1 = result.val[0], res2 = result.val[1]; -+ poly16_t exp1[] = {1, 5, 2, 6}; -+ poly16_t exp2[] = {3, 7, 4, 8}; -+ poly16x4_t expected1 = vld1_p16 (exp1); -+ poly16x4_t expected2 = vld1_p16 (exp2); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqs32.x" + -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnp8' ARM Neon intrinsic. 
*/ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int32x2x2_t -+test_vzips32 (int32x2_t _a, int32x2_t _b) -+{ -+ return vzip_s32 (_a, _b); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnp8.x" + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int32_t first[] = {1, 2}; -+ int32_t second[] = {3, 4}; -+ int32x2x2_t result = test_vzips32 (vld1_s32 (first), vld1_s32 (second)); -+ int32x2_t res1 = result.val[0], res2 = result.val[1]; -+ int32_t exp1[] = {1, 3}; -+ int32_t exp2[] = {2, 4}; -+ int32x2_t expected1 = vld1_s32 (exp1); -+ int32x2_t expected2 = vld1_s32 (exp2); ++/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQu32' ARM Neon intrinsic. */ + -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp -@@ -0,0 +1,45 @@ -+# Specific regression driver for AArch64 SIMD instructions. -+# Copyright (C) 2014 Free Software Foundation, Inc. -+# Contributed by ARM Ltd. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, but -+# WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+# General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# . */ ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_u32.x" + -+# GCC testsuite that uses the `dg.exp' driver. ++/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQu8' ARM Neon intrinsic. */ + -+# Exit immediately if this isn't an AArch64 target. -+if {![istarget aarch64*-*-*] } then { -+ return -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+# Load support procs. -+load_lib gcc-dg.exp ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqu8.x" + -+# If a testcase doesn't have special options, use these. 
-+global DEFAULT_CFLAGS -+if ![info exists DEFAULT_CFLAGS] then { -+ set DEFAULT_CFLAGS " -ansi -pedantic-errors" -+} ++/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzips8' ARM Neon intrinsic. */ + -+# Initialize `dg'. -+dg-init ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+# Main loop. -+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -+ "" $DEFAULT_CFLAGS ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzips8.x" + -+# All done. -+dg-finish ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQu32' ARM Neon intrinsic. */ + -+uint32x2x2_t -+test_vzipu32 (uint32x2_t _a, uint32x2_t _b) -+{ -+ return vzip_u32 (_a, _b); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ uint32_t first[] = {1, 2}; -+ uint32_t second[] = {3, 4}; -+ uint32x2x2_t result = test_vzipu32 (vld1_u32 (first), vld1_u32 (second)); -+ uint32x2_t res1 = result.val[0], res2 = result.val[1]; -+ uint32_t exp1[] = {1, 3}; -+ uint32_t exp2[] = {2, 4}; -+ uint32x2_t expected1 = vld1_u32 (exp1); -+ uint32x2_t expected2 = vld1_u32 (exp2); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqu32.x" + -+ for (i = 0; i < 2; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpp8' ARM Neon intrinsic. 
*/ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x -@@ -0,0 +1,27 @@ -+extern void abort (void); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int16x4x2_t -+test_vzips16 (int16x4_t _a, int16x4_t _b) -+{ -+ return vzip_s16 (_a, _b); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpp8.x" + -+int -+main (int argc, char **argv) -+{ -+ int i; -+ int16_t first[] = {1, 2, 3, 4}; -+ int16_t second[] = {5, 6, 7, 8}; -+ int16x4x2_t result = test_vzips16 (vld1_s16 (first), vld1_s16 (second)); -+ int16x4_t res1 = result.val[0], res2 = result.val[1]; -+ int16_t exp1[] = {1, 5, 2, 6}; -+ int16_t exp2[] = {3, 7, 4, 8}; -+ int16x4_t expected1 = vld1_s16 (exp1); -+ int16x4_t expected2 = vld1_s16 (exp2); ++/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipp16' ARM Neon intrinsic. */ + -+ for (i = 0; i < 4; i++) -+ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) -+ abort (); ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x -@@ -0,0 +1,29 @@ -+extern void abort (void); ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipp16.x" + -+int8x16x2_t -+test_vzipqs8 (int8x16_t _a, int8x16_t _b) -+{ -+ return vzipq_s8 (_a, _b); -+} ++/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzips32' ARM Neon intrinsic. 
*/
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-save-temps -O1 -fno-inline" } */
++/* { dg-add-options arm_neon } */
++
++#include "arm_neon.h"
++#include "../../aarch64/simd/vzips32.x"
++
++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c
+@@ -0,0 +1,12 @@
++/* Test the `vtrnp16' ARM Neon intrinsic. */
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-save-temps -O1 -fno-inline" } */
++/* { dg-add-options arm_neon } */
++
++#include "arm_neon.h"
++#include "../../aarch64/simd/vtrnp16.x"
++
++/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c
+@@ -0,0 +1,26 @@
++/* Test the `vextp64' ARM Neon intrinsic. */
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_crypto_ok } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++/* { dg-add-options arm_crypto } */
++
++#include "arm_neon.h"
++
++extern void abort (void);
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly64x1_t in1 = {0};
++  poly64x1_t in2 = {1};
++  poly64x1_t actual = vext_p64 (in1, in2, 0);
++  if (actual != in1)
++    abort ();
++
++  return 0;
++}
++
++/* Don't scan assembler for vext - it can be optimized into a move from r0. */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c
+@@ -0,0 +1,12 @@
++/* Test the `vtrns32' ARM Neon intrinsic. */
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-save-temps -O1 -fno-inline" } */
++/* { dg-add-options arm_neon } */
++
++#include "arm_neon.h"
++#include "../../aarch64/simd/vtrns32.x"
++
++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c
+@@ -0,0 +1,12 @@
++/* Test the `vzipu32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipqs32.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipu32.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u32' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQs8' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipqu32.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_s8.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_s8' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzips8.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnu32.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzip_f32' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQu8' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipf32.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqu8.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_s16' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnQu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipqs16.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnqu8.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c -@@ -0,0 +1,11 @@ -+/* Test the `vzipq_u16' AArch64 SIMD intrinsic. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ -+/* { dg-options "-save-temps -fno-inline" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include -+#include "vzipqu16.x" ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqf32.x" + -+/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ -+/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x -@@ -0,0 +1,27 @@ -+extern void abort (void); +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipp8' ARM Neon intrinsic. 
*/
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-save-temps -O1 -fno-inline" } */
++/* { dg-add-options arm_neon } */
++
++#include "arm_neon.h"
++#include "../../aarch64/simd/vzipp8.x"
++
++/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c
+@@ -0,0 +1,12 @@
++/* Test the `vtrnQp16' ARM Neon intrinsic. */
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-save-temps -O1 -fno-inline" } */
++/* { dg-add-options arm_neon } */
++
++#include "arm_neon.h"
++#include "../../aarch64/simd/vtrnqp16.x"
++
++/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c
+@@ -0,0 +1,12 @@
++/* Test the `vexts8' ARM Neon intrinsic. */
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++/* { dg-add-options arm_neon } */
++
++#include "arm_neon.h"
++#include "../../aarch64/simd/ext_s8.x"
++
++/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c
+@@ -0,0 +1,33 @@
++/* Test the `vextQp64' ARM Neon intrinsic. */
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_crypto_ok } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++/* { dg-add-options arm_crypto } */
++
++#include "arm_neon.h"
++
++extern void abort (void);
++
++poly64x2_t
++test_vextq_p64_1 (poly64x2_t a, poly64x2_t b)
++{
++  return vextq_p64 (a, b, 1);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly64x2_t in1 = {0, 1};
++  poly64x2_t in2 = {2, 3};
++  poly64x2_t actual = test_vextq_p64_1 (in1, in2);
++  for (i = 0; i < 2; i++)
++    if (actual[i] != i + 1)
++      abort ();
++
++  return 0;
++}
++
++/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c
+@@ -0,0 +1,12 @@
++/* Test the `vextf32' ARM Neon intrinsic. */
++
++/* { dg-do run } */
++/* { dg-require-effective-target arm_neon_ok } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++/* { dg-add-options arm_neon } */
++
++#include "arm_neon.h"
++#include "../../aarch64/simd/ext_f32.x"
++
++/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c
+@@ -0,0 +1,12 @@
++/* Test the `vtrnQs32' ARM Neon intrinsic. 
*/ + -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_f32_0 (float32x2_t a) -+{ -+ return vdup_lane_f32 (a, 0); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_f32_1 (float32x2_t a) -+{ -+ return vdup_lane_f32 (a, 1); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnqs32.x" + -+int __attribute__ ((noinline)) -+test_vdup_lane_f32 () -+{ -+ float32x2_t a; -+ float32x2_t b; -+ int i; -+ float32_t c[2] = { 0.0 , 3.14 }; -+ float32_t d[2]; ++/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnQu32' ARM Neon intrinsic. */ + -+ a = vld1_f32 (c); -+ b = wrap_vdup_lane_f32_0 (a); -+ vst1_f32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ b = wrap_vdup_lane_f32_1 (a); -+ vst1_f32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnqu32.x" + -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_f32_0 (float32x2_t a) -+{ -+ return vdupq_lane_f32 (a, 0); -+} ++/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnu8' ARM Neon intrinsic. */ + -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_f32_1 (float32x2_t a) -+{ -+ return vdupq_lane_f32 (a, 1); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int __attribute__ ((noinline)) -+test_vdupq_lane_f32 () -+{ -+ float32x2_t a; -+ float32x4_t b; -+ int i; -+ float32_t c[2] = { 0.0 , 3.14 }; -+ float32_t d[4]; ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnu8.x" + -+ a = vld1_f32 (c); -+ b = wrap_vdupq_lane_f32_0 (a); -+ vst1q_f32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; ++/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQp8' ARM Neon intrinsic. 
*/ + -+ b = wrap_vdupq_lane_f32_1 (a); -+ vst1q_f32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int8x8_t __attribute__ ((noinline)) -+wrap_vdup_lane_s8_0 (int8x8_t a) -+{ -+ return vdup_lane_s8 (a, 0); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_p8.x" + -+int8x8_t __attribute__ ((noinline)) -+wrap_vdup_lane_s8_1 (int8x8_t a) -+{ -+ return vdup_lane_s8 (a, 1); -+} ++/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpf32' ARM Neon intrinsic. */ + -+int __attribute__ ((noinline)) -+test_vdup_lane_s8 () -+{ -+ int8x8_t a; -+ int8x8_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ int8_t d[8]; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ a = vld1_s8 (c); -+ b = wrap_vdup_lane_s8_0 (a); -+ vst1_s8 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[0] != d[i]) -+ return 1; ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpf32.x" + -+ b = wrap_vdup_lane_s8_1 (a); -+ vst1_s8 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQs16' ARM Neon intrinsic. */ + -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s8_0 (int8x8_t a) -+{ -+ return vdupq_lane_s8 (a, 0); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s8_1 (int8x8_t a) -+{ -+ return vdupq_lane_s8 (a, 1); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqs16.x" + -+int __attribute__ ((noinline)) -+test_vdupq_lane_s8 () -+{ -+ int8x8_t a; -+ int8x16_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -+ int8_t d[16]; ++/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vexts16' ARM Neon intrinsic. 
*/ + -+ a = vld1_s8 (c); -+ b = wrap_vdupq_lane_s8_0 (a); -+ vst1q_s8 (d, b); -+ for (i = 0; i < 16; i++) -+ if (c[0] != d[i]) -+ return 1; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ b = wrap_vdupq_lane_s8_1 (a); -+ vst1q_s8 (d, b); -+ for (i = 0; i < 16; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_s16.x" + -+int16x4_t __attribute__ ((noinline)) -+wrap_vdup_lane_s16_0 (int16x4_t a) -+{ -+ return vdup_lane_s16 (a, 0); -+} ++/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpu8' ARM Neon intrinsic. */ + -+int16x4_t __attribute__ ((noinline)) -+wrap_vdup_lane_s16_1 (int16x4_t a) -+{ -+ return vdup_lane_s16 (a, 1); -+} -+ -+int __attribute__ ((noinline)) -+test_vdup_lane_s16 () -+{ -+ int16x4_t a; -+ int16x4_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int16_t c[4] = { 0, 1, 2, 3 }; -+ int16_t d[4]; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ a = vld1_s16 (c); -+ b = wrap_vdup_lane_s16_0 (a); -+ vst1_s16 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpu8.x" + -+ b = wrap_vdup_lane_s16_1 (a); -+ vst1_s16 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} ++/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQu16' ARM Neon intrinsic. */ + -+int16x8_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s16_0 (int16x4_t a) -+{ -+ return vdupq_lane_s16 (a, 0); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int16x8_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s16_1 (int16x4_t a) -+{ -+ return vdupq_lane_s16 (a, 1); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqu16.x" + -+int __attribute__ ((noinline)) -+test_vdupq_lane_s16 () -+{ -+ int16x4_t a; -+ int16x8_t b; -+ int i; -+ /* Only two first cases are interesting. */ -+ int16_t c[4] = { 0, 1, 2, 3 }; -+ int16_t d[8]; ++/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQf32' ARM Neon intrinsic. 
*/ + -+ a = vld1_s16 (c); -+ b = wrap_vdupq_lane_s16_0 (a); -+ vst1q_s16 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[0] != d[i]) -+ return 1; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ b = wrap_vdupq_lane_s16_1 (a); -+ vst1q_s16 (d, b); -+ for (i = 0; i < 8; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_f32.x" + -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_s32_0 (int32x2_t a) -+{ -+ return vdup_lane_s32 (a, 0); -+} ++/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextu16' ARM Neon intrinsic. */ + -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_lane_s32_1 (int32x2_t a) -+{ -+ return vdup_lane_s32 (a, 1); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int __attribute__ ((noinline)) -+test_vdup_lane_s32 () -+{ -+ int32x2_t a; -+ int32x2_t b; -+ int i; -+ int32_t c[2] = { 0, 1 }; -+ int32_t d[2]; ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_u16.x" + -+ a = vld1_s32 (c); -+ b = wrap_vdup_lane_s32_0 (a); -+ vst1_s32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; ++/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQf32' ARM Neon intrinsic. */ + -+ b = wrap_vdup_lane_s32_1 (a); -+ vst1_s32 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s32_0 (int32x2_t a) -+{ -+ return vdupq_lane_s32 (a, 0); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqf32.x" + -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s32_1 (int32x2_t a) -+{ -+ return vdupq_lane_s32 (a, 1); -+} ++/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzps16' ARM Neon intrinsic. 
*/ + -+int __attribute__ ((noinline)) -+test_vdupq_lane_s32 () -+{ -+ int32x2_t a; -+ int32x4_t b; -+ int i; -+ int32_t c[2] = { 0, 1 }; -+ int32_t d[4]; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ a = vld1_s32 (c); -+ b = wrap_vdupq_lane_s32_0 (a); -+ vst1q_s32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[0] != d[i]) -+ return 1; ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzps16.x" + -+ b = wrap_vdupq_lane_s32_1 (a); -+ vst1q_s32 (d, b); -+ for (i = 0; i < 4; i++) -+ if (c[1] != d[i]) -+ return 1; -+ return 0; -+} ++/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextp8' ARM Neon intrinsic. */ + -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_lane_s64_0 (int64x1_t a) -+{ -+ return vdup_lane_s64 (a, 0); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_lane_s64_1 (int64x1_t a) -+{ -+ return vdup_lane_s64 (a, 1); -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/ext_p8.x" + -+int __attribute__ ((noinline)) -+test_vdup_lane_s64 () -+{ -+ int64x1_t a; -+ int64x1_t b; -+ int64_t c[1]; -+ int64_t d[1]; ++/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpu16' ARM Neon intrinsic. */ + -+ c[0] = 0; -+ a = vld1_s64 (c); -+ b = wrap_vdup_lane_s64_0 (a); -+ vst1_s64 (d, b); -+ if (c[0] != d[0]) -+ return 1; ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+ c[0] = 1; -+ a = vld1_s64 (c); -+ b = wrap_vdup_lane_s64_1 (a); -+ vst1_s64 (d, b); -+ if (c[0] != d[0]) -+ return 1; -+ return 0; -+} ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpu16.x" + -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s64_0 (int64x1_t a) -+{ -+ return vdupq_lane_s64 (a, 0); -+} ++/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vuzpQs8' ARM Neon intrinsic. 
*/ + -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_lane_s64_1 (int64x1_t a) -+{ -+ return vdupq_lane_s64 (a, 1); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int __attribute__ ((noinline)) -+test_vdupq_lane_s64 () -+{ -+ int64x1_t a; -+ int64x2_t b; -+ int i; -+ int64_t c[1]; -+ int64_t d[2]; ++#include "arm_neon.h" ++#include "../../aarch64/simd/vuzpqs8.x" + -+ c[0] = 0; -+ a = vld1_s64 (c); -+ b = wrap_vdupq_lane_s64_0 (a); -+ vst1q_s64 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; ++/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQs16' ARM Neon intrinsic. */ + -+ c[0] = 1; -+ a = vld1_s64 (c); -+ b = wrap_vdupq_lane_s64_1 (a); -+ vst1q_s64 (d, b); -+ for (i = 0; i < 2; i++) -+ if (c[0] != d[i]) -+ return 1; -+ return 0; -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int -+main () -+{ ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_s16.x" + -+ if (test_vdup_lane_f32 ()) -+ abort (); -+ if (test_vdup_lane_s8 ()) -+ abort (); -+ if (test_vdup_lane_s16 ()) -+ abort (); -+ if (test_vdup_lane_s32 ()) -+ abort (); -+ if (test_vdup_lane_s64 ()) -+ abort (); -+ if (test_vdupq_lane_f32 ()) -+ abort (); -+ if (test_vdupq_lane_s8 ()) -+ abort (); -+ if (test_vdupq_lane_s16 ()) -+ abort (); -+ if (test_vdupq_lane_s32 ()) -+ abort (); -+ if (test_vdupq_lane_s64 ()) -+ abort (); ++/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipf32' ARM Neon intrinsic. */ + -+ return 0; -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+/* Asm check for test_vdup_lane_s8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipf32.x" + -+/* Asm check for test_vdupq_lane_s8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vextQu16' ARM Neon intrinsic. */ + -+/* Asm check for test_vdup_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ -+/* Asm check for test_vdup_lane_s16. 
*/ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+/* Asm check for test_vdupq_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ -+/* Asm check for test_vdupq_lane_s16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ ++#include "arm_neon.h" ++#include "../../aarch64/simd/extq_u16.x" + -+/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ ++/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQs16' ARM Neon intrinsic. */ + -+/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqs16.x" + ++/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c -@@ -0,0 +1,619 @@ -+/* Test vdup_lane intrinsics work correctly. */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vtrnf32' ARM Neon intrinsic. */ ++ +/* { dg-do run } */ -+/* { dg-options "-O1 --save-temps" } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+#include ++#include "arm_neon.h" ++#include "../../aarch64/simd/vtrnf32.x" + -+extern void abort (void); ++/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipQu16' ARM Neon intrinsic. 
*/ + -+float32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_f32 (float32_t a) -+{ -+ return vdup_n_f32 (a); -+} ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ + -+int __attribute__ ((noinline)) -+test_vdup_n_f32 () -+{ -+ float32_t a = 1.0; -+ float32x2_t b; -+ float32_t c[2]; -+ int i; ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipqu16.x" + -+ b = wrap_vdup_n_f32 (a); -+ vst1_f32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; -+} ++/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c ++++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c +@@ -0,0 +1,12 @@ ++/* Test the `vzipu8' ARM Neon intrinsic. */ + -+float32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_f32 (float32_t a) ++/* { dg-do run } */ ++/* { dg-require-effective-target arm_neon_ok } */ ++/* { dg-options "-save-temps -O1 -fno-inline" } */ ++/* { dg-add-options arm_neon } */ ++ ++#include "arm_neon.h" ++#include "../../aarch64/simd/vzipu8.x" ++ ++/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/arm/tail-long-call.c ++++ b/src/gcc/testsuite/gcc.target/arm/tail-long-call.c +@@ -0,0 +1,12 @@ ++/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" "-mthumb" } { "" } } */ ++/* { dg-options "-O2 -march=armv5te -marm" } */ ++/* { dg-final { scan-assembler "bx" } } */ ++/* { dg-final { scan-assembler-not "blx" } } */ ++ ++int lcal (int) __attribute__ ((long_call)); ++ ++int ++dec (int a) +{ -+ return vdupq_n_f32 (a); ++ return lcal (a); +} +--- a/src/gcc/testsuite/gcc.target/arm/rev16.c ++++ b/src/gcc/testsuite/gcc.target/arm/rev16.c +@@ -0,0 +1,35 @@ ++/* { dg-options "-O2" } */ ++/* { dg-do run } */ + -+int __attribute__ ((noinline)) -+test_vdupq_n_f32 () -+{ -+ float32_t a = 1.0; -+ float32x4_t b; -+ float32_t c[4]; -+ int i; ++extern void abort (void); + -+ b = wrap_vdupq_n_f32 (a); -+ vst1q_f32 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; ++typedef unsigned int __u32; ++ ++__u32 ++__rev16_32_alt (__u32 x) ++{ ++ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) ++ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); +} + -+float64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_f64 (float64_t a) ++__u32 ++__rev16_32 (__u32 x) +{ -+ return vdup_n_f64 (a); ++ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) ++ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); +} + -+int __attribute__ ((noinline)) -+test_vdup_n_f64 () ++int ++main (void) +{ -+ float64_t a = 1.0; -+ float64x1_t b; -+ float64_t c[1]; -+ int i; ++ volatile __u32 in32 = 0x12345678; ++ volatile __u32 expected32 = 0x34127856; ++ ++ if (__rev16_32 (in32) != expected32) ++ abort (); ++ ++ if (__rev16_32_alt (in32) != expected32) ++ abort (); + -+ b = wrap_vdup_n_f64 (a); -+ vst1_f64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) -+ return 1; + return 0; +} +--- a/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c ++++ b/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c +@@ -0,0 +1,65 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fno-inline --save-temps" } */ + -+float64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_f64 (float64_t a) ++extern 
void abort (void);
++
++typedef long long s64int;
++typedef int s32int;
++typedef unsigned long long u64int;
++typedef unsigned int u32int;
++
++s64int
++anddi_di_notdi (s64int a, s64int b)
++{
++  return (a & ~b);
++}
++
++s64int
++anddi_di_notzesidi (s64int a, u32int b)
++{
++  return (a & ~(u64int) b);
++}
++
++s64int
++anddi_notdi_zesidi (s64int a, u32int b)
++{
++  return (~a & (u64int) b);
++}
++
++s64int
++anddi_di_notsesidi (s64int a, s32int b)
++{
++  return (a & ~(s64int) b);
++}
++
++int main ()
++{
++  s64int a64 = 0xdeadbeef0000ffffll;
++  s64int b64 = 0x000000005f470112ll;
++  s64int c64 = 0xdeadbeef300f0000ll;
++
++  u32int c32 = 0x01124f4f;
++  s32int d32 = 0xabbaface;
++
++  s64int z = anddi_di_notdi (c64, b64);
++  if (z != 0xdeadbeef20080000ll)
++    abort ();
++
++  z = anddi_di_notzesidi (a64, c32);
++  if (z != 0xdeadbeef0000b0b0ll)
++    abort ();
++
++  z = anddi_notdi_zesidi (c64, c32);
++  if (z != 0x0000000001104f4fll)
++    abort ();
++
++  z = anddi_di_notsesidi (a64, d32);
++  if (z != 0x0000000000000531ll)
++    abort ();
++
++  return 0;
++}
++
++/* { dg-final { scan-assembler-times "bic\t" 6 } } */
++
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c
+@@ -0,0 +1,54 @@
++/* Test vqabs_s64 intrinsics work correctly. */
++/* { dg-do run } */
++/* { dg-options "--save-temps" } */
++
++#include <arm_neon.h>
++
++extern void abort (void);
++
++int __attribute__ ((noinline))
++test_vqabs_s64 (int64x1_t passed, int64_t expected)
++{
++  return vget_lane_s64 (vqabs_s64 (passed), 0) != expected;
++}
++
++int __attribute__ ((noinline))
++test_vqabsd_s64 (int64_t passed, int64_t expected)
++{
++  return vqabsd_s64 (passed) != expected;
++}
++
++/* { dg-final { scan-assembler-times "sqabs\\td\[0-9\]+, d\[0-9\]+" 2 } } */
++
++int
++main (int argc, char **argv)
++{
++  /* Basic test. */
++  if (test_vqabs_s64 (vcreate_s64 (-1), 1))
++    abort ();
++  if (test_vqabsd_s64 (-1, 1))
++    abort ();
++
++  /* Getting absolute value of min int64_t.
++     Note, exact result cannot be represented in int64_t,
++     so max int64_t is expected. */
++  if (test_vqabs_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff))
++    abort ();
++  if (test_vqabsd_s64 (0x8000000000000000, 0x7fffffffffffffff))
++    abort ();
++
++  /* Another input that gets max int64_t. 
*/ ++ if (test_vqabs_s64 (vcreate_s64 (0x8000000000000001), 0x7fffffffffffffff)) ++ abort (); ++ if (test_vqabsd_s64 (0x8000000000000001, 0x7fffffffffffffff)) ++ abort (); ++ ++ /* Checking that large positive numbers stay the same. */ ++ if (test_vqabs_s64 (vcreate_s64 (0x7fffffffffffffff), 0x7fffffffffffffff)) ++ abort (); ++ if (test_vqabsd_s64 (0x7fffffffffffffff, 0x7fffffffffffffff)) ++ abort (); + -+ b = wrap_vdup_n_s8 (a); -+ vst1_s8 (c, b); -+ for (i = 0; i < 8; i++) -+ if (a != c[i]) -+ return 1; + return 0; +} ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp ++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp +@@ -0,0 +1,35 @@ ++# Copyright (C) 2014 Free Software Foundation, Inc. + -+int8x16_t __attribute__ ((noinline)) -+wrap_vdupq_n_s8 (int8_t a) -+{ -+ return vdupq_n_s8 (a); -+} ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . + -+int __attribute__ ((noinline)) -+test_vdupq_n_s8 () -+{ -+ int8_t a = 1; -+ int8x16_t b; -+ int8_t c[16]; -+ int i; ++# GCC testsuite that uses the `dg.exp' driver. + -+ b = wrap_vdupq_n_s8 (a); -+ vst1q_s8 (c, b); -+ for (i = 0; i < 16; i++) -+ if (a != c[i]) -+ return 1; -+ return 0; ++# Exit immediately if this isn't an AArch64 target. ++if ![istarget aarch64*-*-*] then { ++ return +} + -+uint8x8_t __attribute__ ((noinline)) -+wrap_vdup_n_u8 (uint8_t a) ++# Load support procs. ++load_lib gcc-dg.exp ++ ++# Initialize `dg'. ++dg-init ++ ++# Main loop. ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ ++ "" "" ++ ++# All done. ++dg-finish +--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c +@@ -0,0 +1,15 @@ ++/* Test the crc32b ACLE intrinsic. */ ++ ++/* { dg-do assemble } */ ++/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ ++ ++#include "arm_acle.h" ++ ++uint32_t ++test_crc32b (uint32_t arg0, uint8_t arg1) +{ -+ return vdup_n_u8 (a); ++ return __crc32b (arg0, arg1); +} + -+int __attribute__ ((noinline)) -+test_vdup_n_u8 () -+{ -+ uint8_t a = 1; -+ uint8x8_t b; -+ uint8_t c[8]; -+ int i; ++/* { dg-final { scan-assembler "crc32b\tw..?, w..?, w..?\n" } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c +@@ -0,0 +1,15 @@ ++/* Test the crc32d ACLE intrinsic. 
*/
++
++/* { dg-do assemble } */
++/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
++
++#include "arm_acle.h"
++
++uint32_t
++test_crc32d (uint32_t arg0, uint64_t arg1)
++{
++  return __crc32d (arg0, arg1);
++}
++
++/* { dg-final { scan-assembler "crc32x\tw..?, w..?, x..?\n" } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c
+@@ -0,0 +1,15 @@
++/* Test the crc32cb ACLE intrinsic. */
++
++/* { dg-do assemble } */
++/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
++
++#include "arm_acle.h"
++
++uint32_t
++test_crc32cb (uint32_t arg0, uint8_t arg1)
++{
++  return __crc32cb (arg0, arg1);
++}
++
++/* { dg-final { scan-assembler "crc32cb\tw..?, w..?, w..?\n" } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c
+@@ -0,0 +1,15 @@
++/* Test the crc32cd ACLE intrinsic. */
++
++/* { dg-do assemble } */
++/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
++
++#include "arm_acle.h"
++
++uint32_t
++test_crc32cd (uint32_t arg0, uint64_t arg1)
++{
++  return __crc32cd (arg0, arg1);
++}
++
++/* { dg-final { scan-assembler "crc32cx\tw..?, w..?, x..?\n" } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c
+@@ -0,0 +1,15 @@
++/* Test the crc32w ACLE intrinsic. */
++
++/* { dg-do assemble } */
++/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
++
++#include "arm_acle.h"
++
++uint32_t
++test_crc32w (uint32_t arg0, uint32_t arg1)
++{
++  return __crc32w (arg0, arg1);
++}
++
++/* { dg-final { scan-assembler "crc32w\tw..?, w..?, w..?\n" } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c
+@@ -0,0 +1,15 @@
++/* Test the crc32h ACLE intrinsic. */
++
++/* { dg-do assemble } */
++/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
++
++#include "arm_acle.h"
++
++uint32_t
++test_crc32h (uint32_t arg0, uint16_t arg1)
++{
++  return __crc32h (arg0, arg1);
++}
++
++/* { dg-final { scan-assembler "crc32h\tw..?, w..?, w..?\n" } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c
+@@ -0,0 +1,15 @@
++/* Test the crc32cw ACLE intrinsic. */
++
++/* { dg-do assemble } */
++/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
++
++#include "arm_acle.h"
++
++uint32_t
++test_crc32cw (uint32_t arg0, uint32_t arg1)
++{
++  return __crc32cw (arg0, arg1);
++}
++
++/* { dg-final { scan-assembler "crc32cw\tw..?, w..?, w..?\n" } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c
+@@ -0,0 +1,15 @@
++/* Test the crc32ch ACLE intrinsic. */
++
++/* { dg-do assemble } */
++/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */
++
++#include "arm_acle.h"
++
++uint32_t
++test_crc32ch (uint32_t arg0, uint16_t arg1)
++{
++  return __crc32ch (arg0, arg1);
++}
++
++/* { dg-final { scan-assembler "crc32ch\tw..?, w..?, w..?\n" } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c
+@@ -0,0 +1,596 @@
++/* Test vreinterpret_f64_* and vreinterpret_*_f64 intrinsics work correctly. */
++/* { dg-do run } */
++/* { dg-options "-O3" } */
++
++#include <arm_neon.h>
++
++extern void abort (void);
++
++#define ABS(a) __builtin_fabs (a)
++#define ISNAN(a) __builtin_isnan (a)
++
++#define DOUBLE_EQUALS(a, b, epsilon) \
++( \
++  ((a) == (b)) \
++  || (ISNAN (a) && ISNAN (b)) \
++  || (ABS (a - b) < epsilon) \
++)
++
++/* Pi accurate up to 16 digits.
++   Further digits are a closest binary approximation. */
++#define PI_F64 3.14159265358979311599796346854
++/* Hex representation in Double (IEEE754 Double precision 64-bit) is:
++   0x400921FB54442D18. */
++
++/* E accurate up to 16 digits.
++   Further digits are a closest binary approximation. */
++#define E_F64 2.71828182845904509079559829843
++/* Hex representation in Double (IEEE754 Double precision 64-bit) is:
++   0x4005BF0A8B145769. */
++
++float32x2_t __attribute__ ((noinline))
++wrap_vreinterpret_f32_f64 (float64x1_t __a)
++{
++  return vreinterpret_f32_f64 (__a);
++}
++
++int __attribute__ ((noinline))
++test_vreinterpret_f32_f64 ()
++{
++  float64x1_t a;
++  float32x2_t b;
++  float64_t c[1] = { PI_F64 };
++  /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. 
*/ ++ float32_t d[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; ++ float32_t e[2]; + int i; + -+ b = wrap_vdup_n_u16 (a); -+ vst1_u16 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) ++ a = vld1_f64 (c); ++ b = wrap_vreinterpret_f32_f64 (a); ++ vst1_f32 (e, b); ++ for (i = 0; i < 2; i++) ++ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) + return 1; + return 0; -+} ++}; + -+uint16x8_t __attribute__ ((noinline)) -+wrap_vdupq_n_u16 (uint16_t a) ++int8x8_t __attribute__ ((noinline)) ++wrap_vreinterpret_s8_f64 (float64x1_t __a) +{ -+ return vdupq_n_u16 (a); ++ return vreinterpret_s8_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdupq_n_u16 () ++test_vreinterpret_s8_f64 () +{ -+ uint16_t a = 1; -+ uint16x8_t b; -+ uint16_t c[8]; ++ float64x1_t a; ++ int8x8_t b; ++ float64_t c[1] = { PI_F64 }; ++ int8_t d[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; ++ int8_t e[8]; + int i; + -+ b = wrap_vdupq_n_u16 (a); -+ vst1q_u16 (c, b); ++ a = vld1_f64 (c); ++ b = wrap_vreinterpret_s8_f64 (a); ++ vst1_s8 (e, b); + for (i = 0; i < 8; i++) -+ if (a != c[i]) ++ if (d[i] != e[i]) + return 1; + return 0; -+} ++}; + -+int32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_s32 (int32_t a) ++int16x4_t __attribute__ ((noinline)) ++wrap_vreinterpret_s16_f64 (float64x1_t __a) +{ -+ return vdup_n_s32 (a); ++ return vreinterpret_s16_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdup_n_s32 () ++test_vreinterpret_s16_f64 () +{ -+ int32_t a = 1; -+ int32x2_t b; -+ int32_t c[2]; ++ float64x1_t a; ++ int16x4_t b; ++ float64_t c[1] = { PI_F64 }; ++ int16_t d[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; ++ int16_t e[4]; + int i; + -+ b = wrap_vdup_n_s32 (a); -+ vst1_s32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) ++ a = vld1_f64 (c); ++ b = wrap_vreinterpret_s16_f64 (a); ++ vst1_s16 (e, b); ++ for (i = 0; i < 4; i++) ++ if (d[i] != e[i]) + return 1; + return 0; -+} ++}; + -+int32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_s32 (int32_t a) ++int32x2_t __attribute__ ((noinline)) ++wrap_vreinterpret_s32_f64 (float64x1_t __a) +{ -+ return vdupq_n_s32 (a); ++ return vreinterpret_s32_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdupq_n_s32 () ++test_vreinterpret_s32_f64 () +{ -+ int32_t a = 1; -+ int32x4_t b; -+ int32_t c[4]; ++ float64x1_t a; ++ int32x2_t b; ++ float64_t c[1] = { PI_F64 }; ++ int32_t d[2] = { 0x54442D18, 0x400921FB }; ++ int32_t e[2]; + int i; + -+ b = wrap_vdupq_n_s32 (a); -+ vst1q_s32 (c, b); -+ for (i = 0; i < 4; i++) -+ if (a != c[i]) ++ a = vld1_f64 (c); ++ b = wrap_vreinterpret_s32_f64 (a); ++ vst1_s32 (e, b); ++ for (i = 0; i < 2; i++) ++ if (d[i] != e[i]) + return 1; + return 0; -+} ++}; + -+uint32x2_t __attribute__ ((noinline)) -+wrap_vdup_n_u32 (uint32_t a) ++int64x1_t __attribute__ ((noinline)) ++wrap_vreinterpret_s64_f64 (float64x1_t __a) +{ -+ return vdup_n_u32 (a); ++ return vreinterpret_s64_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdup_n_u32 () ++test_vreinterpret_s64_f64 () +{ -+ uint32_t a = 1; -+ uint32x2_t b; -+ uint32_t c[2]; ++ float64x1_t a; ++ int64x1_t b; ++ float64_t c[1] = { PI_F64 }; ++ int64_t d[1] = { 0x400921FB54442D18 }; ++ int64_t e[1]; + int i; + -+ b = wrap_vdup_n_u32 (a); -+ vst1_u32 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) -+ return 1; ++ a = vld1_f64 (c); ++ b = wrap_vreinterpret_s64_f64 (a); ++ vst1_s64 (e, b); ++ if (d[0] != e[0]) ++ return 1; + return 0; -+} ++}; + -+uint32x4_t __attribute__ ((noinline)) -+wrap_vdupq_n_u32 (uint32_t a) ++float32x4_t __attribute__ ((noinline)) 
++wrap_vreinterpretq_f32_f64 (float64x2_t __a) +{ -+ return vdupq_n_u32 (a); ++ return vreinterpretq_f32_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdupq_n_u32 () ++test_vreinterpretq_f32_f64 () +{ -+ uint32_t a = 1; -+ uint32x4_t b; -+ uint32_t c[4]; ++ float64x2_t a; ++ float32x4_t b; ++ float64_t c[2] = { PI_F64, E_F64 }; ++ ++ /* Values corresponding to f32 reinterpret of ++ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. */ ++ float32_t d[4] = { 3.3702805504E12, ++ 2.1426990032196044921875E0, ++ -2.8569523269651966444143014594E-32, ++ 2.089785099029541015625E0 }; ++ float32_t e[4]; + int i; + -+ b = wrap_vdupq_n_u32 (a); -+ vst1q_u32 (c, b); ++ a = vld1q_f64 (c); ++ b = wrap_vreinterpretq_f32_f64 (a); ++ vst1q_f32 (e, b); + for (i = 0; i < 4; i++) -+ if (a != c[i]) -+ return 1; ++ { ++ if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) ++ return 1; ++ } + return 0; -+} ++}; + -+int64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_s64 (int64_t a) ++int8x16_t __attribute__ ((noinline)) ++wrap_vreinterpretq_s8_f64 (float64x2_t __a) +{ -+ return vdup_n_s64 (a); ++ return vreinterpretq_s8_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdup_n_s64 () ++test_vreinterpretq_s8_f64 () +{ -+ int64_t a = 1; -+ int64x1_t b; -+ int64_t c[1]; ++ float64x2_t a; ++ int8x16_t b; ++ float64_t c[2] = { PI_F64, E_F64 }; ++ int8_t d[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, ++ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; ++ int8_t e[16]; + int i; + -+ b = wrap_vdup_n_s64 (a); -+ vst1_s64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) ++ a = vld1q_f64 (c); ++ b = wrap_vreinterpretq_s8_f64 (a); ++ vst1q_s8 (e, b); ++ for (i = 0; i < 16; i++) ++ if (d[i] != e[i]) + return 1; + return 0; -+} ++}; + -+int64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_s64 (int64_t a) ++int16x8_t __attribute__ ((noinline)) ++wrap_vreinterpretq_s16_f64 (float64x2_t __a) +{ -+ return vdupq_n_s64 (a); ++ return vreinterpretq_s16_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdupq_n_s64 () ++test_vreinterpretq_s16_f64 () +{ -+ int64_t a = 1; -+ int64x2_t b; -+ int64_t c[2]; ++ float64x2_t a; ++ int16x8_t b; ++ float64_t c[2] = { PI_F64, E_F64 }; ++ int16_t d[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, ++ 0x5769, 0x8B14, 0xBF0A, 0x4005 }; ++ int16_t e[8]; + int i; + -+ b = wrap_vdupq_n_s64 (a); -+ vst1q_s64 (c, b); -+ for (i = 0; i < 2; i++) -+ if (a != c[i]) ++ a = vld1q_f64 (c); ++ b = wrap_vreinterpretq_s16_f64 (a); ++ vst1q_s16 (e, b); ++ for (i = 0; i < 8; i++) ++ if (d[i] != e[i]) + return 1; + return 0; -+} ++}; + -+uint64x1_t __attribute__ ((noinline)) -+wrap_vdup_n_u64 (uint64_t a) ++int32x4_t __attribute__ ((noinline)) ++wrap_vreinterpretq_s32_f64 (float64x2_t __a) +{ -+ return vdup_n_u64 (a); ++ return vreinterpretq_s32_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdup_n_u64 () ++test_vreinterpretq_s32_f64 () +{ -+ uint64_t a = 1; -+ uint64x1_t b; -+ uint64_t c[1]; ++ float64x2_t a; ++ int32x4_t b; ++ float64_t c[2] = { PI_F64, E_F64 }; ++ int32_t d[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; ++ int32_t e[4]; + int i; + -+ b = wrap_vdup_n_u64 (a); -+ vst1_u64 (c, b); -+ for (i = 0; i < 1; i++) -+ if (a != c[i]) ++ a = vld1q_f64 (c); ++ b = wrap_vreinterpretq_s32_f64 (a); ++ vst1q_s32 (e, b); ++ for (i = 0; i < 4; i++) ++ if (d[i] != e[i]) + return 1; + return 0; -+} ++}; + -+uint64x2_t __attribute__ ((noinline)) -+wrap_vdupq_n_u64 (uint64_t a) ++int64x2_t __attribute__ ((noinline)) ++wrap_vreinterpretq_s64_f64 (float64x2_t __a) +{ -+ return 
vdupq_n_u64 (a); ++ return vreinterpretq_s64_f64 (__a); +} + +int __attribute__ ((noinline)) -+test_vdupq_n_u64 () ++test_vreinterpretq_s64_f64 () +{ -+ uint64_t a = 1; -+ uint64x2_t b; -+ uint64_t c[2]; ++ float64x2_t a; ++ int64x2_t b; ++ float64_t c[2] = { PI_F64, E_F64 }; ++ int64_t d[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; ++ int64_t e[2]; + int i; + -+ b = wrap_vdupq_n_u64 (a); -+ vst1q_u64 (c, b); ++ a = vld1q_f64 (c); ++ b = wrap_vreinterpretq_s64_f64 (a); ++ vst1q_s64 (e, b); + for (i = 0; i < 2; i++) -+ if (a != c[i]) ++ if (d[i] != e[i]) + return 1; + return 0; -+} ++}; + -+int -+main () ++float64x1_t __attribute__ ((noinline)) ++wrap_vreinterpret_f64_f32 (float32x2_t __a) +{ -+ if (test_vdup_n_f32 ()) -+ abort (); -+ if (test_vdup_n_f64 ()) -+ abort (); -+ if (test_vdup_n_p8 ()) -+ abort (); -+ if (test_vdup_n_u8 ()) -+ abort (); -+ if (test_vdup_n_s8 ()) -+ abort (); -+ if (test_vdup_n_p16 ()) -+ abort (); -+ if (test_vdup_n_s16 ()) -+ abort (); -+ if (test_vdup_n_u16 ()) -+ abort (); -+ if (test_vdup_n_s32 ()) -+ abort (); -+ if (test_vdup_n_u32 ()) -+ abort (); -+ if (test_vdup_n_s64 ()) -+ abort (); -+ if (test_vdup_n_u64 ()) -+ abort (); -+ if (test_vdupq_n_f32 ()) -+ abort (); -+ if (test_vdupq_n_f64 ()) -+ abort (); -+ if (test_vdupq_n_p8 ()) -+ abort (); -+ if (test_vdupq_n_u8 ()) -+ abort (); -+ if (test_vdupq_n_s8 ()) -+ abort (); -+ if (test_vdupq_n_p16 ()) -+ abort (); -+ if (test_vdupq_n_s16 ()) -+ abort (); -+ if (test_vdupq_n_u16 ()) -+ abort (); -+ if (test_vdupq_n_s32 ()) -+ abort (); -+ if (test_vdupq_n_u32 ()) -+ abort (); -+ if (test_vdupq_n_s64 ()) -+ abort (); -+ if (test_vdupq_n_u64 ()) -+ abort (); -+ return 0; ++ return vreinterpret_f64_f32 (__a); +} + -+/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64. -+ Cannot force floating point value in general purpose regester. */ -+ -+/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */ -+ -+/* Asm check for test_vdup_n_s32, test_vdup_n_u32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */ ++int __attribute__ ((noinline)) ++test_vreinterpret_f64_f32 () ++{ ++ float32x2_t a; ++ float64x1_t b; ++ /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. */ ++ float32_t c[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; ++ float64_t d[1] = { PI_F64 }; ++ float64_t e[1]; ++ int i; + -+/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */ ++ a = vld1_f32 (c); ++ b = wrap_vreinterpret_f64_f32 (a); ++ vst1_f64 (e, b); ++ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; + -+/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out. -+ Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+" -+ are not practical. 
*/ ++float64x1_t __attribute__ ((noinline)) ++wrap_vreinterpret_f64_s8 (int8x8_t __a) ++{ ++ return vreinterpret_f64_s8 (__a); ++} + -+/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64. */ -+/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */ ++int __attribute__ ((noinline)) ++test_vreinterpret_f64_s8 () ++{ ++ int8x8_t a; ++ float64x1_t b; ++ int8_t c[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; ++ float64_t d[1] = { PI_F64 }; ++ float64_t e[1]; ++ int i; + -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options " -O2 " } */ ++ a = vld1_s8 (c); ++ b = wrap_vreinterpret_f64_s8 (a); ++ vst1_f64 (e, b); ++ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; + -+float -+f_1 (float a, float b, float c, float d) ++float64x1_t __attribute__ ((noinline)) ++wrap_vreinterpret_f64_s16 (int16x4_t __a) +{ -+ if (a > 0.0) -+ return c; -+ else -+ return 2.0; ++ return vreinterpret_f64_s16 (__a); +} + -+double -+f_2 (double a, double b, double c, double d) ++int __attribute__ ((noinline)) ++test_vreinterpret_f64_s16 () +{ -+ if (a > b) -+ return c; -+ else -+ return d; -+} -+ -+/* { dg-final { scan-assembler-times "\tfcsel" 2 } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c -@@ -0,0 +1,59 @@ -+/* { dg-options "-O2" } */ -+/* { dg-do run } */ -+ -+extern void abort (void); ++ int16x4_t a; ++ float64x1_t b; ++ int16_t c[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; ++ float64_t d[1] = { PI_F64 }; ++ float64_t e[1]; ++ int i; + -+typedef unsigned int __u32; ++ a = vld1_s16 (c); ++ b = wrap_vreinterpret_f64_s16 (a); ++ vst1_f64 (e, b); ++ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; + -+__u32 -+__rev16_32_alt (__u32 x) ++float64x1_t __attribute__ ((noinline)) ++wrap_vreinterpret_f64_s32 (int32x2_t __a) +{ -+ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) -+ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); ++ return vreinterpret_f64_s32 (__a); +} + -+__u32 -+__rev16_32 (__u32 x) ++int __attribute__ ((noinline)) ++test_vreinterpret_f64_s32 () +{ -+ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) -+ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); -+} ++ int32x2_t a; ++ float64x1_t b; ++ int32_t c[2] = { 0x54442D18, 0x400921FB }; ++ float64_t d[1] = { PI_F64 }; ++ float64_t e[1]; ++ int i; + -+typedef unsigned long long __u64; ++ a = vld1_s32 (c); ++ b = wrap_vreinterpret_f64_s32 (a); ++ vst1_f64 (e, b); ++ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; + -+__u64 -+__rev16_64_alt (__u64 x) ++float64x1_t __attribute__ ((noinline)) ++wrap_vreinterpret_f64_s64 (int64x1_t __a) +{ -+ return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8) -+ | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8); ++ return vreinterpret_f64_s64 (__a); +} + -+__u64 -+__rev16_64 (__u64 x) ++int __attribute__ ((noinline)) ++test_vreinterpret_f64_s64 () +{ -+ return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8) -+ | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8); ++ int64x1_t a; ++ float64x1_t b; ++ int64_t c[1] = { 0x400921FB54442D18 }; ++ float64_t d[1] = { PI_F64 }; ++ float64_t e[1]; ++ ++ a = vld1_s64 (c); ++ b = wrap_vreinterpret_f64_s64 (a); ++ vst1_f64 (e, b); ++ if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; ++ ++float64x2_t __attribute__ 
((noinline)) ++wrap_vreinterpretq_f64_f32 (float32x4_t __a) ++{ ++ return vreinterpretq_f64_f32 (__a); +} + -+int -+main (void) ++int __attribute__ ((noinline)) ++test_vreinterpretq_f64_f32 () +{ -+ volatile __u32 in32 = 0x12345678; -+ volatile __u32 expected32 = 0x34127856; -+ volatile __u64 in64 = 0x1234567890abcdefUL; -+ volatile __u64 expected64 = 0x34127856ab90efcdUL; ++ float32x4_t a; ++ float64x2_t b; ++ /* Values corresponding to f32 reinterpret of ++ { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. */ ++ float32_t c[4] = { 3.3702805504E12, ++ 2.1426990032196044921875E0, ++ -2.8569523269651966444143014594E-32, ++ 2.089785099029541015625E0 }; + -+ if (__rev16_32 (in32) != expected32) -+ abort (); ++ float64_t d[2] = { PI_F64, E_F64 }; ++ float64_t e[2]; ++ int i; + -+ if (__rev16_32_alt (in32) != expected32) -+ abort (); ++ a = vld1q_f32 (c); ++ b = wrap_vreinterpretq_f64_f32 (a); ++ vst1q_f64 (e, b); ++ for (i = 0; i < 2; i++) ++ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; + -+ if (__rev16_64 (in64) != expected64) -+ abort (); ++float64x2_t __attribute__ ((noinline)) ++wrap_vreinterpretq_f64_s8 (int8x16_t __a) ++{ ++ return vreinterpretq_f64_s8 (__a); ++} + -+ if (__rev16_64_alt (in64) != expected64) -+ abort (); ++int __attribute__ ((noinline)) ++test_vreinterpretq_f64_s8 () ++{ ++ int8x16_t a; ++ float64x2_t b; ++ int8_t c[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, ++ 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; ++ float64_t d[2] = { PI_F64, E_F64 }; ++ float64_t e[2]; ++ int i; + ++ a = vld1q_s8 (c); ++ b = wrap_vreinterpretq_f64_s8 (a); ++ vst1q_f64 (e, b); ++ for (i = 0; i < 2; i++) ++ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) ++ return 1; + return 0; ++}; ++ ++float64x2_t __attribute__ ((noinline)) ++wrap_vreinterpretq_f64_s16 (int16x8_t __a) ++{ ++ return vreinterpretq_f64_s16 (__a); +} ---- a/src/gcc/testsuite/lib/target-supports.exp -+++ b/src/gcc/testsuite/lib/target-supports.exp -@@ -3306,6 +3306,24 @@ - return $et_vect_shift_saved - } - -+# Return 1 if the target supports vector bswap operations. + -+proc check_effective_target_vect_bswap { } { -+ global et_vect_bswap_saved ++int __attribute__ ((noinline)) ++test_vreinterpretq_f64_s16 () ++{ ++ int16x8_t a; ++ float64x2_t b; ++ int16_t c[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, ++ 0x5769, 0x8B14, 0xBF0A, 0x4005 }; ++ float64_t d[2] = { PI_F64, E_F64 }; ++ float64_t e[2]; ++ int i; + -+ if [info exists et_vect_bswap_saved] { -+ verbose "check_effective_target_vect_bswap: using cached result" 2 -+ } else { -+ set et_vect_bswap_saved 0 -+ if { [istarget aarch64*-*-*] } { -+ set et_vect_bswap_saved 1 -+ } -+ } ++ a = vld1q_s16 (c); ++ b = wrap_vreinterpretq_f64_s16 (a); ++ vst1q_f64 (e, b); ++ for (i = 0; i < 2; i++) ++ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; + -+ verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2 -+ return $et_vect_bswap_saved ++float64x2_t __attribute__ ((noinline)) ++wrap_vreinterpretq_f64_s32 (int32x4_t __a) ++{ ++ return vreinterpretq_f64_s32 (__a); +} + - # Return 1 if the target supports hardware vector shift operation for char. 
- - proc check_effective_target_vect_shift_char { } { -@@ -3504,8 +3522,7 @@ - } else { - set et_vect_perm_saved 0 - if { [is-effective-target arm_neon_ok] -- || ([istarget aarch64*-*-*] -- && [is-effective-target aarch64_little_endian]) -+ || [istarget aarch64*-*-*] - || [istarget powerpc*-*-*] - || [istarget spu-*-*] - || [istarget i?86-*-*] ---- a/src/gcc/testsuite/ChangeLog.linaro -+++ b/src/gcc/testsuite/ChangeLog.linaro -@@ -0,0 +1,176 @@ -+2014-06-25 Yvan Roux ++int __attribute__ ((noinline)) ++test_vreinterpretq_f64_s32 () ++{ ++ int32x4_t a; ++ float64x2_t b; ++ int32_t c[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; ++ float64_t d[2] = { PI_F64, E_F64 }; ++ float64_t e[2]; ++ int i; + -+ GCC Linaro 4.9-2014.06-1 released. ++ a = vld1q_s32 (c); ++ b = wrap_vreinterpretq_f64_s32 (a); ++ vst1q_f64 (e, b); ++ for (i = 0; i < 2; i++) ++ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; + -+2014-06-13 Yvan Roux ++float64x2_t __attribute__ ((noinline)) ++wrap_vreinterpretq_f64_s64 (int64x2_t __a) ++{ ++ return vreinterpretq_f64_s64 (__a); ++} + -+ Backport from trunk r211206. -+ 2014-06-03 Andrew Pinski ++int __attribute__ ((noinline)) ++test_vreinterpretq_f64_s64 () ++{ ++ int64x2_t a; ++ float64x2_t b; ++ int64_t c[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; ++ float64_t d[2] = { PI_F64, E_F64 }; ++ float64_t e[2]; ++ int i; + -+ * gcc.c-torture/compile/20140528-1.c: New testcase. ++ a = vld1q_s64 (c); ++ b = wrap_vreinterpretq_f64_s64 (a); ++ vst1q_f64 (e, b); ++ for (i = 0; i < 2; i++) ++ if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) ++ return 1; ++ return 0; ++}; + -+2014-06-12 Yvan Roux ++int ++main (int argc, char **argv) ++{ ++ if (test_vreinterpret_f32_f64 ()) ++ abort (); + -+ GCC Linaro 4.9-2014.06 released. ++ if (test_vreinterpret_s8_f64 ()) ++ abort (); ++ if (test_vreinterpret_s16_f64 ()) ++ abort (); ++ if (test_vreinterpret_s32_f64 ()) ++ abort (); ++ if (test_vreinterpret_s64_f64 ()) ++ abort (); + -+2014-05-25 Yvan Roux ++ if (test_vreinterpretq_f32_f64 ()) ++ abort (); + -+ Backport from trunk r209908. -+ 2013-04-29 Alan Lawrence ++ if (test_vreinterpretq_s8_f64 ()) ++ abort (); ++ if (test_vreinterpretq_s16_f64 ()) ++ abort (); ++ if (test_vreinterpretq_s32_f64 ()) ++ abort (); ++ if (test_vreinterpretq_s64_f64 ()) ++ abort (); + -+ * gcc.target/arm/simd/simd.exp: New file. -+ * gcc.target/arm/simd/vzipqf32_1.c: New file. -+ * gcc.target/arm/simd/vzipqp16_1.c: New file. -+ * gcc.target/arm/simd/vzipqp8_1.c: New file. -+ * gcc.target/arm/simd/vzipqs16_1.c: New file. -+ * gcc.target/arm/simd/vzipqs32_1.c: New file. -+ * gcc.target/arm/simd/vzipqs8_1.c: New file. -+ * gcc.target/arm/simd/vzipqu16_1.c: New file. -+ * gcc.target/arm/simd/vzipqu32_1.c: New file. -+ * gcc.target/arm/simd/vzipqu8_1.c: New file. -+ * gcc.target/arm/simd/vzipf32_1.c: New file. -+ * gcc.target/arm/simd/vzipp16_1.c: New file. -+ * gcc.target/arm/simd/vzipp8_1.c: New file. -+ * gcc.target/arm/simd/vzips16_1.c: New file. -+ * gcc.target/arm/simd/vzips32_1.c: New file. -+ * gcc.target/arm/simd/vzips8_1.c: New file. -+ * gcc.target/arm/simd/vzipu16_1.c: New file. -+ * gcc.target/arm/simd/vzipu32_1.c: New file. -+ * gcc.target/arm/simd/vzipu8_1.c: New file. 
++ if (test_vreinterpret_f64_f32 ()) ++ abort (); + -+2014-05-25 Yvan Roux ++ if (test_vreinterpret_f64_s8 ()) ++ abort (); ++ if (test_vreinterpret_f64_s16 ()) ++ abort (); ++ if (test_vreinterpret_f64_s32 ()) ++ abort (); ++ if (test_vreinterpret_f64_s64 ()) ++ abort (); + -+ Backport from trunk r209893. -+ 2014-04-29 Alan Lawrence ++ if (test_vreinterpretq_f64_f32 ()) ++ abort (); + -+ * gcc.target/aarch64/simd/simd.exp: New file. -+ * gcc.target/aarch64/simd/vzipf32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipf32.x: New file. -+ * gcc.target/aarch64/simd/vzipp16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipp16.x: New file. -+ * gcc.target/aarch64/simd/vzipp8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipp8.x: New file. -+ * gcc.target/aarch64/simd/vzipqf32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqf32.x: New file. -+ * gcc.target/aarch64/simd/vzipqp16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqp16.x: New file. -+ * gcc.target/aarch64/simd/vzipqp8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqp8.x: New file. -+ * gcc.target/aarch64/simd/vzipqs16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs16.x: New file. -+ * gcc.target/aarch64/simd/vzipqs32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs32.x: New file. -+ * gcc.target/aarch64/simd/vzipqs8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqs8.x: New file. -+ * gcc.target/aarch64/simd/vzipqu16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu16.x: New file. -+ * gcc.target/aarch64/simd/vzipqu32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu32.x: New file. -+ * gcc.target/aarch64/simd/vzipqu8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipqu8.x: New file. -+ * gcc.target/aarch64/simd/vzips16_1.c: New file. -+ * gcc.target/aarch64/simd/vzips16.x: New file. -+ * gcc.target/aarch64/simd/vzips32_1.c: New file. -+ * gcc.target/aarch64/simd/vzips32.x: New file. -+ * gcc.target/aarch64/simd/vzips8_1.c: New file. -+ * gcc.target/aarch64/simd/vzips8.x: New file. -+ * gcc.target/aarch64/simd/vzipu16_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu16.x: New file. -+ * gcc.target/aarch64/simd/vzipu32_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu32.x: New file. -+ * gcc.target/aarch64/simd/vzipu8_1.c: New file. -+ * gcc.target/aarch64/simd/vzipu8.x: New file. ++ if (test_vreinterpretq_f64_s8 ()) ++ abort (); ++ if (test_vreinterpretq_f64_s16 ()) ++ abort (); ++ if (test_vreinterpretq_f64_s32 ()) ++ abort (); ++ if (test_vreinterpretq_f64_s64 ()) ++ abort (); + -+2014-05-25 Yvan Roux ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x +@@ -0,0 +1,114 @@ ++extern void abort (void); + -+ Backport from trunk r209808. -+ 2014-04-25 Jiong Wang ++int16x8_t ++test_vextq_s16_1 (int16x8_t a, int16x8_t b) ++{ ++ return vextq_s16 (a, b, 1); ++} + -+ * gcc.target/arm/tail-long-call.c: New test. ++int16x8_t ++test_vextq_s16_2 (int16x8_t a, int16x8_t b) ++{ ++ return vextq_s16 (a, b, 2); ++} + -+2014-05-25 Yvan Roux ++int16x8_t ++test_vextq_s16_3 (int16x8_t a, int16x8_t b) ++{ ++ return vextq_s16 (a, b, 3); ++} + -+ Backport from trunk r209749. -+ 2014-04-24 Alan Lawrence ++int16x8_t ++test_vextq_s16_4 (int16x8_t a, int16x8_t b) ++{ ++ return vextq_s16 (a, b, 4); ++} + -+ * lib/target-supports.exp (check_effective_target_vect_perm): Return -+ true for aarch64_be. 
++int16x8_t ++test_vextq_s16_5 (int16x8_t a, int16x8_t b) ++{ ++ return vextq_s16 (a, b, 5); ++} + -+2014-05-23 Yvan Roux ++int16x8_t ++test_vextq_s16_6 (int16x8_t a, int16x8_t b) ++{ ++ return vextq_s16 (a, b, 6); ++} + -+ Backport from trunk r209736. -+ 2014-04-24 Kyrylo Tkachov ++int16x8_t ++test_vextq_s16_7 (int16x8_t a, int16x8_t b) ++{ ++ return vextq_s16 (a, b, 7); ++} + -+ * lib/target-supports.exp (check_effective_target_vect_bswap): New. -+ * gcc.dg/vect/vect-bswap16: New test. -+ * gcc.dg/vect/vect-bswap32: Likewise. -+ * gcc.dg/vect/vect-bswap64: Likewise. ++int ++main (int argc, char **argv) ++{ ++ int i, off; ++ int16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; ++ int16x8_t in1 = vld1q_s16 (arr1); ++ int16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; ++ int16x8_t in2 = vld1q_s16 (arr2); ++ int16_t exp[8]; ++ int16x8_t expected; ++ int16x8_t actual = test_vextq_s16_1 (in1, in2); + -+2014-05-23 Yvan Roux ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 1; ++ expected = vld1q_s16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ Backport from trunk r209713. -+ 2014-04-23 Alex Velenko ++ actual = test_vextq_s16_2 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 2; ++ expected = vld1q_s16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ * gcc.target/aarch64/vdup_lane_1.c: New testcase. -+ * gcc.target/aarch64/vdup_lane_2.c: New testcase. -+ * gcc.target/aarch64/vdup_n_1.c: New testcase. ++ actual = test_vextq_s16_3 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 3; ++ expected = vld1q_s16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+2014-05-23 Yvan Roux ++ actual = test_vextq_s16_4 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 4; ++ expected = vld1q_s16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ Backport from trunk r209704, 209705. -+ 2014-04-23 Kyrylo Tkachov ++ actual = test_vextq_s16_5 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 5; ++ expected = vld1q_s16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ * gcc.target/arm/rev16.c: New test. ++ actual = test_vextq_s16_6 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 6; ++ expected = vld1q_s16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ 2014-04-23 Kyrylo Tkachov ++ actual = test_vextq_s16_7 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 7; ++ expected = vld1q_s16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ * gcc.target/aarch64/rev16_1.c: New test. ++ return 0; ++} + -+2014-05-23 Yvan Roux +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x +@@ -0,0 +1,114 @@ ++extern void abort (void); + -+ Backport from trunk r209642. -+ 2014-04-22 Alex Velenko ++uint8x8_t ++test_vext_u8_1 (uint8x8_t a, uint8x8_t b) ++{ ++ return vext_u8 (a, b, 1); ++} + -+ * gcc.target/aarch64/vreinterpret_f64_1.c: New. ++uint8x8_t ++test_vext_u8_2 (uint8x8_t a, uint8x8_t b) ++{ ++ return vext_u8 (a, b, 2); ++} + -+2014-05-23 Yvan Roux ++uint8x8_t ++test_vext_u8_3 (uint8x8_t a, uint8x8_t b) ++{ ++ return vext_u8 (a, b, 3); ++} + -+ Backport from trunk r209640. -+ 2014-04-22 Alex Velenko ++uint8x8_t ++test_vext_u8_4 (uint8x8_t a, uint8x8_t b) ++{ ++ return vext_u8 (a, b, 4); ++} + -+ * gcc.target/aarch64/vqneg_s64_1.c: New testcase. -+ * gcc.target/aarch64/vqabs_s64_1.c: New testcase. 
++uint8x8_t ++test_vext_u8_5 (uint8x8_t a, uint8x8_t b) ++{ ++ return vext_u8 (a, b, 5); ++} + -+2014-05-23 Yvan Roux ++uint8x8_t ++test_vext_u8_6 (uint8x8_t a, uint8x8_t b) ++{ ++ return vext_u8 (a, b, 6); ++} + -+ Backport from trunk r209613, 209614. -+ 2014-04-22 Ian Bolton ++uint8x8_t ++test_vext_u8_7 (uint8x8_t a, uint8x8_t b) ++{ ++ return vext_u8 (a, b, 7); ++} + -+ * gcc.target/arm/anddi_notdi-1.c: New test. -+ * gcc.target/arm/iordi_notdi-1.c: New test case. ++int ++main (int argc, char **argv) ++{ ++ int i, off; ++ uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; ++ uint8x8_t in1 = vld1_u8 (arr1); ++ uint8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; ++ uint8x8_t in2 = vld1_u8 (arr2); ++ uint8_t exp[8]; ++ uint8x8_t expected; ++ uint8x8_t actual = test_vext_u8_1 (in1, in2); + -+ 2014-04-22 Ian Bolton ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 1; ++ expected = vld1_u8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ * gcc.target/arm/iordi_notdi-1.c: New test. ++ actual = test_vext_u8_2 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 2; ++ expected = vld1_u8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+2014-05-23 Yvan Roux ++ actual = test_vext_u8_3 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 3; ++ expected = vld1_u8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ Backport from trunk r209559. -+ 2014-04-22 Alex Velenko ++ actual = test_vext_u8_4 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 4; ++ expected = vld1_u8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ * gcc.target/aarch64/vrnd_f64_1.c : New file. ++ actual = test_vext_u8_5 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 5; ++ expected = vld1_u8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+2014-05-14 Yvan Roux ++ actual = test_vext_u8_6 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 6; ++ expected = vld1_u8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ GCC Linaro 4.9-2014.05 released. ++ actual = test_vext_u8_7 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 7; ++ expected = vld1_u8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+2014-05-13 Yvan Roux ++ return 0; ++} + -+ Backport from trunk r209889. -+ 2014-04-29 Zhenqiang Chen +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x +@@ -0,0 +1,114 @@ ++extern void abort (void); + -+ * gcc.target/aarch64/fcsel_1.c: New test case. ++uint16x8_t ++test_vextq_u16_1 (uint16x8_t a, uint16x8_t b) ++{ ++ return vextq_u16 (a, b, 1); ++} + -+2014-04-22 Yvan Roux ++uint16x8_t ++test_vextq_u16_2 (uint16x8_t a, uint16x8_t b) ++{ ++ return vextq_u16 (a, b, 2); ++} + -+ GCC Linaro 4.9-2014.04 released. 
---- a/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c -+++ b/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c -@@ -0,0 +1,9 @@ -+unsigned f(unsigned flags, unsigned capabilities) ++uint16x8_t ++test_vextq_u16_3 (uint16x8_t a, uint16x8_t b) +{ -+ unsigned gfp_mask; -+ unsigned gfp_notmask = 0; -+ gfp_mask = flags & ((1 << 25) - 1); -+ if (!(capabilities & 0x00000001)) -+ gfp_mask |= 0x1000000u; -+ return (gfp_mask & ~gfp_notmask); ++ return vextq_u16 (a, b, 3); +} ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ + -+#include "tree-vect.h" ++uint16x8_t ++test_vextq_u16_4 (uint16x8_t a, uint16x8_t b) ++{ ++ return vextq_u16 (a, b, 4); ++} + -+#define N 128 ++uint16x8_t ++test_vextq_u16_5 (uint16x8_t a, uint16x8_t b) ++{ ++ return vextq_u16 (a, b, 5); ++} + -+volatile int y = 0; ++uint16x8_t ++test_vextq_u16_6 (uint16x8_t a, uint16x8_t b) ++{ ++ return vextq_u16 (a, b, 6); ++} + -+static inline void -+vfoo32 (unsigned int* a) ++uint16x8_t ++test_vextq_u16_7 (uint16x8_t a, uint16x8_t b) +{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap32 (a[i]); ++ return vextq_u16 (a, b, 7); +} + +int -+main (void) ++main (int argc, char **argv) +{ -+ unsigned int arr[N]; -+ unsigned int expect[N]; -+ int i; -+ -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap32 (i); -+ if (y) /* Avoid vectorisation. */ -+ abort (); -+ } ++ int i, off; ++ uint16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; ++ uint16x8_t in1 = vld1q_u16 (arr1); ++ uint16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; ++ uint16x8_t in2 = vld1q_u16 (arr2); ++ uint16_t exp[8]; ++ uint16x8_t expected; ++ uint16x8_t actual = test_vextq_u16_1 (in1, in2); + -+ vfoo32 (arr); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 1; ++ expected = vld1q_u16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } ++ actual = test_vextq_u16_2 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 2; ++ expected = vld1q_u16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); + -+ return 0; ++ actual = test_vextq_u16_3 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 3; ++ expected = vld1q_u16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u16_4 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 4; ++ expected = vld1q_u16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u16_5 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 5; ++ expected = vld1q_u16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u16_6 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 6; ++ expected = vld1q_u16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u16_7 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 7; ++ expected = vld1q_u16 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ return 0; +} + -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ +--- 
a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vzip_s16' AArch64 SIMD intrinsic. */ + -+#include "tree-vect.h" ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + -+#define N 128 ++#include ++#include "vzips16.x" + -+volatile int y = 0; ++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x +@@ -0,0 +1,26 @@ ++extern void abort (void); + -+static inline void -+vfoo16 (unsigned short int* a) ++int16x8x2_t ++test_vuzpqs16 (int16x8_t _a, int16x8_t _b) +{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap16 (a[i]); ++ return vuzpq_s16 (_a, _b); +} + +int -+main (void) ++main (int argc, char **argv) +{ -+ unsigned short arr[N]; -+ unsigned short expect[N]; + int i; ++ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ int16x8x2_t result = test_vuzpqs16 (vld1q_s16 (first), vld1q_s16 (second)); ++ int16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; ++ int16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; ++ int16x8_t expect1 = vld1q_s16 (exp1); ++ int16x8_t expect2 = vld1q_s16 (exp2); + -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap16 (i); -+ if (y) /* Avoid vectorisation. */ -+ abort (); -+ } -+ -+ vfoo16 (arr); -+ -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } ++ for (i = 0; i < 8; i++) ++ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) ++ abort (); + + return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vzipq_s8' AArch64 SIMD intrinsic. */ + -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c -@@ -0,0 +1,44 @@ -+/* { dg-require-effective-target vect_bswap } */ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + -+#include "tree-vect.h" ++#include ++#include "vzipqs8.x" + -+#define N 128 ++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev64q_p8' AArch64 SIMD intrinsic. 
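   As in the vrev64qu8 harness elsewhere in this patch, the companion .x file should reverse the byte order within each 64-bit half of a 16-byte vector; a single rev64 on the .16b form is expected below.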
*/ + -+volatile int y = 0; ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + -+static inline void -+vfoo64 (unsigned long long* a) ++#include ++#include "vrev64qp8.x" ++ ++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vtrn_u16' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include ++#include "vtrnu16.x" ++ ++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x +@@ -0,0 +1,26 @@ ++extern void abort (void); ++ ++uint16x8x2_t ++test_vuzpqu16 (uint16x8_t _a, uint16x8_t _b) +{ -+ int i = 0; -+ for (i = 0; i < N; ++i) -+ a[i] = __builtin_bswap64 (a[i]); ++ return vuzpq_u16 (_a, _b); +} + +int -+main (void) ++main (int argc, char **argv) +{ -+ unsigned long long arr[N]; -+ unsigned long long expect[N]; + int i; ++ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ uint16x8x2_t result = test_vuzpqu16 (vld1q_u16 (first), vld1q_u16 (second)); ++ uint16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; ++ uint16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; ++ uint16x8_t expect1 = vld1q_u16 (exp1); ++ uint16x8_t expect2 = vld1q_u16 (exp2); + -+ for (i = 0; i < N; ++i) -+ { -+ arr[i] = i; -+ expect[i] = __builtin_bswap64 (i); -+ if (y) /* Avoid vectorisation. */ -+ abort (); -+ } -+ -+ vfoo64 (arr); -+ -+ for (i = 0; i < N; ++i) -+ { -+ if (arr[i] != expect[i]) -+ abort (); -+ } ++ for (i = 0; i < 8; i++) ++ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) ++ abort (); + + return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x +@@ -0,0 +1,26 @@ ++extern void abort (void); + -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ -+/* { dg-final { cleanup-tree-dump "vect" } } */ ---- a/src/gcc/objcp/ChangeLog.linaro -+++ b/src/gcc/objcp/ChangeLog.linaro -@@ -0,0 +1,15 @@ -+2014-06-25 Yvan Roux ++uint8x8x2_t ++test_vuzpu8 (uint8x8_t _a, uint8x8_t _b) ++{ ++ return vuzp_u8 (_a, _b); ++} + -+ GCC Linaro 4.9-2014.06-1 released. ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ uint8x8x2_t result = test_vuzpu8 (vld1_u8 (first), vld1_u8 (second)); ++ uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; ++ uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; ++ uint8x8_t expect1 = vld1_u8 (exp1); ++ uint8x8_t expect2 = vld1_u8 (exp2); + -+2014-06-12 Yvan Roux ++ for (i = 0; i < 8; i++) ++ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) ++ abort (); + -+ GCC Linaro 4.9-2014.06 released. ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vextu16' AArch64 SIMD intrinsic. 
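   vext_u16 concatenates its two 4x16-bit inputs and extracts four consecutive elements starting at the immediate lane offset, which is why the ext scan below expects the 8-byte form three times, once per non-trivial offset.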
*/ + -+2014-05-14 Yvan Roux ++/* { dg-do run } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ + -+ GCC Linaro 4.9-2014.05 released. ++#include "arm_neon.h" ++#include "ext_u16.x" + -+2014-04-22 Yvan Roux ++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vextQu8' AArch64 SIMD intrinsic. */ + -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/cp/ChangeLog.linaro -+++ b/src/gcc/cp/ChangeLog.linaro -@@ -0,0 +1,15 @@ -+2014-06-25 Yvan Roux ++/* { dg-do run } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ + -+ GCC Linaro 4.9-2014.06-1 released. ++#include "arm_neon.h" ++#include "extq_u8.x" + -+2014-06-12 Yvan Roux ++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x +@@ -0,0 +1,22 @@ ++extern void abort (void); + -+ GCC Linaro 4.9-2014.06 released. ++uint8x16_t ++test_vrev64qu8 (uint8x16_t _arg) ++{ ++ return vrev64q_u8 (_arg); ++} + -+2014-05-14 Yvan Roux ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ uint8x16_t reversed = test_vrev64qu8 (inorder); ++ uint8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; + -+ GCC Linaro 4.9-2014.05 released. ++ for (i = 0; i < 16; i++) ++ if (reversed[i] != expected[i]) ++ abort (); ++ return 0; ++} + -+2014-04-22 Yvan Roux +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev32_p8' AArch64 SIMD intrinsic. */ + -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/expr.c -+++ b/src/gcc/expr.c -@@ -68,22 +68,6 @@ - #include "tree-ssa-address.h" - #include "cfgexpand.h" - --/* Decide whether a function's arguments should be processed -- from first to last or from last to first. -- -- They should if the stack and args grow in opposite directions, but -- only if we have push insns. */ -- --#ifdef PUSH_ROUNDING -- --#ifndef PUSH_ARGS_REVERSED --#if defined (STACK_GROWS_DOWNWARD) != defined (ARGS_GROW_DOWNWARD) --#define PUSH_ARGS_REVERSED /* If it's last to first. */ --#endif --#endif -- --#endif -- - #ifndef STACK_PUSH_CODE - #ifdef STACK_GROWS_DOWNWARD - #define STACK_PUSH_CODE PRE_DEC -@@ -4353,11 +4337,7 @@ - /* Loop over all the words allocated on the stack for this arg. */ - /* We can do it by words, because any scalar bigger than a word - has a size a multiple of a word. */ --#ifndef PUSH_ARGS_REVERSED -- for (i = not_stack; i < size; i++) --#else - for (i = size - 1; i >= not_stack; i--) --#endif - if (i >= not_stack + offset) - emit_push_insn (operand_subword_force (x, i, mode), - word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX, ---- a/src/gcc/go/ChangeLog.linaro -+++ b/src/gcc/go/ChangeLog.linaro -@@ -0,0 +1,15 @@ -+2014-06-25 Yvan Roux ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + -+ GCC Linaro 4.9-2014.06-1 released. 
++#include ++#include "vrev32p8.x" + -+2014-06-12 Yvan Roux ++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x +@@ -0,0 +1,17 @@ ++extern void abort (void); + -+ GCC Linaro 4.9-2014.06 released. ++int ++main (int argc, char **argv) ++{ ++ int i, off; ++ int64_t arr1[] = {0}; ++ int64x1_t in1 = vld1_s64 (arr1); ++ int64_t arr2[] = {1}; ++ int64x1_t in2 = vld1_s64 (arr2); ++ int64x1_t actual = vext_s64 (in1, in2, 0); ++ if (actual != in1) ++ abort (); + -+2014-05-14 Yvan Roux ++ return 0; ++} + -+ GCC Linaro 4.9-2014.05 released. +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x +@@ -0,0 +1,26 @@ ++extern void abort (void); + -+2014-04-22 Yvan Roux ++int32x2x2_t ++test_vuzps32 (int32x2_t _a, int32x2_t _b) ++{ ++ return vuzp_s32 (_a, _b); ++} + -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/ada/ChangeLog.linaro -+++ b/src/gcc/ada/ChangeLog.linaro -@@ -0,0 +1,59 @@ -+2014-06-25 Yvan Roux ++int ++main (int argc, char **argv) ++{ ++ int i; ++ int32_t first[] = {1, 2}; ++ int32_t second[] = {3, 4}; ++ int32x2x2_t result = test_vuzps32 (vld1_s32 (first), vld1_s32 (second)); ++ int32_t exp1[] = {1, 3}; ++ int32_t exp2[] = {2, 4}; ++ int32x2_t expect1 = vld1_s32 (exp1); ++ int32x2_t expect2 = vld1_s32 (exp2); + -+ GCC Linaro 4.9-2014.06-1 released. ++ for (i = 0; i < 2; i++) ++ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) ++ abort (); + -+2014-06-12 Yvan Roux ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x +@@ -0,0 +1,26 @@ ++extern void abort (void); + -+ GCC Linaro 4.9-2014.06 released. ++uint32x2x2_t ++test_vuzpu32 (uint32x2_t _a, uint32x2_t _b) ++{ ++ return vuzp_u32 (_a, _b); ++} + -+2014-05-14 Yvan Roux ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint32_t first[] = {1, 2}; ++ uint32_t second[] = {3, 4}; ++ uint32x2x2_t result = test_vuzpu32 (vld1_u32 (first), vld1_u32 (second)); ++ uint32_t exp1[] = {1, 3}; ++ uint32_t exp2[] = {2, 4}; ++ uint32x2_t expect1 = vld1_u32 (exp1); ++ uint32x2_t expect2 = vld1_u32 (exp2); + -+ GCC Linaro 4.9-2014.05 released. ++ for (i = 0; i < 2; i++) ++ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) ++ abort (); + -+2014-05-13 Yvan Roux ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x +@@ -0,0 +1,17 @@ ++extern void abort (void); + -+ Backport from trunk r209653,209866,209871. ++int ++main (int argc, char **argv) ++{ ++ int i, off; ++ uint64_t arr1[] = {0}; ++ uint64x1_t in1 = vld1_u64 (arr1); ++ uint64_t arr2[] = {1}; ++ uint64x1_t in2 = vld1_u64 (arr2); ++ uint64x1_t actual = vext_u64 (in1, in2, 0); ++ if (actual != in1) ++ abort (); + -+ 2014-04-28 Richard Henderson ++ return 0; ++} + -+ * gcc-interface/Makefile.in: Support aarch64-linux. +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vtrn_s8' AArch64 SIMD intrinsic. 
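   vtrn_s8 transposes pairs of adjacent 8-bit elements from its two inputs; exactly one trn1 and one trn2 on the 8-byte form are expected below.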
*/ + -+ 2014-04-28 Eric Botcazou ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + -+ * exp_dbug.ads (Get_External_Name): Add 'False' default to Has_Suffix, -+ add 'Suffix' parameter and adjust comment. -+ (Get_External_Name_With_Suffix): Delete. -+ * exp_dbug.adb (Get_External_Name_With_Suffix): Merge into... -+ (Get_External_Name): ...here. Add 'False' default to Has_Suffix, add -+ 'Suffix' parameter. -+ (Get_Encoded_Name): Remove 2nd argument in call to Get_External_Name. -+ Call Get_External_Name instead of Get_External_Name_With_Suffix. -+ (Get_Secondary_DT_External_Name): Likewise. -+ * exp_cg.adb (Write_Call_Info): Likewise. -+ * exp_disp.adb (Export_DT): Likewise. -+ (Import_DT): Likewise. -+ * comperr.ads (Compiler_Abort): Remove Code parameter and add From_GCC -+ parameter with False default. -+ * comperr.adb (Compiler_Abort): Likewise. Adjust accordingly. -+ * types.h (Fat_Pointer): Rename into... -+ (String_Pointer): ...this. Add comment on interfacing rules. -+ * fe.h (Compiler_Abort): Adjust for above renaming. -+ (Error_Msg_N): Likewise. -+ (Error_Msg_NE): Likewise. -+ (Get_External_Name): Likewise. Add third parameter. -+ (Get_External_Name_With_Suffix): Delete. -+ * gcc-interface/decl.c (STDCALL_PREFIX): Define. -+ (create_concat_name): Adjust call to Get_External_Name, remove call to -+ Get_External_Name_With_Suffix, use STDCALL_PREFIX, adjust for renaming. -+ * gcc-interface/trans.c (post_error): Likewise. -+ (post_error_ne): Likewise. -+ * gcc-interface/misc.c (internal_error_function): Likewise. ++#include ++#include "vtrns8.x" + -+ 2014-04-22 Richard Henderson ++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vtrnq_s16' AArch64 SIMD intrinsic. */ + -+ * init.c [__linux__] (HAVE_GNAT_ALTERNATE_STACK): New define. -+ (__gnat_alternate_stack): Enable for all linux except ia64. ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + -+2014-04-22 Yvan Roux ++#include ++#include "vtrnqs16.x" + -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/fortran/ChangeLog.linaro -+++ b/src/gcc/fortran/ChangeLog.linaro -@@ -0,0 +1,15 @@ -+2014-06-25 Yvan Roux ++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev64q_s32' AArch64 SIMD intrinsic. */ + -+ GCC Linaro 4.9-2014.06-1 released. ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + -+2014-06-12 Yvan Roux ++#include ++#include "vrev64qs32.x" + -+ GCC Linaro 4.9-2014.06 released. 
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev64_s8' AArch64 SIMD intrinsic. */ + -+2014-05-14 Yvan Roux ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + -+ GCC Linaro 4.9-2014.05 released. ++#include ++#include "vrev64s8.x" + -+2014-04-22 Yvan Roux ++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x +@@ -0,0 +1,27 @@ ++extern void abort (void); + -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/configure.ac -+++ b/src/gcc/configure.ac -@@ -809,7 +809,7 @@ - ) - AC_SUBST(CONFIGURE_SPECS) - --ACX_PKGVERSION([GCC]) -+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) - ACX_BUGURL([http://gcc.gnu.org/bugs.html]) - - # Sanity check enable_languages in case someone does not run the toplevel ---- a/src/gcc/calls.c -+++ b/src/gcc/calls.c -@@ -1104,8 +1104,6 @@ - { - CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far); - location_t loc = EXPR_LOCATION (exp); -- /* 1 if scanning parms front to back, -1 if scanning back to front. */ -- int inc; - - /* Count arg position in order args appear. */ - int argpos; -@@ -1116,22 +1114,9 @@ - args_size->var = 0; - - /* In this loop, we consider args in the order they are written. -- We fill up ARGS from the front or from the back if necessary -- so that in any case the first arg to be pushed ends up at the front. */ -+ We fill up ARGS from the back. */ - -- if (PUSH_ARGS_REVERSED) -- { -- i = num_actuals - 1, inc = -1; -- /* In this case, must reverse order of args -- so that we compute and push the last arg first. */ -- } -- else -- { -- i = 0, inc = 1; -- } -- -- /* First fill in the actual arguments in the ARGS array, splitting -- complex arguments if necessary. */ -+ i = num_actuals - 1; - { - int j = i; - call_expr_arg_iterator iter; -@@ -1140,7 +1125,7 @@ - if (struct_value_addr_value) - { - args[j].tree_value = struct_value_addr_value; -- j += inc; -+ j--; - } - FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) - { -@@ -1152,17 +1137,17 @@ - { - tree subtype = TREE_TYPE (argtype); - args[j].tree_value = build1 (REALPART_EXPR, subtype, arg); -- j += inc; -+ j--; - args[j].tree_value = build1 (IMAGPART_EXPR, subtype, arg); - } - else - args[j].tree_value = arg; -- j += inc; -+ j--; - } - } - - /* I counts args in order (to be) pushed; ARGPOS counts in order written. */ -- for (argpos = 0; argpos < num_actuals; i += inc, argpos++) -+ for (argpos = 0; argpos < num_actuals; i--, argpos++) - { - tree type = TREE_TYPE (args[i].tree_value); - int unsignedp; -@@ -2952,9 +2937,8 @@ - - compute_argument_addresses (args, argblock, num_actuals); - -- /* If we push args individually in reverse order, perform stack alignment -- before the first push (the last arg). */ -- if (PUSH_ARGS_REVERSED && argblock == 0 -+ /* Perform stack alignment before the first push (the last arg). 
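   Arguments are now always pushed from last to first, so the adjustment is made unconditionally before the first push.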
*/ -+ if (argblock == 0 - && adjusted_args_size.constant > reg_parm_stack_space - && adjusted_args_size.constant != unadjusted_args_size) - { -@@ -3097,12 +3081,6 @@ - sibcall_failure = 1; - } - -- /* If we pushed args in forward order, perform stack alignment -- after pushing the last arg. */ -- if (!PUSH_ARGS_REVERSED && argblock == 0) -- anti_adjust_stack (GEN_INT (adjusted_args_size.constant -- - unadjusted_args_size)); -- - /* If register arguments require space on the stack and stack space - was not preallocated, allocate stack space here for arguments - passed in registers. */ -@@ -3152,8 +3130,7 @@ - if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) - { - int arg_nr = return_flags & ERF_RETURN_ARG_MASK; -- if (PUSH_ARGS_REVERSED) -- arg_nr = num_actuals - arg_nr - 1; -+ arg_nr = num_actuals - arg_nr - 1; - if (arg_nr >= 0 - && arg_nr < num_actuals - && args[arg_nr].reg -@@ -3597,7 +3574,6 @@ - isn't present here, so we default to native calling abi here. */ - tree fndecl ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ - tree fntype ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ -- int inc; - int count; - rtx argblock = 0; - CUMULATIVE_ARGS args_so_far_v; -@@ -3946,22 +3922,13 @@ - argblock = push_block (GEN_INT (args_size.constant), 0, 0); - } - -- /* If we push args individually in reverse order, perform stack alignment -+ /* We push args individually in reverse order, perform stack alignment - before the first push (the last arg). */ -- if (argblock == 0 && PUSH_ARGS_REVERSED) -+ if (argblock == 0) - anti_adjust_stack (GEN_INT (args_size.constant - - original_args_size.constant)); - -- if (PUSH_ARGS_REVERSED) -- { -- inc = -1; -- argnum = nargs - 1; -- } -- else -- { -- inc = 1; -- argnum = 0; -- } -+ argnum = nargs - 1; - - #ifdef REG_PARM_STACK_SPACE - if (ACCUMULATE_OUTGOING_ARGS) -@@ -3978,7 +3945,7 @@ - - /* ARGNUM indexes the ARGVEC array in the order in which the arguments - are to be pushed. */ -- for (count = 0; count < nargs; count++, argnum += inc) -+ for (count = 0; count < nargs; count++, argnum--) - { - enum machine_mode mode = argvec[argnum].mode; - rtx val = argvec[argnum].value; -@@ -4080,17 +4047,8 @@ - } - } - -- /* If we pushed args in forward order, perform stack alignment -- after pushing the last arg. */ -- if (argblock == 0 && !PUSH_ARGS_REVERSED) -- anti_adjust_stack (GEN_INT (args_size.constant -- - original_args_size.constant)); -+ argnum = nargs - 1; - -- if (PUSH_ARGS_REVERSED) -- argnum = nargs - 1; -- else -- argnum = 0; -- - fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); - - /* Now load any reg parms into their regs. */ -@@ -4097,7 +4055,7 @@ - - /* ARGNUM indexes the ARGVEC array in the order in which the arguments - are to be pushed. */ -- for (count = 0; count < nargs; count++, argnum += inc) -+ for (count = 0; count < nargs; count++, argnum--) - { - enum machine_mode mode = argvec[argnum].mode; - rtx val = argvec[argnum].value; ---- a/src/gcc/lto/ChangeLog.linaro -+++ b/src/gcc/lto/ChangeLog.linaro -@@ -0,0 +1,15 @@ -+2014-06-25 Yvan Roux ++int16x8x2_t ++test_vzipqs16 (int16x8_t _a, int16x8_t _b) ++{ ++ return vzipq_s16 (_a, _b); ++} + -+ GCC Linaro 4.9-2014.06-1 released. 
++int ++main (int argc, char **argv) ++{ ++ int i; ++ int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ int16x8x2_t result = test_vzipqs16 (vld1q_s16 (first), vld1q_s16 (second)); ++ int16x8_t res1 = result.val[0], res2 = result.val[1]; ++ int16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; ++ int16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; ++ int16x8_t expected1 = vld1q_s16 (exp1); ++ int16x8_t expected2 = vld1q_s16 (exp2); + -+2014-06-12 Yvan Roux ++ for (i = 0; i < 8; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); + -+ GCC Linaro 4.9-2014.06 released. ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x +@@ -0,0 +1,27 @@ ++extern void abort (void); + -+2014-05-14 Yvan Roux ++float32x2x2_t ++test_vzipf32 (float32x2_t _a, float32x2_t _b) ++{ ++ return vzip_f32 (_a, _b); ++} + -+ GCC Linaro 4.9-2014.05 released. ++int ++main (int argc, char **argv) ++{ ++ int i; ++ float32_t first[] = {1, 2}; ++ float32_t second[] = {3, 4}; ++ float32x2x2_t result = test_vzipf32 (vld1_f32 (first), vld1_f32 (second)); ++ float32x2_t res1 = result.val[0], res2 = result.val[1]; ++ float32_t exp1[] = {1, 3}; ++ float32_t exp2[] = {2, 4}; ++ float32x2_t expected1 = vld1_f32 (exp1); ++ float32x2_t expected2 = vld1_f32 (exp2); + -+2014-04-22 Yvan Roux ++ for (i = 0; i < 2; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); + -+ GCC Linaro 4.9-2014.04 released. ---- a/src/gcc/po/ChangeLog.linaro -+++ b/src/gcc/po/ChangeLog.linaro -@@ -0,0 +1,15 @@ -+2014-06-25 Yvan Roux ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x +@@ -0,0 +1,27 @@ ++extern void abort (void); + -+ GCC Linaro 4.9-2014.06-1 released. ++uint8x8x2_t ++test_vzipu8 (uint8x8_t _a, uint8x8_t _b) ++{ ++ return vzip_u8 (_a, _b); ++} + -+2014-06-12 Yvan Roux ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ uint8x8x2_t result = test_vzipu8 (vld1_u8 (first), vld1_u8 (second)); ++ uint8x8_t res1 = result.val[0], res2 = result.val[1]; ++ uint8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; ++ uint8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; ++ uint8x8_t expected1 = vld1_u8 (exp1); ++ uint8x8_t expected2 = vld1_u8 (exp2); + -+ GCC Linaro 4.9-2014.06 released. ++ for (i = 0; i < 8; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); + -+2014-05-14 Yvan Roux ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x +@@ -0,0 +1,27 @@ ++extern void abort (void); + -+ GCC Linaro 4.9-2014.05 released. ++uint16x8x2_t ++test_vzipqu16 (uint16x8_t _a, uint16x8_t _b) ++{ ++ return vzipq_u16 (_a, _b); ++} + -+2014-04-22 Yvan Roux ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ uint16x8x2_t result = test_vzipqu16 (vld1q_u16 (first), vld1q_u16 (second)); ++ uint16x8_t res1 = result.val[0], res2 = result.val[1]; ++ uint16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; ++ uint16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; ++ uint16x8_t expected1 = vld1q_u16 (exp1); ++ uint16x8_t expected2 = vld1q_u16 (exp2); + -+ GCC Linaro 4.9-2014.04 released. 
---- a/src/gcc/config/host-linux.c -+++ b/src/gcc/config/host-linux.c -@@ -86,6 +86,8 @@ - # define TRY_EMPTY_VM_SPACE 0x60000000 - #elif defined(__mc68000__) - # define TRY_EMPTY_VM_SPACE 0x40000000 -+#elif defined(__aarch64__) && defined(__ILP32__) -+# define TRY_EMPTY_VM_SPACE 0x60000000 - #elif defined(__aarch64__) - # define TRY_EMPTY_VM_SPACE 0x1000000000 - #elif defined(__ARM_EABI__) ---- a/src/gcc/config/aarch64/aarch64-simd.md -+++ b/src/gcc/config/aarch64/aarch64-simd.md -@@ -19,8 +19,8 @@ - ;; . - - (define_expand "mov" -- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") -- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] -+ [(set (match_operand:VALL 0 "nonimmediate_operand" "") -+ (match_operand:VALL 1 "general_operand" ""))] - "TARGET_SIMD" - " - if (GET_CODE (operands[0]) == MEM) -@@ -29,8 +29,8 @@ - ) - - (define_expand "movmisalign" -- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") -- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] -+ [(set (match_operand:VALL 0 "nonimmediate_operand" "") -+ (match_operand:VALL 1 "general_operand" ""))] - "TARGET_SIMD" - { - /* This pattern is not permitted to fail during expansion: if both arguments -@@ -91,9 +91,9 @@ - ) - - (define_insn "*aarch64_simd_mov" -- [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" -+ [(set (match_operand:VD 0 "nonimmediate_operand" - "=w, m, w, ?r, ?w, ?r, w") -- (match_operand:VD 1 "aarch64_simd_general_operand" -+ (match_operand:VD 1 "general_operand" - "m, w, w, w, r, r, Dn"))] - "TARGET_SIMD - && (register_operand (operands[0], mode) -@@ -119,9 +119,9 @@ - ) - - (define_insn "*aarch64_simd_mov" -- [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand" -+ [(set (match_operand:VQ 0 "nonimmediate_operand" - "=w, m, w, ?r, ?w, ?r, w") -- (match_operand:VQ 1 "aarch64_simd_general_operand" -+ (match_operand:VQ 1 "general_operand" - "m, w, w, w, r, r, Dn"))] - "TARGET_SIMD - && (register_operand (operands[0], mode) -@@ -286,6 +286,14 @@ - [(set_attr "type" "neon_mul_")] - ) - -+(define_insn "bswap" -+ [(set (match_operand:VDQHSD 0 "register_operand" "=w") -+ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] -+ "TARGET_SIMD" -+ "rev\\t%0., %1." -+ [(set_attr "type" "neon_rev")] -+) ++ for (i = 0; i < 8; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); + - (define_insn "*aarch64_mul3_elt" - [(set (match_operand:VMUL 0 "register_operand" "=w") - (mult:VMUL -@@ -1452,7 +1460,7 @@ - ) - - ;; Vector versions of the floating-point frint patterns. --;; Expands to btrunc, ceil, floor, nearbyint, rint, round. -+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. - (define_insn "2" - [(set (match_operand:VDQF 0 "register_operand" "=w") - (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] -@@ -2259,6 +2267,15 @@ - DONE; - }) - -+(define_expand "aarch64_reinterpretdf" -+ [(match_operand:DF 0 "register_operand" "") -+ (match_operand:VD_RE 1 "register_operand" "")] -+ "TARGET_SIMD" -+{ -+ aarch64_simd_reinterpret (operands[0], operands[1]); -+ DONE; -+}) ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vextQs16' AArch64 SIMD intrinsic. 
*/ + - (define_expand "aarch64_reinterpretv16qi" - [(match_operand:V16QI 0 "register_operand" "") - (match_operand:VQ 1 "register_operand" "")] -@@ -2610,9 +2627,9 @@ - ;; q - - (define_insn "aarch64_s" -- [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w") -- (UNQOPS:VSDQ_I_BHSI -- (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] -+ [(set (match_operand:VSDQ_I 0 "register_operand" "=w") -+ (UNQOPS:VSDQ_I -+ (match_operand:VSDQ_I 1 "register_operand" "w")))] - "TARGET_SIMD" - "s\\t%0, %1" - [(set_attr "type" "neon_")] -@@ -3527,26 +3544,46 @@ - ))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cm\t%d0, %d, %d -- cm\t%d0, %d1, #0 -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. */ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (COMPARISONS:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero") -+ )))] - { -- enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); -- rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); -- rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. */ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); -+ rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); -+ rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. */ - } - [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] - ) - -+(define_insn "*aarch64_cmdi" -+ [(set (match_operand:DI 0 "register_operand" "=w,w") -+ (neg:DI -+ (COMPARISONS:DI -+ (match_operand:DI 1 "register_operand" "w,w") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") -+ )))] -+ "TARGET_SIMD && reload_completed" -+ "@ -+ cm\t%d0, %d, %d -+ cm\t%d0, %d1, #0" -+ [(set_attr "type" "neon_compare, neon_compare_zero")] -+) ++/* { dg-do run } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ + - ;; cm(hs|hi) - - (define_insn "aarch64_cm" -@@ -3570,25 +3607,44 @@ - ))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cm\t%d0, %d, %d -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. */ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (UCOMPARISONS:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero") -+ )))] - { -- enum machine_mode mode = CCmode; -- rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); -- rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. 
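
The vext family tested here (extq_s16_1.c and relatives) concatenates its two operands and extracts a window of lanes starting at the immediate. A minimal scalar sketch of that semantics, consistent with the exp[i] = i + n pattern used by the ext .x files in this patch; the helper name is illustrative, not from the testsuite.

#include <stdint.h>
#include <assert.h>

/* ext: lane i of the result is lane (n + i) of the concatenation a|b.  */
static void
ext_model (const int16_t *a, const int16_t *b, int len, int n, int16_t *out)
{
  for (int i = 0; i < len; i++)
    out[i] = (n + i < len) ? a[n + i] : b[n + i - len];
}

int
main (void)
{
  int16_t a[] = {0, 1, 2, 3, 4, 5, 6, 7};
  int16_t b[] = {8, 9, 10, 11, 12, 13, 14, 15};
  int16_t out[8];
  ext_model (a, b, 8, 3, out);
  for (int i = 0; i < 8; i++)
    assert (out[i] == i + 3);   /* same exp[i] = i + n pattern as ext_p8.x */
  return 0;
}
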
*/ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ enum machine_mode mode = CCmode; -+ rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); -+ rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. */ - } -- [(set_attr "type" "neon_compare, neon_compare_zero")] -+ [(set_attr "type" "neon_compare,multiple")] - ) - -+(define_insn "*aarch64_cmdi" -+ [(set (match_operand:DI 0 "register_operand" "=w") -+ (neg:DI -+ (UCOMPARISONS:DI -+ (match_operand:DI 1 "register_operand" "w") -+ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") -+ )))] -+ "TARGET_SIMD && reload_completed" -+ "cm\t%d0, %d, %d" -+ [(set_attr "type" "neon_compare")] -+) ++#include "arm_neon.h" ++#include "extq_s16.x" + - ;; cmtst - - (define_insn "aarch64_cmtst" -@@ -3614,23 +3670,44 @@ - (const_int 0)))) - (clobber (reg:CC CC_REGNUM))] - "TARGET_SIMD" -- "@ -- cmtst\t%d0, %d1, %d2 -- #" -- "reload_completed -- /* We need to prevent the split from -- happening in the 'w' constraint cases. */ -- && GP_REGNUM_P (REGNO (operands[0])) -- && GP_REGNUM_P (REGNO (operands[1]))" -- [(const_int 0)] -+ "#" -+ "reload_completed" -+ [(set (match_operand:DI 0 "register_operand") -+ (neg:DI -+ (ne:DI -+ (and:DI -+ (match_operand:DI 1 "register_operand") -+ (match_operand:DI 2 "register_operand")) -+ (const_int 0))))] - { -- rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); -- enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); -- rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); -- rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); -- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -- DONE; -+ /* If we are in the general purpose register file, -+ we split to a sequence of comparison and store. */ -+ if (GP_REGNUM_P (REGNO (operands[0])) -+ && GP_REGNUM_P (REGNO (operands[1]))) -+ { -+ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); -+ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); -+ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); -+ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); -+ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); -+ DONE; -+ } -+ /* Otherwise, we expand to a similar pattern which does not -+ clobber CC_REGNUM. */ - } -+ [(set_attr "type" "neon_tst,multiple")] -+) ++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vuzpq_p16' AArch64 SIMD intrinsic. 
*/ + -+(define_insn "*aarch64_cmtstdi" -+ [(set (match_operand:DI 0 "register_operand" "=w") -+ (neg:DI -+ (ne:DI -+ (and:DI -+ (match_operand:DI 1 "register_operand" "w") -+ (match_operand:DI 2 "register_operand" "w")) -+ (const_int 0))))] -+ "TARGET_SIMD" -+ "cmtst\t%d0, %d1, %d2" - [(set_attr "type" "neon_tst")] - ) - -@@ -3721,6 +3798,17 @@ - [(set_attr "type" "neon_store2_2reg")] - ) - -+(define_insn "vec_store_lanesoi_lane" -+ [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec: [(match_operand:OI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST2_LANE))] -+ "TARGET_SIMD" -+ "st2\\t{%S1. - %T1.}[%2], %0" -+ [(set_attr "type" "neon_store3_one_lane")] -+) ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ + - (define_insn "vec_load_lanesci" - [(set (match_operand:CI 0 "register_operand" "=w") - (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") -@@ -3741,6 +3829,17 @@ - [(set_attr "type" "neon_store3_3reg")] - ) - -+(define_insn "vec_store_lanesci_lane" -+ [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec: [(match_operand:CI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST3_LANE))] -+ "TARGET_SIMD" -+ "st3\\t{%S1. - %U1.}[%2], %0" -+ [(set_attr "type" "neon_store3_one_lane")] -+) ++#include ++#include "vuzpqp16.x" + - (define_insn "vec_load_lanesxi" - [(set (match_operand:XI 0 "register_operand" "=w") - (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") -@@ -3761,6 +3860,17 @@ - [(set_attr "type" "neon_store4_4reg")] - ) - -+(define_insn "vec_store_lanesxi_lane" -+ [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") -+ (unspec: [(match_operand:XI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ UNSPEC_ST4_LANE))] -+ "TARGET_SIMD" -+ "st4\\t{%S1. - %V1.}[%2], %0" -+ [(set_attr "type" "neon_store4_one_lane")] -+) ++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x +@@ -0,0 +1,114 @@ ++extern void abort (void); + - ;; Reload patterns for AdvSIMD register list operands. 
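
vuzp is the inverse permute of vzip: uzp1 collects the even-indexed lanes of the concatenation and uzp2 the odd-indexed ones, which is exactly what the exp1/exp2 arrays in the vuzp .x files spell out. A scalar sketch under that reading; the helper name is mine.

#include <stdint.h>
#include <assert.h>

/* uzp: out1 takes the even lanes of a|b, out2 the odd lanes.  */
static void
uzp_model (const uint8_t *a, const uint8_t *b, int n,
           uint8_t *out1, uint8_t *out2)
{
  for (int i = 0; i < n; i++)
    {
      out1[i] = (2 * i < n) ? a[2 * i] : b[2 * i - n];
      out2[i] = (2 * i + 1 < n) ? a[2 * i + 1] : b[2 * i + 1 - n];
    }
}

int
main (void)
{
  uint8_t a[] = {1, 2, 3, 4, 5, 6, 7, 8};
  uint8_t b[] = {9, 10, 11, 12, 13, 14, 15, 16};
  uint8_t evens[8], odds[8];
  uzp_model (a, b, 8, evens, odds);
  assert (evens[0] == 1 && evens[4] == 9 && odds[7] == 16);
  return 0;
}
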
- - (define_expand "mov" -@@ -4255,6 +4365,57 @@ - DONE; - }) - -+(define_expand "aarch64_st2_lane" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:OI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" ++poly8x8_t ++test_vext_p8_1 (poly8x8_t a, poly8x8_t b) +{ -+ enum machine_mode mode = mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesoi_lane (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) ++ return vext_p8 (a, b, 1); ++} + -+(define_expand "aarch64_st3_lane" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:CI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" ++poly8x8_t ++test_vext_p8_2 (poly8x8_t a, poly8x8_t b) +{ -+ enum machine_mode mode = mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesci_lane (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) ++ return vext_p8 (a, b, 2); ++} + -+(define_expand "aarch64_st4_lane" -+ [(match_operand:DI 0 "register_operand" "r") -+ (match_operand:XI 1 "register_operand" "w") -+ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_SIMD" ++poly8x8_t ++test_vext_p8_3 (poly8x8_t a, poly8x8_t b) +{ -+ enum machine_mode mode = mode; -+ rtx mem = gen_rtx_MEM (mode, operands[0]); -+ operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); -+ -+ emit_insn (gen_vec_store_lanesxi_lane (mem, -+ operands[1], -+ operands[2])); -+ DONE; -+}) ++ return vext_p8 (a, b, 3); ++} + - (define_expand "aarch64_st1" - [(match_operand:DI 0 "register_operand") - (match_operand:VALL 1 "register_operand")] ---- a/src/gcc/config/aarch64/arm_neon.h -+++ b/src/gcc/config/aarch64/arm_neon.h -@@ -2318,6 +2318,12 @@ - return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); - } - -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vqneg_s64 (int64x1_t __a) ++poly8x8_t ++test_vext_p8_4 (poly8x8_t a, poly8x8_t b) +{ -+ return __builtin_aarch64_sqnegdi (__a); ++ return vext_p8 (a, b, 4); +} + - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vqnegq_s8 (int8x16_t __a) - { -@@ -2354,6 +2360,12 @@ - return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); - } - -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vqabs_s64 (int64x1_t __a) ++poly8x8_t ++test_vext_p8_5 (poly8x8_t a, poly8x8_t b) +{ -+ return __builtin_aarch64_sqabsdi (__a); ++ return vext_p8 (a, b, 5); +} + - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vqabsq_s8 (int8x16_t __a) - { -@@ -2643,1352 +2655,1587 @@ - /* vreinterpret */ - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_p8_f64 (float64x1_t __a) ++poly8x8_t ++test_vext_p8_6 (poly8x8_t a, poly8x8_t b) +{ -+ return __builtin_aarch64_reinterpretv8qidf_ps (__a); ++ return vext_p8 (a, b, 6); +} + -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s8 (int8x8_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s16 (int16x4_t __a) 
- { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s32 (int32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_s64 (int64x1_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_f32 (float32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u8 (uint8x8_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u16 (uint16x4_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u32 (uint32x2_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_u64 (uint64x1_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) - vreinterpret_p8_p16 (poly16x4_t __a) - { -- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (poly8x8_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_p8_f64 (float64x2_t __a) ++poly8x8_t ++test_vext_p8_7 (poly8x8_t a, poly8x8_t b) +{ -+ return (poly8x16_t) __a; ++ return vext_p8 (a, b, 7); +} + -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s8 (int8x16_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s16 (int16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s32 (int32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_s64 (int64x2_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_f32 (float32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u8 (uint8x16_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ 
static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u16 (uint16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u32 (uint32x4_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_u64 (uint64x2_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_p8_p16 (poly16x8_t __a) - { -- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (poly8x16_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_p16_f64 (float64x1_t __a) ++int ++main (int argc, char **argv) +{ -+ return __builtin_aarch64_reinterpretv4hidf_ps (__a); ++ int i, off; ++ poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; ++ poly8x8_t in1 = vld1_p8 (arr1); ++ poly8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; ++ poly8x8_t in2 = vld1_p8 (arr2); ++ poly8_t exp[8]; ++ poly8x8_t expected; ++ poly8x8_t actual = test_vext_p8_1 (in1, in2); ++ ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 1; ++ expected = vld1_p8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_p8_2 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 2; ++ expected = vld1_p8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_p8_3 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 3; ++ expected = vld1_p8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_p8_4 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 4; ++ expected = vld1_p8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_p8_5 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 5; ++ expected = vld1_p8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_p8_6 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 6; ++ expected = vld1_p8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_p8_7 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 7; ++ expected = vld1_p8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ return 0; +} + -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s8 (int8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s16 (int16x4_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s32 (int32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_s64 (int64x1_t __a) - { -- return 
(poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_f32 (float32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u8 (uint8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u16 (uint16x4_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u32 (uint32x2_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_u64 (uint64x1_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) - vreinterpret_p16_p8 (poly8x8_t __a) - { -- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (poly16x4_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_p16_f64 (float64x2_t __a) -+{ -+ return (poly16x8_t) __a; -+} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vuzpq_u32' AArch64 SIMD intrinsic. 
*/ + -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s8 (int8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s16 (int16x8_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s32 (int32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_s64 (int64x2_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_f32 (float32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u8 (uint8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u16 (uint16x8_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u32 (uint32x4_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_u64 (uint64x2_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_p16_p8 (poly8x16_t __a) - { -- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (poly16x8_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vreinterpret_f32_f64 (float64x1_t __a) ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include ++#include "vuzpqu32.x" ++ ++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x +@@ -0,0 +1,26 @@ ++extern void abort (void); ++ ++poly8x8x2_t ++test_vuzpp8 (poly8x8_t _a, poly8x8_t _b) +{ -+ return __builtin_aarch64_reinterpretv2sfdf (__a); ++ return vuzp_p8 (_a, _b); +} + -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s8 (int8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s16 (int16x4_t __a) - { -- 
return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s32 (int32x2_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_s64 (int64x1_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u8 (uint8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u16 (uint16x4_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u32 (uint32x2_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_u64 (uint64x1_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_p8 (poly8x8_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) - vreinterpret_f32_p16 (poly16x4_t __a) - { -- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) -- __a); -+ return (float32x2_t) __a; - } - - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_f32_f64 (float64x2_t __a) ++int ++main (int argc, char **argv) +{ -+ return (float32x4_t) __a; ++ int i; ++ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ poly8x8x2_t result = test_vuzpp8 (vld1_p8 (first), vld1_p8 (second)); ++ poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; ++ poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; ++ poly8x8_t expect1 = vld1_p8 (exp1); ++ poly8x8_t expect2 = vld1_p8 (exp2); ++ ++ for (i = 0; i < 8; i++) ++ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) ++ abort (); ++ ++ return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev32_s16' AArch64 SIMD intrinsic. 
*/
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev32s16.x"
++
++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzipq_p8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipqp8.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev32q_s8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev32qs8.x"
++
++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64_s32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64s32.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp
+@@ -0,0 +1,45 @@
++# Specific regression driver for AArch64 SIMD instructions.
++# Copyright (C) 2014 Free Software Foundation, Inc.
++# Contributed by ARM Ltd.
++#
++# This file is part of GCC.
++#
++# GCC is free software; you can redistribute it and/or modify it
++# under the terms of the GNU General Public License as published by
++# the Free Software Foundation; either version 3, or (at your option)
++# any later version.
++#
++# GCC is distributed in the hope that it will be useful, but
++# WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++# General Public License for more details.
++#
++# You should have received a copy of the GNU General Public License
++# along with GCC; see the file COPYING3.  If not see
++# <http://www.gnu.org/licenses/>.  */
++
++# GCC testsuite that uses the `dg.exp' driver.
++
++# Exit immediately if this isn't an AArch64 target.
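
simd.exp is a stock dg driver: it compiles and runs every C file in this directory with the default flags unless a test overrides them, after the target guard that follows. The tests it picks up all use the two-file shape visible throughout this patch, roughly as below. This is a sketch, not a file from the patch; "some_intrinsic.x" and the elided scan pattern are placeholders.

/* { dg-do run } */
/* { dg-options "-save-temps -fno-inline" } */

#include <arm_neon.h>
#include "some_intrinsic.x"   /* portable body: builds inputs, calls the
                                 intrinsic, aborts on any wrong lane */

/* { dg-final { scan-assembler-times "..." 1 } } */
/* { dg-final { cleanup-saved-temps } } */

-save-temps keeps the generated assembly so scan-assembler-times can check that exactly one instance of the expected instruction was emitted, while -fno-inline stops the thin wrapper functions from being folded away before the pattern match.
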
++if {![istarget aarch64*-*-*] } then { ++ return +} + -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u16 (uint16x4_t __a) -+{ -+ return __builtin_aarch64_reinterpretdfv4hi_su (__a); ++# Load support procs. ++load_lib gcc-dg.exp ++ ++# If a testcase doesn't have special options, use these. ++global DEFAULT_CFLAGS ++if ![info exists DEFAULT_CFLAGS] then { ++ set DEFAULT_CFLAGS " -ansi -pedantic-errors" +} + -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u32 (uint32x2_t __a) ++# Initialize `dg'. ++dg-init ++ ++# Main loop. ++dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ ++ "" $DEFAULT_CFLAGS ++ ++# All done. ++dg-finish +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x +@@ -0,0 +1,27 @@ ++extern void abort (void); ++ ++int16x4x2_t ++test_vtrns16 (int16x4_t _a, int16x4_t _b) +{ -+ return __builtin_aarch64_reinterpretdfv2si_su (__a); ++ return vtrn_s16 (_a, _b); +} + -+__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+vreinterpret_f64_u64 (uint64x1_t __a) ++int ++main (int argc, char **argv) +{ -+ return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0)); ++ int i; ++ int16_t first[] = {1, 2, 3, 4}; ++ int16_t second[] = {5, 6, 7, 8}; ++ int16x4x2_t result = test_vtrns16 (vld1_s16 (first), vld1_s16 (second)); ++ int16x4_t res1 = result.val[0], res2 = result.val[1]; ++ int16_t exp1[] = {1, 5, 3, 7}; ++ int16_t exp2[] = {2, 6, 4, 8}; ++ int16x4_t expected1 = vld1_s16 (exp1); ++ int16x4_t expected2 = vld1_s16 (exp2); ++ ++ for (i = 0; i < 4; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); ++ ++ return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev64q_u8' AArch64 SIMD intrinsic. 
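
vtrn, exercised by vtrns16.x and vtrnu16.x in this patch, treats the two inputs as columns of 2x2 blocks and transposes them: trn1 pairs the even lanes, trn2 the odd ones, matching the {1,5,3,7} / {2,6,4,8} expectations in those files. A scalar sketch (the helper name is mine):

#include <stdint.h>
#include <assert.h>

/* trn: out1 = {a0,b0,a2,b2,...}, out2 = {a1,b1,a3,b3,...}.  */
static void
trn_model (const int16_t *a, const int16_t *b, int n,
           int16_t *out1, int16_t *out2)
{
  for (int k = 0; k < n / 2; k++)
    {
      out1[2 * k]     = a[2 * k];      /* even lanes of a */
      out1[2 * k + 1] = b[2 * k];      /* even lanes of b */
      out2[2 * k]     = a[2 * k + 1];  /* odd lanes of a */
      out2[2 * k + 1] = b[2 * k + 1];  /* odd lanes of b */
    }
}

int
main (void)
{
  int16_t a[] = {1, 2, 3, 4}, b[] = {5, 6, 7, 8};
  int16_t t1[4], t2[4];
  trn_model (a, b, 4, t1, t2);
  assert (t1[0] == 1 && t1[1] == 5 && t2[0] == 2 && t2[1] == 6);
  return 0;
}
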
*/ + -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_f32 (float32x4_t __a) ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include ++#include "vrev64qu8.x" ++ ++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x +@@ -0,0 +1,22 @@ ++extern void abort (void); ++ ++poly8x16_t ++test_vrev64qp8 (poly8x16_t _arg) +{ -+ return (float64x2_t) __a; ++ return vrev64q_p8 (_arg); +} + -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_p8 (poly8x16_t __a) ++int ++main (int argc, char **argv) +{ -+ return (float64x2_t) __a; ++ int i; ++ poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ poly8x16_t reversed = test_vrev64qp8 (inorder); ++ poly8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; ++ ++ for (i = 0; i < 16; i++) ++ if (reversed[i] != expected[i]) ++ abort (); ++ return 0; +} + -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_p16 (poly16x8_t __a) +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x +@@ -0,0 +1,27 @@ ++extern void abort (void); ++ ++uint16x4x2_t ++test_vtrnu16 (uint16x4_t _a, uint16x4_t _b) +{ -+ return (float64x2_t) __a; ++ return vtrn_u16 (_a, _b); +} + -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s8 (int8x16_t __a) ++int ++main (int argc, char **argv) +{ -+ return (float64x2_t) __a; ++ int i; ++ uint16_t first[] = {1, 2, 3, 4}; ++ uint16_t second[] = {5, 6, 7, 8}; ++ uint16x4x2_t result = test_vtrnu16 (vld1_u16 (first), vld1_u16 (second)); ++ uint16x4_t res1 = result.val[0], res2 = result.val[1]; ++ uint16_t exp1[] = {1, 5, 3, 7}; ++ uint16_t exp2[] = {2, 6, 4, 8}; ++ uint16x4_t expected1 = vld1_u16 (exp1); ++ uint16x4_t expected2 = vld1_u16 (exp2); ++ ++ for (i = 0; i < 4; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); ++ ++ return 0; +} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x +@@ -0,0 +1,58 @@ ++extern void abort (void); + -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s16 (int16x8_t __a) ++poly16x4_t ++test_vext_p16_1 (poly16x4_t a, poly16x4_t b) +{ -+ return (float64x2_t) __a; ++ return vext_p16 (a, b, 1); +} + -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s32 (int32x4_t __a) ++poly16x4_t ++test_vext_p16_2 (poly16x4_t a, poly16x4_t b) +{ -+ return (float64x2_t) __a; ++ return vext_p16 (a, b, 2); +} + -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_s64 (int64x2_t __a) ++poly16x4_t ++test_vext_p16_3 (poly16x4_t a, poly16x4_t b) +{ -+ return (float64x2_t) __a; ++ return vext_p16 (a, b, 3); +} + -+__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+vreinterpretq_f64_u8 (uint8x16_t __a) ++int ++main (int argc, char **argv) +{ -+ return (float64x2_t) __a; ++ int i, off; ++ poly16_t arr1[] = {0, 1, 2, 3}; ++ poly16x4_t in1 = vld1_p16 (arr1); ++ poly16_t arr2[] = {4, 5, 6, 7}; ++ poly16x4_t in2 = vld1_p16 
(arr2);
++  poly16_t exp[4];
++  poly16x4_t expected;
++  poly16x4_t actual = test_vext_p16_1 (in1, in2);
++
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 1;
++  expected = vld1_p16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vext_p16_2 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 2;
++  expected = vld1_p16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vext_p16_3 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 3;
++  expected = vld1_p16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_p16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpp16.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x
+@@ -0,0 +1,29 @@
++extern void abort (void);
++
++uint8x16x2_t
++test_vzipqu8 (uint8x16_t _a, uint8x16_t _b)
++{
++  return vzipq_u8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  uint8_t second[] =
++    {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++  uint8x16x2_t result = test_vzipqu8 (vld1q_u8 (first), vld1q_u8 (second));
++  uint8x16_t res1 = result.val[0], res2 = result.val[1];
++  uint8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24};
++  uint8_t exp2[] =
++    {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32};
++  uint8x16_t expected1 = vld1q_u8 (exp1);
++  uint8x16_t expected2 = vld1q_u8 (exp2);
++
++  for (i = 0; i < 16; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vextu64' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_u64.x"
++
++/* Do not scan-assembler.  An EXT instruction could be emitted, but would merely
++   return its first argument, so it is legitimate to optimize it out.  */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_u32' AArch64 SIMD intrinsic. 
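
Note that the scan patterns for vuzp_u32 just below expect zip1/zip2 rather than uzp1/uzp2. That looks like a typo but is deliberate: with only two lanes per vector, de-interleave and interleave coincide, so the compiler may canonicalize one to the other. Concretely (a sketch, not testsuite code):

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  /* For 2-lane vectors, uzp1(a,b) = {a[0], b[0]} = zip1(a,b) and
     uzp2(a,b) = {a[1], b[1]} = zip2(a,b), so emitting zip1/zip2 for
     vuzp_u32 is a legitimate canonicalization.  */
  uint32_t a[2] = {1, 2}, b[2] = {3, 4};
  uint32_t uzp1[2] = {a[0], b[0]};   /* even lanes of a|b: {1, 3} */
  uint32_t zip1[2] = {a[0], b[0]};   /* low-half interleave: {1, 3} */
  uint32_t uzp2[2] = {a[1], b[1]};   /* odd lanes of a|b: {2, 4} */
  uint32_t zip2[2] = {a[1], b[1]};   /* high-half interleave: {2, 4} */
  assert (uzp1[0] == zip1[0] && uzp1[1] == zip1[1]);
  assert (uzp2[0] == zip2[0] && uzp2[1] == zip2[1]);
  return 0;
}

The 4-lane variants (vuzpqu32_1.c, vuzpp16_1.c above) still expect real uzp1/uzp2, where the two permutes differ.
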
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include ++#include "vuzpu32.x" ++ ++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev32q_p16' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include ++#include "vrev32qp16.x" ++ ++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x +@@ -0,0 +1,58 @@ ++extern void abort (void); ++ ++float32x4_t ++test_vextq_f32_1 (float32x4_t a, float32x4_t b) +{ -+ return (float64x2_t) __a; ++ return vextq_f32 (a, b, 1); +} + - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+vreinterpret_s64_f64 (float64x1_t __a) ++float32x4_t ++test_vextq_f32_2 (float32x4_t a, float32x4_t b) +{ -+ return __builtin_aarch64_reinterpretdidf (__a); ++ return vextq_f32 (a, b, 2); +} + -+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s8 (int8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s16 (int16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_s32 (int32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_f32 (float32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u8 (uint8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u16 (uint16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u32 (uint32x2_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_u64 (uint64x1_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_p8 (poly8x8_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ 
static __inline int64x1_t __attribute__ ((__always_inline__)) - vreinterpret_s64_p16 (poly16x4_t __a) - { -- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (int64x1_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vreinterpretq_s64_f64 (float64x2_t __a) ++float32x4_t ++test_vextq_f32_3 (float32x4_t a, float32x4_t b) +{ -+ return (int64x2_t) __a; ++ return vextq_f32 (a, b, 3); +} + -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s8 (int8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s16 (int16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_s32 (int32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_f32 (float32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u8 (uint8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u16 (uint16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u32 (uint32x4_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_u64 (uint64x2_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_p8 (poly8x16_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_s64_p16 (poly16x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (int64x2_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+vreinterpret_u64_f64 (float64x1_t __a) ++int ++main (int argc, char **argv) +{ -+ return __builtin_aarch64_reinterpretdidf_us (__a); ++ int i, off; ++ float32_t arr1[] = {0, 1, 2, 3}; ++ float32x4_t in1 = vld1q_f32 (arr1); ++ float32_t arr2[] = {4, 5, 6, 7}; ++ float32x4_t in2 = vld1q_f32 (arr2); ++ float32_t exp[4]; ++ float32x4_t expected; ++ float32x4_t actual = test_vextq_f32_1 (in1, in2); ++ ++ for (i = 0; i < 4; i++) ++ exp[i] = i + 1; ++ expected = vld1q_f32 (exp); ++ for (i = 0; i < 4; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_f32_2 (in1, in2); ++ for (i = 0; i < 4; i++) ++ exp[i] = i + 2; ++ expected = vld1q_f32 (exp); ++ for (i = 0; i < 4; i++) ++ if 
(actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_f32_3 (in1, in2); ++ for (i = 0; i < 4; i++) ++ exp[i] = i + 3; ++ expected = vld1q_f32 (exp); ++ for (i = 0; i < 4; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ return 0; +} + -+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s8 (int8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s16 (int16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s32 (int32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_s64 (int64x1_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_f32 (float32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u8 (uint8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u16 (uint16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_u32 (uint32x2_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_p8 (poly8x8_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) - vreinterpret_u64_p16 (poly16x4_t __a) - { -- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); -+ return (uint64x1_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vreinterpretq_u64_f64 (float64x2_t __a) +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vzipq_p16' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vzipqp16.x" ++ ++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vtrn_p8' AArch64 SIMD intrinsic. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vtrnp8.x" ++ ++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x +@@ -0,0 +1,227 @@ ++extern void abort (void); ++ ++uint8x16_t ++test_vextq_u8_1 (uint8x16_t a, uint8x16_t b) +{ -+ return (uint64x2_t) __a; ++ return vextq_u8 (a, b, 1); +} + -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s8 (int8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s16 (int16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s32 (int32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_s64 (int64x2_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_f32 (float32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_u8 (uint8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) -- __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_u16 (uint16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_u32 (uint32x4_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_p8 (poly8x16_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) -- __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) - vreinterpretq_u64_p16 (poly16x8_t __a) - { -- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); -+ return (uint64x2_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_s8_f64 (float64x1_t __a) ++uint8x16_t ++test_vextq_u8_2 (uint8x16_t a, uint8x16_t b) +{ -+ return __builtin_aarch64_reinterpretv8qidf (__a); ++ return vextq_u8 (a, b, 2); +} + -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s16 (int16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (int8x8_t) __a; - } - - 
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s32 (int32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_s64 (int64x1_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_f32 (float32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u8 (uint8x8_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u16 (uint16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u32 (uint32x2_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_u64 (uint64x1_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_p8 (poly8x8_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) - vreinterpret_s8_p16 (poly16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (int8x8_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_s8_f64 (float64x2_t __a) ++uint8x16_t ++test_vextq_u8_3 (uint8x16_t a, uint8x16_t b) +{ -+ return (int8x16_t) __a; ++ return vextq_u8 (a, b, 3); +} + -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_s16 (int16x8_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_s32 (int32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_s64 (int64x2_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_f32 (float32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u8 (uint8x16_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u16 (uint16x8_t __a) - { -- return (int8x16_t) 
__builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u32 (uint32x4_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_u64 (uint64x2_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_p8 (poly8x16_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_s8_p16 (poly16x8_t __a) - { -- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); -+ return (int8x16_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_s16_f64 (float64x1_t __a) ++uint8x16_t ++test_vextq_u8_4 (uint8x16_t a, uint8x16_t b) +{ -+ return __builtin_aarch64_reinterpretv4hidf (__a); ++ return vextq_u8 (a, b, 4); +} + -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s8 (int8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s32 (int32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_s64 (int64x1_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_f32 (float32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u8 (uint8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u16 (uint16x4_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u32 (uint32x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_u64 (uint64x1_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_p8 (poly8x8_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (int16x4_t) __a; - } - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) - vreinterpret_s16_p16 (poly16x4_t __a) - { -- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ 
return (int16x4_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_s16_f64 (float64x2_t __a) ++uint8x16_t ++test_vextq_u8_5 (uint8x16_t a, uint8x16_t b) +{ -+ return (int16x8_t) __a; ++ return vextq_u8 (a, b, 5); +} + -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s8 (int8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s32 (int32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_s64 (int64x2_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_f32 (float32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u8 (uint8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u16 (uint16x8_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u32 (uint32x4_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_u64 (uint64x2_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_p8 (poly8x16_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_s16_p16 (poly16x8_t __a) - { -- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (int16x8_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vreinterpret_s32_f64 (float64x1_t __a) ++uint8x16_t ++test_vextq_u8_6 (uint8x16_t a, uint8x16_t b) +{ -+ return __builtin_aarch64_reinterpretv2sidf (__a); ++ return vextq_u8 (a, b, 6); +} + -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s8 (int8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s16 (int16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_s64 (int64x1_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t 
__attribute__ ((__always_inline__)) - vreinterpret_s32_f32 (float32x2_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u8 (uint8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u16 (uint16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u32 (uint32x2_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_u64 (uint64x1_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_p8 (poly8x8_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) - vreinterpret_s32_p16 (poly16x4_t __a) - { -- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (int32x2_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_s32_f64 (float64x2_t __a) ++uint8x16_t ++test_vextq_u8_7 (uint8x16_t a, uint8x16_t b) +{ -+ return (int32x4_t) __a; ++ return vextq_u8 (a, b, 7); +} + -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s8 (int8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s16 (int16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_s64 (int64x2_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_f32 (float32x4_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u8 (uint8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u16 (uint16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u32 (uint32x4_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_u64 (uint64x2_t __a) - { -- return 
(int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_p8 (poly8x16_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_s32_p16 (poly16x8_t __a) - { -- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (int32x4_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vreinterpret_u8_f64 (float64x1_t __a) ++uint8x16_t ++test_vextq_u8_8 (uint8x16_t a, uint8x16_t b) +{ -+ return __builtin_aarch64_reinterpretv8qidf_us (__a); ++ return vextq_u8 (a, b, 8); +} + -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s8 (int8x8_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s16 (int16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s32 (int32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_s64 (int64x1_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_f32 (float32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u16 (uint16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u32 (uint32x2_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_u64 (uint64x1_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_p8 (poly8x8_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) - vreinterpret_u8_p16 (poly16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); -+ return (uint8x8_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vreinterpretq_u8_f64 (float64x2_t __a) ++uint8x16_t ++test_vextq_u8_9 (uint8x16_t a, uint8x16_t b) +{ -+ return (uint8x16_t) __a; ++ return vextq_u8 (a, b, 9); +} + -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s8 (int8x16_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); -+ return 
(uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s16 (int16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s32 (int32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_s64 (int64x2_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_f32 (float32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u16 (uint16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u32 (uint32x4_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_u64 (uint64x2_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_p8 (poly8x16_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) - vreinterpretq_u8_p16 (poly16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) -- __a); -+ return (uint8x16_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vreinterpret_u16_f64 (float64x1_t __a) ++uint8x16_t ++test_vextq_u8_10 (uint8x16_t a, uint8x16_t b) +{ -+ return __builtin_aarch64_reinterpretv4hidf_us (__a); ++ return vextq_u8 (a, b, 10); +} + -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s8 (int8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s16 (int16x4_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s32 (int32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_s64 (int64x1_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_f32 (float32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); -+ return (uint16x4_t) __a; - } - - __extension__ static 
__inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u8 (uint8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u32 (uint32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_u64 (uint64x1_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_p8 (poly8x8_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) - vreinterpret_u16_p16 (poly16x4_t __a) - { -- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); -+ return (uint16x4_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vreinterpretq_u16_f64 (float64x2_t __a) ++uint8x16_t ++test_vextq_u8_11 (uint8x16_t a, uint8x16_t b) +{ -+ return (uint16x8_t) __a; ++ return vextq_u8 (a, b, 11); +} + -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s8 (int8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s16 (int16x8_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s32 (int32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_s64 (int64x2_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_f32 (float32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u8 (uint8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u32 (uint32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_u64 (uint64x2_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) - vreinterpretq_u16_p8 (poly8x16_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) -- __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) - vreinterpretq_u16_p16 (poly16x8_t __a) - { -- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); -+ return (uint16x8_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vreinterpret_u32_f64 (float64x1_t __a) ++uint8x16_t ++test_vextq_u8_12 (uint8x16_t a, uint8x16_t b) +{ -+ return __builtin_aarch64_reinterpretv2sidf_us (__a); ++ return vextq_u8 (a, b, 12); +} + -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s8 (int8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s16 (int16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s32 (int32x2_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_s64 (int64x1_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_f32 (float32x2_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u8 (uint8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u16 (uint16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_u64 (uint64x1_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_p8 (poly8x8_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) - vreinterpret_u32_p16 (poly16x4_t __a) - { -- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); -+ return (uint32x2_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vreinterpretq_u32_f64 (float64x2_t __a) ++uint8x16_t ++test_vextq_u8_13 (uint8x16_t a, uint8x16_t b) +{ -+ return (uint32x4_t) __a; ++ return vextq_u8 (a, b, 13); +} + -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s8 (int8x16_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s16 (int16x8_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s32 
(int32x4_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_s64 (int64x2_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_f32 (float32x4_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); -+ return (uint32x4_t) __a; - } - - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_u8 (uint8x16_t __a) - { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) -- __a); -+ return (uint32x4_t) __a; - } - ++uint8x16_t ++test_vextq_u8_14 (uint8x16_t a, uint8x16_t b) ++{ ++ return vextq_u8 (a, b, 14); ++} ++ ++uint8x16_t ++test_vextq_u8_15 (uint8x16_t a, uint8x16_t b) ++{ ++ return vextq_u8 (a, b, 15); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; ++ uint8x16_t in1 = vld1q_u8 (arr1); ++ uint8_t arr2[] = ++ {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; ++ uint8x16_t in2 = vld1q_u8 (arr2); ++ uint8_t exp[16]; ++ uint8x16_t expected; ++ uint8x16_t actual = test_vextq_u8_1 (in1, in2); ++ ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 1; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_2 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 2; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_3 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 3; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_4 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 4; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_5 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 5; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_6 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 6; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_7 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 7; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_8 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 8; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_9 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 9; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_10 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 10; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_11 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 11; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_12 (in1, in2); ++ for 
(i = 0; i < 16; i++) ++ exp[i] = i + 12; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_13 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 13; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_14 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 14; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vextq_u8_15 (in1, in2); ++ for (i = 0; i < 16; i++) ++ exp[i] = i + 15; ++ expected = vld1q_u8 (exp); ++ for (i = 0; i < 16; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ return 0; ++} ++ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vzipq_u32' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vzipqu32.x" ++ ++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev64_p8' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vrev64p8.x" ++ ++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev32_u8' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vrev32u8.x" ++ ++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev16_s8' AArch64 SIMD intrinsic. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vrev16s8.x" ++ ++/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x +@@ -0,0 +1,26 @@ ++extern void abort (void); ++ ++float32x4x2_t ++test_vuzpqf32 (float32x4_t _a, float32x4_t _b) ++{ ++ return vuzpq_f32 (_a, _b); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ float32_t first[] = {1, 2, 3, 4}; ++ float32_t second[] = {5, 6, 7, 8}; ++ float32x4x2_t result = test_vuzpqf32 (vld1q_f32 (first), vld1q_f32 (second)); ++ float32_t exp1[] = {1, 3, 5, 7}; ++ float32_t exp2[] = {2, 4, 6, 8}; ++ float32x4_t expect1 = vld1q_f32 (exp1); ++ float32x4_t expect2 = vld1q_f32 (exp2); ++ ++ for (i = 0; i < 4; i++) ++ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) ++ abort (); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x +@@ -0,0 +1,27 @@ ++extern void abort (void); ++ ++poly8x8x2_t ++test_vzipp8 (poly8x8_t _a, poly8x8_t _b) ++{ ++ return vzip_p8 (_a, _b); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ poly8x8x2_t result = test_vzipp8 (vld1_p8 (first), vld1_p8 (second)); ++ poly8x8_t res1 = result.val[0], res2 = result.val[1]; ++ poly8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; ++ poly8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; ++ poly8x8_t expected1 = vld1_p8 (exp1); ++ poly8x8_t expected2 = vld1_p8 (exp2); ++ ++ for (i = 0; i < 8; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x +@@ -0,0 +1,27 @@ ++extern void abort (void); ++ ++int32x4x2_t ++test_vtrnqs32 (int32x4_t _a, int32x4_t _b) ++{ ++ return vtrnq_s32 (_a, _b); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ int32_t first[] = {1, 2, 3, 4}; ++ int32_t second[] = {5, 6, 7, 8}; ++ int32x4x2_t result = test_vtrnqs32 (vld1q_s32 (first), vld1q_s32 (second)); ++ int32x4_t res1 = result.val[0], res2 = result.val[1]; ++ int32_t exp1[] = {1, 5, 3, 7}; ++ int32_t exp2[] = {2, 6, 4, 8}; ++ int32x4_t expected1 = vld1q_s32 (exp1); ++ int32x4_t expected2 = vld1q_s32 (exp2); ++ ++ for (i = 0; i < 4; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x +@@ -0,0 +1,27 @@ ++extern void abort (void); ++ ++uint32x4x2_t ++test_vtrnqu32 (uint32x4_t _a, uint32x4_t _b) ++{ ++ return vtrnq_u32 (_a, _b); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint32_t first[] = {1, 2, 3, 4}; ++ uint32_t second[] = {5, 6, 7, 8}; ++ uint32x4x2_t result = test_vtrnqu32 (vld1q_u32 (first), vld1q_u32 (second)); ++ uint32x4_t res1 = result.val[0], res2 = result.val[1]; ++ uint32_t exp1[] = {1, 5, 3, 7}; ++ uint32_t exp2[] = {2, 6, 4, 8}; ++ uint32x4_t expected1 = vld1q_u32 (exp1); ++ uint32x4_t expected2 = vld1q_u32 (exp2); ++ ++ for (i = 0; i < 4; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); ++ ++ return 0; ++} +--- 
a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x +@@ -0,0 +1,22 @@ ++extern void abort (void); ++ ++int32x4_t ++test_vrev64qs32 (int32x4_t _arg) ++{ ++ return vrev64q_s32 (_arg); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ int32x4_t inorder = {1, 2, 3, 4}; ++ int32x4_t reversed = test_vrev64qs32 (inorder); ++ int32x4_t expected = {2, 1, 4, 3}; ++ ++ for (i = 0; i < 4; i++) ++ if (reversed[i] != expected[i]) ++ abort (); ++ return 0; ++} ++ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x +@@ -0,0 +1,27 @@ ++extern void abort (void); ++ ++uint8x8x2_t ++test_vtrnu8 (uint8x8_t _a, uint8x8_t _b) ++{ ++ return vtrn_u8 (_a, _b); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ uint8x8x2_t result = test_vtrnu8 (vld1_u8 (first), vld1_u8 (second)); ++ uint8x8_t res1 = result.val[0], res2 = result.val[1]; ++ uint8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; ++ uint8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; ++ uint8x8_t expected1 = vld1_u8 (exp1); ++ uint8x8_t expected2 = vld1_u8 (exp2); ++ ++ for (i = 0; i < 8; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x +@@ -0,0 +1,22 @@ ++extern void abort (void); ++ ++uint32x4_t ++test_vrev64qu32 (uint32x4_t _arg) ++{ ++ return vrev64q_u32 (_arg); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint32x4_t inorder = {1, 2, 3, 4}; ++ uint32x4_t reversed = test_vrev64qu32 (inorder); ++ uint32x4_t expected = {2, 1, 4, 3}; ++ ++ for (i = 0; i < 4; i++) ++ if (reversed[i] != expected[i]) ++ abort (); ++ return 0; ++} ++ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vextQs64' AArch64 SIMD intrinsic. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++ ++#include "arm_neon.h" ++#include "extq_s64.x" ++ ++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x +@@ -0,0 +1,114 @@ ++extern void abort (void); ++ ++int8x8_t ++test_vext_s8_1 (int8x8_t a, int8x8_t b) ++{ ++ return vext_s8 (a, b, 1); ++} ++ ++int8x8_t ++test_vext_s8_2 (int8x8_t a, int8x8_t b) ++{ ++ return vext_s8 (a, b, 2); ++} ++ ++int8x8_t ++test_vext_s8_3 (int8x8_t a, int8x8_t b) ++{ ++ return vext_s8 (a, b, 3); ++} ++ ++int8x8_t ++test_vext_s8_4 (int8x8_t a, int8x8_t b) ++{ ++ return vext_s8 (a, b, 4); ++} ++ ++int8x8_t ++test_vext_s8_5 (int8x8_t a, int8x8_t b) ++{ ++ return vext_s8 (a, b, 5); ++} ++ ++int8x8_t ++test_vext_s8_6 (int8x8_t a, int8x8_t b) ++{ ++ return vext_s8 (a, b, 6); ++} ++ ++int8x8_t ++test_vext_s8_7 (int8x8_t a, int8x8_t b) ++{ ++ return vext_s8 (a, b, 7); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i, off; ++ int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; ++ int8x8_t in1 = vld1_s8 (arr1); ++ int8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; ++ int8x8_t in2 = vld1_s8 (arr2); ++ int8_t exp[8]; ++ int8x8_t expected; ++ int8x8_t actual = test_vext_s8_1 (in1, in2); ++ ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 1; ++ expected = vld1_s8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_s8_2 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 2; ++ expected = vld1_s8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_s8_3 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 3; ++ expected = vld1_s8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_s8_4 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 4; ++ expected = vld1_s8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_s8_5 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 5; ++ expected = vld1_s8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_s8_6 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 6; ++ expected = vld1_s8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ actual = test_vext_s8_7 (in1, in2); ++ for (i = 0; i < 8; i++) ++ exp[i] = i + 7; ++ expected = vld1_s8 (exp); ++ for (i = 0; i < 8; i++) ++ if (actual[i] != expected[i]) ++ abort (); ++ ++ return 0; ++} ++ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vzip_s32' AArch64 SIMD intrinsic. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vzips32.x" ++ ++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vrev32q_p8' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vrev32qp8.x" ++ ++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vtrn_p16' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vtrnp16.x" ++ ++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vtrn_u32' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vtrnu32.x" ++ ++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x +@@ -0,0 +1,26 @@ ++extern void abort (void); ++ ++int8x8x2_t ++test_vuzps8 (int8x8_t _a, int8x8_t _b) ++{ ++ return vuzp_s8 (_a, _b); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; ++ int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; ++ int8x8x2_t result = test_vuzps8 (vld1_s8 (first), vld1_s8 (second)); ++ int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; ++ int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; ++ int8x8_t expect1 = vld1_s8 (exp1); ++ int8x8_t expect2 = vld1_s8 (exp2); ++ ++ for (i = 0; i < 8; i++) ++ if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) ++ abort (); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c +@@ -0,0 +1,11 @@ ++/* Test the `vzipq_u8' AArch64 SIMD intrinsic. 
*/ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -fno-inline" } */ ++ ++#include <arm_neon.h> ++#include "vzipqu8.x" ++ ++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x +@@ -0,0 +1,29 @@ ++extern void abort (void); ++ ++poly8x16x2_t ++test_vzipqp8 (poly8x16_t _a, poly8x16_t _b) ++{ ++ return vzipq_p8 (_a, _b); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; ++ poly8_t second[] = ++ {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; ++ poly8x16x2_t result = test_vzipqp8 (vld1q_p8 (first), vld1q_p8 (second)); ++ poly8x16_t res1 = result.val[0], res2 = result.val[1]; ++ poly8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; ++ poly8_t exp2[] = ++ {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; ++ poly8x16_t expected1 = vld1q_p8 (exp1); ++ poly8x16_t expected2 = vld1q_p8 (exp2); ++ ++ for (i = 0; i < 16; i++) ++ if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) ++ abort (); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c +@@ -0,0 +1,10 @@ ++/* Test the `vextp16' AArch64 SIMD intrinsic. */ ++ ++/* { dg-do run } */ ++/* { dg-options "-save-temps -O3 -fno-inline" } */ ++ ++#include "arm_neon.h" ++#include "ext_p16.x" ++ ++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x +@@ -0,0 +1,22 @@ ++extern void abort (void); ++ ++int16x4_t ++test_vrev32s16 (int16x4_t _arg) ++{ ++ return vrev32_s16 (_arg); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ int16x4_t inorder = {1, 2, 3, 4}; ++ int16x4_t reversed = test_vrev32s16 (inorder); ++ int16x4_t expected = {2, 1, 4, 3}; ++ ++ for (i = 0; i < 4; i++) ++ if (reversed[i] != expected[i]) ++ abort (); ++ return 0; ++} ++ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x +@@ -0,0 +1,22 @@ ++extern void abort (void); ++ ++uint16x4_t ++test_vrev32u16 (uint16x4_t _arg) ++{ ++ return vrev32_u16 (_arg); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ uint16x4_t inorder = {1, 2, 3, 4}; ++ uint16x4_t reversed = test_vrev32u16 (inorder); ++ uint16x4_t expected = {2, 1, 4, 3}; ++ ++ for (i = 0; i < 4; i++) ++ if (reversed[i] != expected[i]) ++ abort (); ++ return 0; ++} ++ +--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x ++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x +@@ -0,0 +1,22 @@ ++extern void abort (void); ++ ++poly16x4_t ++test_vrev64p16 (poly16x4_t _arg) ++{ ++ return vrev64_p16 (_arg); ++} ++ ++int ++main (int argc, char **argv) ++{ ++ int i; ++ poly16x4_t inorder = {1, 2, 3, 4}; ++ poly16x4_t reversed = test_vrev64p16 (inorder); ++ poly16x4_t expected = {4, 3, 2, 1}; ++ ++ for (i = 0; i < 4; i++) ++ if 
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64q_f32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64qf32.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++float32x4x2_t
++test_vzipqf32 (float32x4_t _a, float32x4_t _b)
++{
++  return vzipq_f32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  float32_t first[] = {1, 2, 3, 4};
++  float32_t second[] = {5, 6, 7, 8};
++  float32x4x2_t result = test_vzipqf32 (vld1q_f32 (first), vld1q_f32 (second));
++  float32x4_t res1 = result.val[0], res2 = result.val[1];
++  float32_t exp1[] = {1, 5, 2, 6};
++  float32_t exp2[] = {3, 7, 4, 8};
++  float32x4_t expected1 = vld1q_f32 (exp1);
++  float32x4_t expected2 = vld1q_f32 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextu32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_u32.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x
+@@ -0,0 +1,227 @@
++extern void abort (void);
++
++poly8x16_t
++test_vextq_p8_1 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 1);
++}
++
++poly8x16_t
++test_vextq_p8_2 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 2);
++}
++
++poly8x16_t
++test_vextq_p8_3 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 3);
++}
++
++poly8x16_t
++test_vextq_p8_4 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 4);
++}
++
++poly8x16_t
++test_vextq_p8_5 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 5);
++}
++
++poly8x16_t
++test_vextq_p8_6 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 6);
++}
++
++poly8x16_t
++test_vextq_p8_7 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 7);
++}
++
++poly8x16_t
++test_vextq_p8_8 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 8);
++}
++
++poly8x16_t
++test_vextq_p8_9 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 9);
++}
++
++poly8x16_t
++test_vextq_p8_10 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 10);
++}
++
++poly8x16_t
++test_vextq_p8_11 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 11);
++}
++
++poly8x16_t
++test_vextq_p8_12 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 12);
++}
++
++poly8x16_t
++test_vextq_p8_13 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 13);
++}
++
++poly8x16_t
++test_vextq_p8_14 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 14);
++}
++
++poly8x16_t
++test_vextq_p8_15 (poly8x16_t a, poly8x16_t b)
++{
++  return vextq_p8 (a, b, 15);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
++  poly8x16_t in1 = vld1q_p8 (arr1);
++  poly8_t arr2[] =
++    {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
++  poly8x16_t in2 = vld1q_p8 (arr2);
++  poly8_t exp[16];
++  poly8x16_t expected;
++  poly8x16_t actual = test_vextq_p8_1 (in1, in2);
++
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 1;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_2 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 2;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_3 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 3;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_4 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 4;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_5 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 5;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_6 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 6;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_7 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 7;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_8 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 8;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_9 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 9;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_10 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 10;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_11 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 11;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_12 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 12;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_13 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 13;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_14 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 14;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p8_15 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 15;
++  expected = vld1q_p8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int8x16_t
++test_vrev64qs8 (int8x16_t _arg)
++{
++  return vrev64q_s8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  int8x16_t reversed = test_vrev64qs8 (inorder);
++  int8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9};
++
++  for (i = 0; i < 16; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev16_p8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev16p8.x"
++
++/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrnq_s32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnqs32.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x
+@@ -0,0 +1,26 @@
++extern void abort (void);
++
++int16x4x2_t
++test_vuzps16 (int16x4_t _a, int16x4_t _b)
++{
++  return vuzp_s16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int16_t first[] = {1, 2, 3, 4};
++  int16_t second[] = {5, 6, 7, 8};
++  int16x4x2_t result = test_vuzps16 (vld1_s16 (first), vld1_s16 (second));
++  int16_t exp1[] = {1, 3, 5, 7};
++  int16_t exp2[] = {2, 4, 6, 8};
++  int16x4_t expect1 = vld1_s16 (exp1);
++  int16x4_t expect2 = vld1_s16 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x
+@@ -0,0 +1,26 @@
++extern void abort (void);
++
++uint16x4x2_t
++test_vuzpu16 (uint16x4_t _a, uint16x4_t _b)
++{
++  return vuzp_u16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint16_t first[] = {1, 2, 3, 4};
++  uint16_t second[] = {5, 6, 7, 8};
++  uint16x4x2_t result = test_vuzpu16 (vld1_u16 (first), vld1_u16 (second));
++  uint16_t exp1[] = {1, 3, 5, 7};
++  uint16_t exp2[] = {2, 4, 6, 8};
++  uint16x4_t expect1 = vld1_u16 (exp1);
++  uint16x4_t expect2 = vld1_u16 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrn_u8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnu8.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++poly8x8x2_t
++test_vtrnp8 (poly8x8_t _a, poly8x8_t _b)
++{
++  return vtrn_p8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
++  poly8x8x2_t result = test_vtrnp8 (vld1_p8 (first), vld1_p8 (second));
++  poly8x8_t res1 = result.val[0], res2 = result.val[1];
++  poly8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
++  poly8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
++  poly8x8_t expected1 = vld1_p8 (exp1);
++  poly8x8_t expected2 = vld1_p8 (exp2);
++
++  for (i = 0; i < 8; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int16x8_t
++test_vrev32qs16 (int16x8_t _arg)
++{
++  return vrev32q_s16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  int16x8_t reversed = test_vrev32qs16 (inorder);
++  int16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64_f32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64f32.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++int8x8x2_t
++test_vzips8 (int8x8_t _a, int8x8_t _b)
++{
++  return vzip_s8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
++  int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
++  int8x8x2_t result = test_vzips8 (vld1_s8 (first), vld1_s8 (second));
++  int8x8_t res1 = result.val[0], res2 = result.val[1];
++  int8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
++  int8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
++  int8x8_t expected1 = vld1_s8 (exp1);
++  int8x8_t expected2 = vld1_s8 (exp2);
++
++  for (i = 0; i < 8; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextQs32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "extq_s32.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint16x8_t
++test_vrev32qu16 (uint16x8_t _arg)
++{
++  return vrev32q_u16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  uint16x8_t reversed = test_vrev32qu16 (inorder);
++  uint16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64q_u16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64qu16.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64_u8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64u8.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++float32x2x2_t
++test_vtrnf32 (float32x2_t _a, float32x2_t _b)
++{
++  return vtrn_f32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  float32_t first[] = {1, 2};
++  float32_t second[] = {3, 4};
++  float32x2x2_t result = test_vtrnf32 (vld1_f32 (first), vld1_f32 (second));
++  float32x2_t res1 = result.val[0], res2 = result.val[1];
++  float32_t exp1[] = {1, 3};
++  float32_t exp2[] = {2, 4};
++  float32x2_t expected1 = vld1_f32 (exp1);
++  float32x2_t expected2 = vld1_f32 (exp2);
++
++  for (i = 0; i < 2; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vexts8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_s8.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint8x8_t
++test_vrev16u8 (uint8x8_t _arg)
++{
++  return vrev16_u8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  uint8x8_t reversed = test_vrev16u8 (inorder);
++  uint8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzpq_s16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpqs16.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x
+@@ -0,0 +1,30 @@
++extern void abort (void);
++
++int64x2_t
++test_vextq_s64_1 (int64x2_t a, int64x2_t b)
++{
++  return vextq_s64 (a, b, 1);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  int64_t arr1[] = {0, 1};
++  int64x2_t in1 = vld1q_s64 (arr1);
++  int64_t arr2[] = {2, 3};
++  int64x2_t in2 = vld1q_s64 (arr2);
++  int64_t exp[2];
++  int64x2_t expected;
++  int64x2_t actual = test_vextq_s64_1 (in1, in2);
++
++  for (i = 0; i < 2; i++)
++    exp[i] = i + 1;
++  expected = vld1q_s64 (exp);
++  for (i = 0; i < 2; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++poly16x4x2_t
++test_vzipp16 (poly16x4_t _a, poly16x4_t _b)
++{
++  return vzip_p16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16_t first[] = {1, 2, 3, 4};
++  poly16_t second[] = {5, 6, 7, 8};
++  poly16x4x2_t result = test_vzipp16 (vld1_p16 (first), vld1_p16 (second));
++  poly16x4_t res1 = result.val[0], res2 = result.val[1];
++  poly16_t exp1[] = {1, 5, 2, 6};
++  poly16_t exp2[] = {3, 7, 4, 8};
++  poly16x4_t expected1 = vld1_p16 (exp1);
++  poly16x4_t expected2 = vld1_p16 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x
+@@ -0,0 +1,30 @@
++extern void abort (void);
++
++uint64x2_t
++test_vextq_u64_1 (uint64x2_t a, uint64x2_t b)
++{
++  return vextq_u64 (a, b, 1);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  uint64_t arr1[] = {0, 1};
++  uint64x2_t in1 = vld1q_u64 (arr1);
++  uint64_t arr2[] = {2, 3};
++  uint64x2_t in2 = vld1q_u64 (arr2);
++  uint64_t exp[2];
++  uint64x2_t expected;
++  uint64x2_t actual = test_vextq_u64_1 (in1, in2);
++
++  for (i = 0; i < 2; i++)
++    exp[i] = i + 1;
++  expected = vld1q_u64 (exp);
++  for (i = 0; i < 2; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev32q_u8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev32qu8.x"
++
++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64_u16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64u16.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x
+@@ -0,0 +1,29 @@
++extern void abort (void);
++
++int8x16x2_t
++test_vzipqs8 (int8x16_t _a, int8x16_t _b)
++{
++  return vzipq_s8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  int8_t second[] =
++    {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++  int8x16x2_t result = test_vzipqs8 (vld1q_s8 (first), vld1q_s8 (second));
++  int8x16_t res1 = result.val[0], res2 = result.val[1];
++  int8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24};
++  int8_t exp2[] =
++    {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32};
++  int8x16_t expected1 = vld1q_s8 (exp1);
++  int8x16_t expected2 = vld1q_s8 (exp2);
++
++  for (i = 0; i < 16; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x
+@@ -0,0 +1,28 @@
++extern void abort (void);
++
++uint8x16x2_t
++test_vtrnqu8 (uint8x16_t _a, uint8x16_t _b)
++{
++  return vtrnq_u8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  uint8_t second[] =
++    {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++  uint8x16x2_t result = test_vtrnqu8 (vld1q_u8 (first), vld1q_u8 (second));
++  uint8x16_t res1 = result.val[0], res2 = result.val[1];
++  uint8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
++  uint8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
++  uint8x16_t expected1 = vld1q_u8 (exp1);
++  uint8x16_t expected2 = vld1q_u8 (exp2);
++
++  for (i = 0; i < 16; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x
+@@ -0,0 +1,30 @@
++extern void abort (void);
++
++int32x2_t
++test_vext_s32_1 (int32x2_t a, int32x2_t b)
++{
++  return vext_s32 (a, b, 1);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  int32_t arr1[] = {0, 1};
++  int32x2_t in1 = vld1_s32 (arr1);
++  int32_t arr2[] = {2, 3};
++  int32x2_t in2 = vld1_s32 (arr2);
++  int32_t exp[2];
++  int32x2_t expected;
++  int32x2_t actual = test_vext_s32_1 (in1, in2);
++
++  for (i = 0; i < 2; i++)
++    exp[i] = i + 1;
++  expected = vld1_s32 (exp);
++  for (i = 0; i < 2; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_s16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzps16.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x
+@@ -0,0 +1,30 @@
++extern void abort (void);
++
++uint32x2_t
++test_vext_u32_1 (uint32x2_t a, uint32x2_t b)
++{
++  return vext_u32 (a, b, 1);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  uint32_t arr1[] = {0, 1};
++  uint32x2_t in1 = vld1_u32 (arr1);
++  uint32_t arr2[] = {2, 3};
++  uint32x2_t in2 = vld1_u32 (arr2);
++  uint32_t exp[2];
++  uint32x2_t expected;
++  uint32x2_t actual = test_vext_u32_1 (in1, in2);
++
++  for (i = 0; i < 2; i++)
++    exp[i] = i + 1;
++  expected = vld1_u32 (exp);
++  for (i = 0; i < 2; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzpq_s8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpqs8.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x
+@@ -0,0 +1,227 @@
++extern void abort (void);
++
++int8x16_t
++test_vextq_s8_1 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 1);
++}
++
++int8x16_t
++test_vextq_s8_2 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 2);
++}
++
++int8x16_t
++test_vextq_s8_3 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 3);
++}
++
++int8x16_t
++test_vextq_s8_4 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 4);
++}
++
++int8x16_t
++test_vextq_s8_5 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 5);
++}
++
++int8x16_t
++test_vextq_s8_6 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 6);
++}
++
++int8x16_t
++test_vextq_s8_7 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 7);
++}
++
++int8x16_t
++test_vextq_s8_8 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 8);
++}
++
++int8x16_t
++test_vextq_s8_9 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 9);
++}
++
++int8x16_t
++test_vextq_s8_10 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 10);
++}
++
++int8x16_t
++test_vextq_s8_11 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 11);
++}
++
++int8x16_t
++test_vextq_s8_12 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 12);
++}
++
++int8x16_t
++test_vextq_s8_13 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 13);
++}
++
++int8x16_t
++test_vextq_s8_14 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 14);
++}
++
++int8x16_t
++test_vextq_s8_15 (int8x16_t a, int8x16_t b)
++{
++  return vextq_s8 (a, b, 15);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
++  int8x16_t in1 = vld1q_s8 (arr1);
++  int8_t arr2[] =
++    {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
++  int8x16_t in2 = vld1q_s8 (arr2);
++  int8_t exp[16];
++  int8x16_t expected;
++  int8x16_t actual = test_vextq_s8_1 (in1, in2);
++
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 1;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_2 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 2;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_3 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 3;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_4 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 4;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_5 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 5;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_6 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 6;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_7 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 7;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_8 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 8;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_9 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 9;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_10 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 10;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_11 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 11;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_12 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 12;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_13 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 13;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_14 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 14;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s8_15 (in1, in2);
++  for (i = 0; i < 16; i++)
++    exp[i] = i + 15;
++  expected = vld1q_s8 (exp);
++  for (i = 0; i < 16; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c
+@@ -0,0 +1,36 @@
++/* Test the `vextq_f64' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++extern void abort (void);
++#include <stdio.h>
++
++float64x2_t
++test_vextq_f64_1 (float64x2_t a, float64x2_t b)
++{
++  return vextq_f64 (a, b, 1);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  float64_t arr1[] = {0, 1};
++  float64x2_t in1 = vld1q_f64 (arr1);
++  float64_t arr2[] = {2, 3};
++  float64x2_t in2 = vld1q_f64 (arr2);
++  float64_t exp[] = {1, 2};
++  float64x2_t expected = vld1q_f64 (exp);
++  float64x2_t actual = test_vextq_f64_1 (in1, in2);
++
++  for (i = 0; i < 2; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev32q_s16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev32qs16.x"
++
++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzipq_s16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipqs16.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzip_f32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipf32.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev16_u8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev16u8.x"
++
++/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++poly8x8_t
++test_vrev16p8 (poly8x8_t _arg)
++{
++  return vrev16_p8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly8x8_t reversed = test_vrev16p8 (inorder);
++  poly8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextp8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_p8.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++int8x8x2_t
++test_vtrns8 (int8x8_t _a, int8x8_t _b)
++{
++  return vtrn_s8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
++  int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
++  int8x8x2_t result = test_vtrns8 (vld1_s8 (first), vld1_s8 (second));
++  int8x8_t res1 = result.val[0], res2 = result.val[1];
++  int8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
++  int8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
++  int8x8_t expected1 = vld1_s8 (exp1);
++  int8x8_t expected2 = vld1_s8 (exp2);
++
++  for (i = 0; i < 8; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++int16x8x2_t
++test_vtrnqs16 (int16x8_t _a, int16x8_t _b)
++{
++  return vtrnq_s16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
++  int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
++  int16x8x2_t result = test_vtrnqs16 (vld1q_s16 (first), vld1q_s16 (second));
++  int16x8_t res1 = result.val[0], res2 = result.val[1];
++  int16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
++  int16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
++  int16x8_t expected1 = vld1q_s16 (exp1);
++  int16x8_t expected2 = vld1q_s16 (exp2);
++
++  for (i = 0; i < 8; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++uint16x8x2_t
++test_vtrnqu16 (uint16x8_t _a, uint16x8_t _b)
++{
++  return vtrnq_u16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
++  uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
++  uint16x8x2_t result = test_vtrnqu16 (vld1q_u16 (first), vld1q_u16 (second));
++  uint16x8_t res1 = result.val[0], res2 = result.val[1];
++  uint16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
++  uint16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
++  uint16x8_t expected1 = vld1q_u16 (exp1);
++  uint16x8_t expected2 = vld1q_u16 (exp2);
++
++  for (i = 0; i < 8; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x
+@@ -0,0 +1,114 @@
++extern void abort (void);
++
++poly16x8_t
++test_vextq_p16_1 (poly16x8_t a, poly16x8_t b)
++{
++  return vextq_p16 (a, b, 1);
++}
++
++poly16x8_t
++test_vextq_p16_2 (poly16x8_t a, poly16x8_t b)
++{
++  return vextq_p16 (a, b, 2);
++}
++
++poly16x8_t
++test_vextq_p16_3 (poly16x8_t a, poly16x8_t b)
++{
++  return vextq_p16 (a, b, 3);
++}
++
++poly16x8_t
++test_vextq_p16_4 (poly16x8_t a, poly16x8_t b)
++{
++  return vextq_p16 (a, b, 4);
++}
++
++poly16x8_t
++test_vextq_p16_5 (poly16x8_t a, poly16x8_t b)
++{
++  return vextq_p16 (a, b, 5);
++}
++
++poly16x8_t
++test_vextq_p16_6 (poly16x8_t a, poly16x8_t b)
++{
++  return vextq_p16 (a, b, 6);
++}
++
++poly16x8_t
++test_vextq_p16_7 (poly16x8_t a, poly16x8_t b)
++{
++  return vextq_p16 (a, b, 7);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  poly16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7};
++  poly16x8_t in1 = vld1q_p16 (arr1);
++  poly16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15};
++  poly16x8_t in2 = vld1q_p16 (arr2);
++  poly16_t exp[8];
++  poly16x8_t expected;
++  poly16x8_t actual = test_vextq_p16_1 (in1, in2);
++
++  for (i = 0; i < 8; i++)
++    exp[i] = i + 1;
++  expected = vld1q_p16 (exp);
++  for (i = 0; i < 8; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p16_2 (in1, in2);
++  for (i = 0; i < 8; i++)
++    exp[i] = i + 2;
++  expected = vld1q_p16 (exp);
++  for (i = 0; i < 8; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p16_3 (in1, in2);
++  for (i = 0; i < 8; i++)
++    exp[i] = i + 3;
++  expected = vld1q_p16 (exp);
++  for (i = 0; i < 8; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p16_4 (in1, in2);
++  for (i = 0; i < 8; i++)
++    exp[i] = i + 4;
++  expected = vld1q_p16 (exp);
++  for (i = 0; i < 8; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p16_5 (in1, in2);
++  for (i = 0; i < 8; i++)
++    exp[i] = i + 5;
++  expected = vld1q_p16 (exp);
++  for (i = 0; i < 8; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p16_6 (in1, in2);
++  for (i = 0; i < 8; i++)
++    exp[i] = i + 6;
++  expected = vld1q_p16 (exp);
++  for (i = 0; i < 8; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_p16_7 (in1, in2);
++  for (i = 0; i < 8; i++)
++    exp[i] = i + 7;
++  expected = vld1q_p16 (exp);
++  for (i = 0; i < 8; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int16x8_t
++test_vrev64qs16 (int16x8_t _arg)
++{
++  return vrev64q_s16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  int16x8_t reversed = test_vrev64qs16 (inorder);
++  int16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint16x8_t
++test_vrev64qu16 (uint16x8_t _arg)
++{
++  return vrev64q_u16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  uint16x8_t reversed = test_vrev64qu16 (inorder);
++  uint16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint8x8_t
++test_vrev64u8 (uint8x8_t _arg)
++{
++  return vrev64_u8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  uint8x8_t reversed = test_vrev64u8 (inorder);
++  uint8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x
+@@ -0,0 +1,26 @@
++extern void abort (void);
++
++poly16x8x2_t
++test_vuzpqp16 (poly16x8_t _a, poly16x8_t _b)
++{
++  return vuzpq_p16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
++  poly16x8x2_t result = test_vuzpqp16 (vld1q_p16 (first), vld1q_p16 (second));
++  poly16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15};
++  poly16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16};
++  poly16x8_t expect1 = vld1q_p16 (exp1);
++  poly16x8_t expect2 = vld1q_p16 (exp2);
++
++  for (i = 0; i < 8; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrn_s16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrns16.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzip_u16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipu16.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x
+@@ -0,0 +1,26 @@
++extern void abort (void);
++
++float32x2x2_t
++test_vuzpf32 (float32x2_t _a, float32x2_t _b)
++{
++  return vuzp_f32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  float32_t first[] = {1, 2};
++  float32_t second[] = {3, 4};
++  float32x2x2_t result = test_vuzpf32 (vld1_f32 (first), vld1_f32 (second));
++  float32_t exp1[] = {1, 3};
++  float32_t exp2[] = {2, 4};
++  float32x2_t expect1 = vld1_f32 (exp1);
++  float32x2_t expect2 = vld1_f32 (exp2);
++
++  for (i = 0; i < 2; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrnq_s8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnqs8.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrnq_f32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnqf32.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x
+@@ -0,0 +1,28 @@
++extern void abort (void);
++
++poly8x16x2_t
++test_vtrnqp8 (poly8x16_t _a, poly8x16_t _b)
++{
++  return vtrnq_p8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  poly8_t second[] =
++    {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++  poly8x16x2_t result = test_vtrnqp8 (vld1q_p8 (first), vld1q_p8 (second));
++  poly8x16_t res1 = result.val[0], res2 = result.val[1];
++  poly8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
++  poly8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
++  poly8x16_t expected1 = vld1q_p8 (exp1);
++  poly8x16_t expected2 = vld1q_p8 (exp2);
++
++  for (i = 0; i < 16; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int32x2_t
++test_vrev64s32 (int32x2_t _arg)
++{
++  return vrev64_s32 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int32x2_t inorder = {1, 2};
++  int32x2_t reversed = test_vrev64s32 (inorder);
++  int32x2_t expected = {2, 1};
++
++  for (i = 0; i < 2; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vexts16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_s16.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint8x16_t
++test_vrev32qu8 (uint8x16_t _arg)
++{
++  return vrev32q_u8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  uint8x16_t reversed = test_vrev32qu8 (inorder);
++  uint8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
++
++  for (i = 0; i < 16; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint32x2_t
++test_vrev64u32 (uint32x2_t _arg)
++{
++  return vrev64_u32 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint32x2_t inorder = {1, 2};
++  uint32x2_t reversed = test_vrev64u32 (inorder);
++  uint32x2_t expected = {2, 1};
++
++  for (i = 0; i < 2; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextQf32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "extq_f32.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint8x16_t
++test_vrev16qu8 (uint8x16_t _arg)
++{
++  return vrev16q_u8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  uint8x16_t reversed = test_vrev16qu8 (inorder);
++  uint8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
++
++  for (i = 0; i < 16; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzpq_p8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpqp8.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64q_p16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64qp16.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++poly16x8x2_t
++test_vzipqp16 (poly16x8_t _a, poly16x8_t _b)
++{
++  return vzipq_p16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
++  poly16x8x2_t result = test_vzipqp16 (vld1q_p16 (first), vld1q_p16 (second));
++  poly16x8_t res1 = result.val[0], res2 = result.val[1];
++  poly16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12};
++  poly16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16};
++  poly16x8_t expected1 = vld1q_p16 (exp1);
++  poly16x8_t expected2 = vld1q_p16 (exp2);
++
++  for (i = 0; i < 8; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrnq_u16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnqu16.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64q_u32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64qu32.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++uint8x16x2_t
++test_vuzpqu8 (uint8x16_t _a, uint8x16_t _b)
++{
++  return vuzpq_u8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  uint8_t second[] =
++    {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++  uint8x16x2_t result = test_vuzpqu8 (vld1q_u8 (first), vld1q_u8 (second));
++  uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
++  uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
++  uint8x16_t expect1 = vld1q_u8 (exp1);
++  uint8x16_t expect2 = vld1q_u8 (exp2);
++
++  for (i = 0; i < 16; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++poly8x8_t
++test_vrev64p8 (poly8x8_t _arg)
++{
++  return vrev64_p8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly8x8_t reversed = test_vrev64p8 (inorder);
++  poly8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint8x8_t
++test_vrev32u8 (uint8x8_t _arg)
++{
++  return vrev32_u8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  uint8x8_t reversed = test_vrev32u8 (inorder);
++  uint8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int8x8_t
++test_vrev16s8 (int8x8_t _arg)
++{
++  return vrev16_s8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  int8x8_t reversed = test_vrev16s8 (inorder);
++  int8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextu8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_u8.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextQu16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "extq_u16.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzpq_s32' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpqs32.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_s8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzps8.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrnq_p8' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnqp8.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64_p16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64p16.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev32_u16' AArch64 SIMD intrinsic.  */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev32u16.x"
++
++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++poly16x4x2_t
++test_vtrnp16 (poly16x4_t _a, poly16x4_t _b)
++{
++  return vtrn_p16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16_t first[] = {1, 2, 3, 4};
++  poly16_t second[] = {5, 6, 7, 8};
++  poly16x4x2_t result = test_vtrnp16 (vld1_p16 (first), vld1_p16 (second));
++  poly16x4_t res1 = result.val[0], res2 = result.val[1];
++  poly16_t exp1[] = {1, 5, 3, 7};
++  poly16_t exp2[] = {2, 6, 4, 8};
++  poly16x4_t expected1 = vld1_p16 (exp1);
++  poly16x4_t expected2 = vld1_p16 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++poly8x16_t
++test_vrev32qp8 (poly8x16_t _arg)
++{
++  return vrev32q_p8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  poly8x16_t reversed = test_vrev32qp8 (inorder);
++  poly8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
++
++  for (i = 0; i < 16; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev16q_s8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev16qs8.x"
++
++/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++int32x2x2_t
++test_vzips32 (int32x2_t _a, int32x2_t _b)
++{
++  return vzip_s32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int32_t first[] = {1, 2};
++  int32_t second[] = {3, 4};
++  int32x2x2_t result = test_vzips32 (vld1_s32 (first), vld1_s32 (second));
++  int32x2_t res1 = result.val[0], res2 = result.val[1];
++  int32_t exp1[] = {1, 3};
++  int32_t exp2[] = {2, 4};
++  int32x2_t expected1 = vld1_s32 (exp1);
++  int32x2_t expected2 = vld1_s32 (exp2);
++
++  for (i = 0; i < 2; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64_u32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64u32.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++poly8x16_t
++test_vrev16qp8 (poly8x16_t _arg)
++{
++  return vrev16q_p8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  poly8x16_t reversed = test_vrev16qp8 (inorder);
++  poly8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
++
++  for (i = 0; i < 16; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++uint32x2x2_t
++test_vzipu32 (uint32x2_t _a, uint32x2_t _b)
++{
++  return vzip_u32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint32_t first[] = {1, 2};
++  uint32_t second[] = {3, 4};
++  uint32x2x2_t result = test_vzipu32 (vld1_u32 (first), vld1_u32 (second));
++  uint32x2_t res1 = result.val[0], res2 = result.val[1];
++  uint32_t exp1[] = {1, 3};
++  uint32_t exp2[] = {2, 4};
++  uint32x2_t expected1 = vld1_u32 (exp1);
++  uint32x2_t expected2 = vld1_u32 (exp2);
++
++  for (i = 0; i < 2; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++float32x4x2_t
++test_vtrnqf32 (float32x4_t _a, float32x4_t _b)
++{
++  return vtrnq_f32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  float32_t first[] = {1, 2, 3, 4};
++  float32_t second[] = {5, 6, 7, 8};
++  float32x4x2_t result = test_vtrnqf32 (vld1q_f32 (first), vld1q_f32 (second));
++  float32x4_t res1 = result.val[0], res2 = result.val[1];
++  float32_t exp1[] = {1, 5, 3, 7};
++  float32_t exp2[] = {2, 6, 4, 8};
++  float32x4_t expected1 = vld1q_f32 (exp1);
++  float32x4_t expected2 = vld1q_f32 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x
+@@ -0,0 +1,28 @@
++extern void abort (void);
++
++int8x16x2_t
++test_vtrnqs8 (int8x16_t _a, int8x16_t _b)
++{
++  return vtrnq_s8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  int8_t second[] =
++    {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++  int8x16x2_t result = test_vtrnqs8 (vld1q_s8 (first), vld1q_s8 (second));
++  int8x16_t res1 = result.val[0], res2 = result.val[1];
++  int8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31};
++  int8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32};
++  int8x16_t expected1 = vld1q_s8 (exp1);
++  int8x16_t expected2 = vld1q_s8 (exp2);
++
++  for (i = 0; i < 16; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vexts64' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_s64.x"
++
++/* Do not scan-assembler. An EXT instruction could be emitted, but would merely
++   return its first argument, so it is legitimate to optimize it out. */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_s32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzps32.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++float32x4_t
++test_vrev64qf32 (float32x4_t _arg)
++{
++  return vrev64q_f32 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  float32x4_t inorder = {1, 2, 3, 4};
++  float32x4_t reversed = test_vrev64qf32 (inorder);
++  float32x4_t expected = {2, 1, 4, 3};
++
++  for (i = 0; i < 4; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x
+@@ -0,0 +1,58 @@
++extern void abort (void);
++
++int16x4_t
++test_vext_s16_1 (int16x4_t a, int16x4_t b)
++{
++  return vext_s16 (a, b, 1);
++}
++
++int16x4_t
++test_vext_s16_2 (int16x4_t a, int16x4_t b)
++{
++  return vext_s16 (a, b, 2);
++}
++
++int16x4_t
++test_vext_s16_3 (int16x4_t a, int16x4_t b)
++{
++  return vext_s16 (a, b, 3);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  int16_t arr1[] = {0, 1, 2, 3};
++  int16x4_t in1 = vld1_s16 (arr1);
++  int16_t arr2[] = {4, 5, 6, 7};
++  int16x4_t in2 = vld1_s16 (arr2);
++  int16_t exp[4];
++  int16x4_t expected;
++  int16x4_t actual = test_vext_s16_1 (in1, in2);
++
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 1;
++  expected = vld1_s16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vext_s16_2 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 2;
++  expected = vld1_s16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vext_s16_3 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 3;
++  expected = vld1_s16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x
+@@ -0,0 +1,58 @@
++extern void abort (void);
++
++uint16x4_t
++test_vext_u16_1 (uint16x4_t a, uint16x4_t b)
++{
++  return vext_u16 (a, b, 1);
++}
++
++uint16x4_t
++test_vext_u16_2 (uint16x4_t a, uint16x4_t b)
++{
++  return vext_u16 (a, b, 2);
++}
++
++uint16x4_t
++test_vext_u16_3 (uint16x4_t a, uint16x4_t b)
++{
++  return vext_u16 (a, b, 3);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  uint16_t arr1[] = {0, 1, 2, 3};
++  uint16x4_t in1 = vld1_u16 (arr1);
++  uint16_t arr2[] = {4, 5, 6, 7};
++  uint16x4_t in2 = vld1_u16 (arr2);
++  uint16_t exp[4];
++  uint16x4_t expected;
++  uint16x4_t actual = test_vext_u16_1 (in1, in2);
++
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 1;
++  expected = vld1_u16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vext_u16_2 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 2;
++  expected = vld1_u16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vext_u16_3 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 3;
++  expected = vld1_u16 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzipq_s32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipqs32.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++poly8x16x2_t
++test_vuzpqp8 (poly8x16_t _a, poly8x16_t _b)
++{
++  return vuzpq_p8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  poly8_t second[] =
++    {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++  poly8x16x2_t result = test_vuzpqp8 (vld1q_p8 (first), vld1q_p8 (second));
++  poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
++  poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
++  poly8x16_t expect1 = vld1q_p8 (exp1);
++  poly8x16_t expect2 = vld1q_p8 (exp2);
++
++  for (i = 0; i < 16; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzpq_u8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpqu8.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzip_s8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzips8.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++poly8x8_t
++test_vrev32p8 (poly8x8_t _arg)
++{
++  return vrev32_p8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly8x8_t reversed = test_vrev32p8 (inorder);
++  poly8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int8x8_t
++test_vrev64s8 (int8x8_t _arg)
++{
++  return vrev64_s8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  int8x8_t reversed = test_vrev64s8 (inorder);
++  int8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_p8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpp8.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x
+@@ -0,0 +1,58 @@
++extern void abort (void);
++
++int32x4_t
++test_vextq_s32_1 (int32x4_t a, int32x4_t b)
++{
++  return vextq_s32 (a, b, 1);
++}
++
++int32x4_t
++test_vextq_s32_2 (int32x4_t a, int32x4_t b)
++{
++  return vextq_s32 (a, b, 2);
++}
++
++int32x4_t
++test_vextq_s32_3 (int32x4_t a, int32x4_t b)
++{
++  return vextq_s32 (a, b, 3);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  int32_t arr1[] = {0, 1, 2, 3};
++  int32x4_t in1 = vld1q_s32 (arr1);
++  int32_t arr2[] = {4, 5, 6, 7};
++  int32x4_t in2 = vld1q_s32 (arr2);
++  int32_t exp[4];
++  int32x4_t expected;
++  int32x4_t actual = test_vextq_s32_1 (in1, in2);
++
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 1;
++  expected = vld1q_s32 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s32_2 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 2;
++  expected = vld1q_s32 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_s32_3 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 3;
++  expected = vld1q_s32 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x
+@@ -0,0 +1,58 @@
++extern void abort (void);
++
++uint32x4_t
++test_vextq_u32_1 (uint32x4_t a, uint32x4_t b)
++{
++  return vextq_u32 (a, b, 1);
++}
++
++uint32x4_t
++test_vextq_u32_2 (uint32x4_t a, uint32x4_t b)
++{
++  return vextq_u32 (a, b, 2);
++}
++
++uint32x4_t
++test_vextq_u32_3 (uint32x4_t a, uint32x4_t b)
++{
++  return vextq_u32 (a, b, 3);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  uint32_t arr1[] = {0, 1, 2, 3};
++  uint32x4_t in1 = vld1q_u32 (arr1);
++  uint32_t arr2[] = {4, 5, 6, 7};
++  uint32x4_t in2 = vld1q_u32 (arr2);
++  uint32_t exp[4];
++  uint32x4_t expected;
++  uint32x4_t actual = test_vextq_u32_1 (in1, in2);
++
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 1;
++  expected = vld1q_u32 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_u32_2 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 2;
++  expected = vld1q_u32 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  actual = test_vextq_u32_3 (in1, in2);
++  for (i = 0; i < 4; i++)
++    exp[i] = i + 3;
++  expected = vld1q_u32 (exp);
++  for (i = 0; i < 4; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextQu64' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "extq_u64.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzip_p16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipp16.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrn_s32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrns32.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev16q_p8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev16qp8.x"
++
++/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x
+@@ -0,0 +1,26 @@
++extern void abort (void);
++
++int32x4x2_t
++test_vuzpqs32 (int32x4_t _a, int32x4_t _b)
++{
++  return vuzpq_s32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int32_t first[] = {1, 2, 3, 4};
++  int32_t second[] = {5, 6, 7, 8};
++  int32x4x2_t result = test_vuzpqs32 (vld1q_s32 (first), vld1q_s32 (second));
++  int32_t exp1[] = {1, 3, 5, 7};
++  int32_t exp2[] = {2, 4, 6, 8};
++  int32x4_t expect1 = vld1q_s32 (exp1);
++  int32x4_t expect2 = vld1q_s32 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzip_u32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipu32.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++poly16x4_t
++test_vrev32p16 (poly16x4_t _arg)
++{
++  return vrev32_p16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16x4_t inorder = {1, 2, 3, 4};
++  poly16x4_t reversed = test_vrev32p16 (inorder);
++  poly16x4_t expected = {2, 1, 4, 3};
++
++  for (i = 0; i < 4; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x
+@@ -0,0 +1,26 @@
++extern void abort (void);
++
++uint32x4x2_t
++test_vuzpqu32 (uint32x4_t _a, uint32x4_t _b)
++{
++  return vuzpq_u32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint32_t first[] = {1, 2, 3, 4};
++  uint32_t second[] = {5, 6, 7, 8};
++  uint32x4x2_t result = test_vuzpqu32 (vld1q_u32 (first), vld1q_u32 (second));
++  uint32_t exp1[] = {1, 3, 5, 7};
++  uint32_t exp2[] = {2, 4, 6, 8};
++  uint32x4_t expect1 = vld1q_u32 (exp1);
++  uint32x4_t expect2 = vld1q_u32 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vexts32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_s32.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrnq_u8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnqu8.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int8x16_t
++test_vrev32qs8 (int8x16_t _arg)
++{
++  return vrev32q_s8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  int8x16_t reversed = test_vrev32qs8 (inorder);
++  int8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13};
++
++  for (i = 0; i < 16; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int8x16_t
++test_vrev16qs8 (int8x16_t _arg)
++{
++  return vrev16q_s8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  int8x16_t reversed = test_vrev16qs8 (inorder);
++  int8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15};
++
++  for (i = 0; i < 16; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int16x4_t
++test_vrev64s16 (int16x4_t _arg)
++{
++  return vrev64_s16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int16x4_t inorder = {1, 2, 3, 4};
++  int16x4_t reversed = test_vrev64s16 (inorder);
++  int16x4_t expected = {4, 3, 2, 1};
++
++  for (i = 0; i < 4; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextQs8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "extq_s8.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++uint16x4_t
++test_vrev64u16 (uint16x4_t _arg)
++{
++  return vrev64_u16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint16x4_t inorder = {1, 2, 3, 4};
++  uint16x4_t reversed = test_vrev64u16 (inorder);
++  uint16x4_t expected = {4, 3, 2, 1};
++
++  for (i = 0; i < 4; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x
+@@ -0,0 +1,26 @@
++extern void abort (void);
++
++poly16x4x2_t
++test_vuzpp16 (poly16x4_t _a, poly16x4_t _b)
++{
++  return vuzp_p16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16_t first[] = {1, 2, 3, 4};
++  poly16_t second[] = {5, 6, 7, 8};
++  poly16x4x2_t result = test_vuzpp16 (vld1_p16 (first), vld1_p16 (second));
++  poly16_t exp1[] = {1, 3, 5, 7};
++  poly16_t exp2[] = {2, 4, 6, 8};
++  poly16x4_t expect1 = vld1_p16 (exp1);
++  poly16x4_t expect2 = vld1_p16 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzpq_f32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpqf32.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzip_p8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipp8.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrnq_p16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnqp16.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++poly16x8_t
++test_vrev32qp16 (poly16x8_t _arg)
++{
++  return vrev32q_p16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly16x8_t reversed = test_vrev32qp16 (inorder);
++  poly16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrnq_u32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnqu32.x"
++
++/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++int8x16x2_t
++test_vuzpqs8 (int8x16_t _a, int8x16_t _b)
++{
++  return vuzpq_s8 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
++  int8_t second[] =
++    {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32};
++  int8x16x2_t result = test_vuzpqs8 (vld1q_s8 (first), vld1q_s8 (second));
++  int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31};
++  int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
++  int8x16_t expect1 = vld1q_s8 (exp1);
++  int8x16_t expect2 = vld1q_s8 (exp2);
++
++  for (i = 0; i < 16; i++)
++    if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++int32x4x2_t
++test_vzipqs32 (int32x4_t _a, int32x4_t _b)
++{
++  return vzipq_s32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int32_t first[] = {1, 2, 3, 4};
++  int32_t second[] = {5, 6, 7, 8};
++  int32x4x2_t result = test_vzipqs32 (vld1q_s32 (first), vld1q_s32 (second));
++  int32x4_t res1 = result.val[0], res2 = result.val[1];
++  int32_t exp1[] = {1, 5, 2, 6};
++  int32_t exp2[] = {3, 7, 4, 8};
++  int32x4_t expected1 = vld1q_s32 (exp1);
++  int32x4_t expected2 = vld1q_s32 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64q_s16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64qs16.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++int8x8_t
++test_vrev32s8 (int8x8_t _arg)
++{
++  return vrev32_s8 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  int8x8_t reversed = test_vrev32s8 (inorder);
++  int8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextQp16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "extq_p16.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++uint32x4x2_t
++test_vzipqu32 (uint32x4_t _a, uint32x4_t _b)
++{
++  return vzipq_u32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint32_t first[] = {1, 2, 3, 4};
++  uint32_t second[] = {5, 6, 7, 8};
++  uint32x4x2_t result = test_vzipqu32 (vld1q_u32 (first), vld1q_u32 (second));
++  uint32x4_t res1 = result.val[0], res2 = result.val[1];
++  uint32_t exp1[] = {1, 5, 2, 6};
++  uint32_t exp2[] = {3, 7, 4, 8};
++  uint32x4_t expected1 = vld1q_u32 (exp1);
++  uint32x4_t expected2 = vld1q_u32 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextQu32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "extq_u32.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev32_p16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev32p16.x"
++
++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x
+@@ -0,0 +1,30 @@
++extern void abort (void);
++
++float32x2_t
++test_vext_f32_1 (float32x2_t a, float32x2_t b)
++{
++  return vext_f32 (a, b, 1);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  float32_t arr1[] = {0, 1};
++  float32x2_t in1 = vld1_f32 (arr1);
++  float32_t arr2[] = {2, 3};
++  float32x2_t in2 = vld1_f32 (arr2);
++  float32_t exp[2];
++  float32x2_t expected;
++  float32x2_t actual = test_vext_f32_1 (in1, in2);
++
++  for (i = 0; i < 2; i++)
++    exp[i] = i + 1;
++  expected = vld1_f32 (exp);
++  for (i = 0; i < 2; i++)
++    if (actual[i] != expected[i])
++      abort ();
++
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c
+@@ -0,0 +1,25 @@
++/* Test the `vextf64' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++
++extern void abort (void);
++
++int
++main (int argc, char **argv)
++{
++  int i, off;
++  float64x1_t in1 = {0};
++  float64x1_t in2 = {1};
++  float64x1_t actual = vext_f64 (in1, in2, 0);
++  if (actual != in1)
++    abort ();
++
++  return 0;
++}
++
++/* Do not scan-assembler. An EXT instruction could be emitted, but would merely
++   return its first argument, so it is legitimate to optimize it out. */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_f32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpf32.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzpq_u16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpqu16.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_u8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpu8.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzipq_f32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipqf32.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64_s16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64s16.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++int32x2x2_t
++test_vtrns32 (int32x2_t _a, int32x2_t _b)
++{
++  return vtrn_s32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int32_t first[] = {1, 2};
++  int32_t second[] = {3, 4};
++  int32x2x2_t result = test_vtrns32 (vld1_s32 (first), vld1_s32 (second));
++  int32x2_t res1 = result.val[0], res2 = result.val[1];
++  int32_t exp1[] = {1, 3};
++  int32_t exp2[] = {2, 4};
++  int32x2_t expected1 = vld1_s32 (exp1);
++  int32x2_t expected2 = vld1_s32 (exp2);
++
++  for (i = 0; i < 2; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev16q_u8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev16qu8.x"
++
++/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++int16x4x2_t
++test_vzips16 (int16x4_t _a, int16x4_t _b)
++{
++  return vzip_s16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  int16_t first[] = {1, 2, 3, 4};
++  int16_t second[] = {5, 6, 7, 8};
++  int16x4x2_t result = test_vzips16 (vld1_s16 (first), vld1_s16 (second));
++  int16x4_t res1 = result.val[0], res2 = result.val[1];
++  int16_t exp1[] = {1, 5, 2, 6};
++  int16_t exp2[] = {3, 7, 4, 8};
++  int16x4_t expected1 = vld1_s16 (exp1);
++  int16x4_t expected2 = vld1_s16 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev64q_s8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev64qs8.x"
++
++/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextQp8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "extq_p8.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++uint32x2x2_t
++test_vtrnu32 (uint32x2_t _a, uint32x2_t _b)
++{
++  return vtrn_u32 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint32_t first[] = {1, 2};
++  uint32_t second[] = {3, 4};
++  uint32x2x2_t result = test_vtrnu32 (vld1_u32 (first), vld1_u32 (second));
++  uint32x2_t res1 = result.val[0], res2 = result.val[1];
++  uint32_t exp1[] = {1, 3};
++  uint32_t exp2[] = {2, 4};
++  uint32x2_t expected1 = vld1_u32 (exp1);
++  uint32x2_t expected2 = vld1_u32 (exp2);
++
++  for (i = 0; i < 2; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++uint16x4x2_t
++test_vzipu16 (uint16x4_t _a, uint16x4_t _b)
++{
++  return vzip_u16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  uint16_t first[] = {1, 2, 3, 4};
++  uint16_t second[] = {5, 6, 7, 8};
++  uint16x4x2_t result = test_vzipu16 (vld1_u16 (first), vld1_u16 (second));
++  uint16x4_t res1 = result.val[0], res2 = result.val[1];
++  uint16_t exp1[] = {1, 5, 2, 6};
++  uint16_t exp2[] = {3, 7, 4, 8};
++  uint16x4_t expected1 = vld1_u16 (exp1);
++  uint16x4_t expected2 = vld1_u16 (exp2);
++
++  for (i = 0; i < 4; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vuzp_u16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vuzpu16.x"
++
++/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev32_s8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev32s8.x"
++
++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vtrn_f32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vtrnf32.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vrev32q_u16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vrev32qu16.x"
++
++/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzipq_u16' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipqu16.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c
+@@ -0,0 +1,11 @@
++/* Test the `vzip_u8' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -fno-inline" } */
++
++#include <arm_neon.h>
++#include "vzipu8.x"
++
++/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x
+@@ -0,0 +1,27 @@
++extern void abort (void);
++
++poly16x8x2_t
++test_vtrnqp16 (poly16x8_t _a, poly16x8_t _b)
++{
++  return vtrnq_p16 (_a, _b);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16};
++  poly16x8x2_t result = test_vtrnqp16 (vld1q_p16 (first), vld1q_p16 (second));
++  poly16x8_t res1 = result.val[0], res2 = result.val[1];
++  poly16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15};
++  poly16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16};
++  poly16x8_t expected1 = vld1q_p16 (exp1);
++  poly16x8_t expected2 = vld1q_p16 (exp2);
++
++  for (i = 0; i < 8; i++)
++    if ((res1[i] != expected1[i]) || (res2[i] != expected2[i]))
++      abort ();
++
++  return 0;
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++poly16x8_t
++test_vrev64qp16 (poly16x8_t _arg)
++{
++  return vrev64q_p16 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8};
++  poly16x8_t reversed = test_vrev64qp16 (inorder);
++  poly16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5};
++
++  for (i = 0; i < 8; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c
+@@ -0,0 +1,10 @@
++/* Test the `vextf32' AArch64 SIMD intrinsic. */
++
++/* { dg-do run } */
++/* { dg-options "-save-temps -O3 -fno-inline" } */
++
++#include "arm_neon.h"
++#include "ext_f32.x"
++
++/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x
++++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x
+@@ -0,0 +1,22 @@
++extern void abort (void);
++
++float32x2_t
++test_vrev64f32 (float32x2_t _arg)
++{
++  return vrev64_f32 (_arg);
++}
++
++int
++main (int argc, char **argv)
++{
++  int i;
++  float32x2_t inorder = {1, 2};
++  float32x2_t reversed = test_vrev64f32 (inorder);
++  float32x2_t expected = {2, 1};
++
++  for (i = 0; i < 2; i++)
++    if (reversed[i] != expected[i])
++      abort ();
++  return 0;
++}
++
+--- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c
+@@ -0,0 +1,430 @@
++/* Test vdup_lane intrinsics work correctly. */
++/* { dg-do run } */
++/* { dg-options "--save-temps -O1" } */
++
++#include <arm_neon.h>
++
++extern void abort (void);
++
++float32x2_t __attribute__ ((noinline))
++wrap_vdup_lane_f32_0 (float32x2_t a)
++{
++  return vdup_lane_f32 (a, 0);
++}
++
++float32x2_t __attribute__ ((noinline))
++wrap_vdup_lane_f32_1 (float32x2_t a)
++{
++  return vdup_lane_f32 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdup_lane_f32 ()
++{
++  float32x2_t a;
++  float32x2_t b;
++  int i;
++  float32_t c[2] = { 0.0 , 3.14 };
++  float32_t d[2];
++
++  a = vld1_f32 (c);
++  b = wrap_vdup_lane_f32_0 (a);
++  vst1_f32 (d, b);
++  for (i = 0; i < 2; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  b = wrap_vdup_lane_f32_1 (a);
++  vst1_f32 (d, b);
++  for (i = 0; i < 2; i++)
++    if (c[1] != d[i])
++      return 1;
++  return 0;
++}
++
++float32x4_t __attribute__ ((noinline))
++wrap_vdupq_lane_f32_0 (float32x2_t a)
++{
++  return vdupq_lane_f32 (a, 0);
++}
++
++float32x4_t __attribute__ ((noinline))
++wrap_vdupq_lane_f32_1 (float32x2_t a)
++{
++  return vdupq_lane_f32 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_lane_f32 ()
++{
++  float32x2_t a;
++  float32x4_t b;
++  int i;
++  float32_t c[2] = { 0.0 , 3.14 };
++  float32_t d[4];
++
++  a = vld1_f32 (c);
++  b = wrap_vdupq_lane_f32_0 (a);
++  vst1q_f32 (d, b);
++  for (i = 0; i < 4; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  b = wrap_vdupq_lane_f32_1 (a);
++  vst1q_f32 (d, b);
++  for (i = 0; i < 4; i++)
++    if (c[1] != d[i])
++      return 1;
++  return 0;
++}
++
++int8x8_t __attribute__ ((noinline))
++wrap_vdup_lane_s8_0 (int8x8_t a)
++{
++  return vdup_lane_s8 (a, 0);
++}
++
++int8x8_t __attribute__ ((noinline))
++wrap_vdup_lane_s8_1 (int8x8_t a)
++{
++  return vdup_lane_s8 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdup_lane_s8 ()
++{
++  int8x8_t a;
++  int8x8_t b;
++  int i;
++  /* Only two first cases are interesting. */
++  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
++  int8_t d[8];
++
++  a = vld1_s8 (c);
++  b = wrap_vdup_lane_s8_0 (a);
++  vst1_s8 (d, b);
++  for (i = 0; i < 8; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  b = wrap_vdup_lane_s8_1 (a);
++  vst1_s8 (d, b);
++  for (i = 0; i < 8; i++)
++    if (c[1] != d[i])
++      return 1;
++  return 0;
++}
++
++int8x16_t __attribute__ ((noinline))
++wrap_vdupq_lane_s8_0 (int8x8_t a)
++{
++  return vdupq_lane_s8 (a, 0);
++}
++
++int8x16_t __attribute__ ((noinline))
++wrap_vdupq_lane_s8_1 (int8x8_t a)
++{
++  return vdupq_lane_s8 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_lane_s8 ()
++{
++  int8x8_t a;
++  int8x16_t b;
++  int i;
++  /* Only two first cases are interesting. */
++  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
++  int8_t d[16];
++
++  a = vld1_s8 (c);
++  b = wrap_vdupq_lane_s8_0 (a);
++  vst1q_s8 (d, b);
++  for (i = 0; i < 16; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  b = wrap_vdupq_lane_s8_1 (a);
++  vst1q_s8 (d, b);
++  for (i = 0; i < 16; i++)
++    if (c[1] != d[i])
++      return 1;
++  return 0;
++}
++
++int16x4_t __attribute__ ((noinline))
++wrap_vdup_lane_s16_0 (int16x4_t a)
++{
++  return vdup_lane_s16 (a, 0);
++}
++
++int16x4_t __attribute__ ((noinline))
++wrap_vdup_lane_s16_1 (int16x4_t a)
++{
++  return vdup_lane_s16 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdup_lane_s16 ()
++{
++  int16x4_t a;
++  int16x4_t b;
++  int i;
++  /* Only two first cases are interesting. */
++  int16_t c[4] = { 0, 1, 2, 3 };
++  int16_t d[4];
++
++  a = vld1_s16 (c);
++  b = wrap_vdup_lane_s16_0 (a);
++  vst1_s16 (d, b);
++  for (i = 0; i < 4; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  b = wrap_vdup_lane_s16_1 (a);
++  vst1_s16 (d, b);
++  for (i = 0; i < 4; i++)
++    if (c[1] != d[i])
++      return 1;
++  return 0;
++}
++
++int16x8_t __attribute__ ((noinline))
++wrap_vdupq_lane_s16_0 (int16x4_t a)
++{
++  return vdupq_lane_s16 (a, 0);
++}
++
++int16x8_t __attribute__ ((noinline))
++wrap_vdupq_lane_s16_1 (int16x4_t a)
++{
++  return vdupq_lane_s16 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_lane_s16 ()
++{
++  int16x4_t a;
++  int16x8_t b;
++  int i;
++  /* Only two first cases are interesting. */
++  int16_t c[4] = { 0, 1, 2, 3 };
++  int16_t d[8];
++
++  a = vld1_s16 (c);
++  b = wrap_vdupq_lane_s16_0 (a);
++  vst1q_s16 (d, b);
++  for (i = 0; i < 8; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  b = wrap_vdupq_lane_s16_1 (a);
++  vst1q_s16 (d, b);
++  for (i = 0; i < 8; i++)
++    if (c[1] != d[i])
++      return 1;
++  return 0;
++}
++
++int32x2_t __attribute__ ((noinline))
++wrap_vdup_lane_s32_0 (int32x2_t a)
++{
++  return vdup_lane_s32 (a, 0);
++}
++
++int32x2_t __attribute__ ((noinline))
++wrap_vdup_lane_s32_1 (int32x2_t a)
++{
++  return vdup_lane_s32 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdup_lane_s32 ()
++{
++  int32x2_t a;
++  int32x2_t b;
++  int i;
++  int32_t c[2] = { 0, 1 };
++  int32_t d[2];
++
++  a = vld1_s32 (c);
++  b = wrap_vdup_lane_s32_0 (a);
++  vst1_s32 (d, b);
++  for (i = 0; i < 2; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  b = wrap_vdup_lane_s32_1 (a);
++  vst1_s32 (d, b);
++  for (i = 0; i < 2; i++)
++    if (c[1] != d[i])
++      return 1;
++  return 0;
++}
++
++int32x4_t __attribute__ ((noinline))
++wrap_vdupq_lane_s32_0 (int32x2_t a)
++{
++  return vdupq_lane_s32 (a, 0);
++}
++
++int32x4_t __attribute__ ((noinline))
++wrap_vdupq_lane_s32_1 (int32x2_t a)
++{
++  return vdupq_lane_s32 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_lane_s32 ()
++{
++  int32x2_t a;
++  int32x4_t b;
++  int i;
++  int32_t c[2] = { 0, 1 };
++  int32_t d[4];
++
++  a = vld1_s32 (c);
++  b = wrap_vdupq_lane_s32_0 (a);
++  vst1q_s32 (d, b);
++  for (i = 0; i < 4; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  b = wrap_vdupq_lane_s32_1 (a);
++  vst1q_s32 (d, b);
++  for (i = 0; i < 4; i++)
++    if (c[1] != d[i])
++      return 1;
++  return 0;
++}
++
++int64x1_t __attribute__ ((noinline))
++wrap_vdup_lane_s64_0 (int64x1_t a)
++{
++  return vdup_lane_s64 (a, 0);
++}
++
++int64x1_t __attribute__ ((noinline))
++wrap_vdup_lane_s64_1 (int64x1_t a)
++{
++  return vdup_lane_s64 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdup_lane_s64 ()
++{
++  int64x1_t a;
++  int64x1_t b;
++  int64_t c[1];
++  int64_t d[1];
++
++  c[0] = 0;
++  a = vld1_s64 (c);
++  b = wrap_vdup_lane_s64_0 (a);
++  vst1_s64 (d, b);
++  if (c[0] != d[0])
++    return 1;
++
++  c[0] = 1;
++  a = vld1_s64 (c);
++  b = wrap_vdup_lane_s64_1 (a);
++  vst1_s64 (d, b);
++  if (c[0] != d[0])
++    return 1;
++  return 0;
++}
++
++int64x2_t __attribute__ ((noinline))
++wrap_vdupq_lane_s64_0 (int64x1_t a)
++{
++  return vdupq_lane_s64 (a, 0);
++}
++
++int64x2_t __attribute__ ((noinline))
++wrap_vdupq_lane_s64_1 (int64x1_t a)
++{
++  return vdupq_lane_s64 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_lane_s64 ()
++{
++  int64x1_t a;
++  int64x2_t b;
++  int i;
++  int64_t c[1];
++  int64_t d[2];
++
++  c[0] = 0;
++  a = vld1_s64 (c);
++  b = wrap_vdupq_lane_s64_0 (a);
++  vst1q_s64 (d, b);
++  for (i = 0; i < 2; i++)
++    if (c[0] != d[i])
++      return 1;
++
++  c[0] = 1;
c[0] = 1;
++  a = vld1_s64 (c);
++  b = wrap_vdupq_lane_s64_1 (a);
++  vst1q_s64 (d, b);
++  for (i = 0; i < 2; i++)
++    if (c[0] != d[i])
++      return 1;
++  return 0;
++}
++
++int
++main ()
++{
++
++  if (test_vdup_lane_f32 ())
++    abort ();
++  if (test_vdup_lane_s8 ())
++    abort ();
++  if (test_vdup_lane_s16 ())
++    abort ();
++  if (test_vdup_lane_s32 ())
++    abort ();
++  if (test_vdup_lane_s64 ())
++    abort ();
++  if (test_vdupq_lane_f32 ())
++    abort ();
++  if (test_vdupq_lane_s8 ())
++    abort ();
++  if (test_vdupq_lane_s16 ())
++    abort ();
++  if (test_vdupq_lane_s32 ())
++    abort ();
++  if (test_vdupq_lane_s64 ())
++    abort ();
++
++  return 0;
++}
++
++/* Asm check for test_vdup_lane_s8. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
++
++/* Asm check for test_vdupq_lane_s8. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */
++
++/* Asm check for test_vdup_lane_s16. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
++/* Asm check for test_vdup_lane_s16. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
++
++/* Asm check for test_vdupq_lane_s16. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */
++/* Asm check for test_vdupq_lane_s16. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */
++
++/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
++
++/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */
++
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c
+@@ -0,0 +1,619 @@
++/* Test vdup_n intrinsics work correctly. */
++/* { dg-do run } */
++/* { dg-options "-O1 --save-temps" } */
++
++#include <arm_neon.h>
++
++extern void abort (void);
++
++float32x2_t __attribute__ ((noinline))
++wrap_vdup_n_f32 (float32_t a)
++{
++  return vdup_n_f32 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdup_n_f32 ()
++{
++  float32_t a = 1.0;
++  float32x2_t b;
++  float32_t c[2];
++  int i;
++
++  b = wrap_vdup_n_f32 (a);
++  vst1_f32 (c, b);
++  for (i = 0; i < 2; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++float32x4_t __attribute__ ((noinline))
++wrap_vdupq_n_f32 (float32_t a)
++{
++  return vdupq_n_f32 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_n_f32 ()
++{
++  float32_t a = 1.0;
++  float32x4_t b;
++  float32_t c[4];
++  int i;
++
++  b = wrap_vdupq_n_f32 (a);
++  vst1q_f32 (c, b);
++  for (i = 0; i < 4; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++float64x1_t __attribute__ ((noinline))
++wrap_vdup_n_f64 (float64_t a)
++{
++  return vdup_n_f64 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdup_n_f64 ()
++{
++  float64_t a = 1.0;
++  float64x1_t b;
++  float64_t c[1];
++  int i;
++
++  b = wrap_vdup_n_f64 (a);
++  vst1_f64 (c, b);
++  for (i = 0; i < 1; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++float64x2_t __attribute__ ((noinline))
++wrap_vdupq_n_f64 (float64_t a)
++{
++  return vdupq_n_f64 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_n_f64 ()
++{
++  float64_t a = 1.0;
++  float64x2_t b;
++  float64_t c[2];
++  int i;
++
++  b = wrap_vdupq_n_f64 (a);
++  vst1q_f64 (c, b);
++  for (i = 0; i < 2; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++poly8x8_t __attribute__ ((noinline))
++wrap_vdup_n_p8 (poly8_t a)
++{
++  return vdup_n_p8 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdup_n_p8 ()
++{
++  poly8_t a = 1;
++  poly8x8_t b;
++  poly8_t c[8];
++  int i;
++
++  b = wrap_vdup_n_p8 (a);
++  vst1_p8 (c, b);
++  for (i = 0; i < 8; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++poly8x16_t __attribute__ ((noinline))
++wrap_vdupq_n_p8 (poly8_t a)
++{
++  return vdupq_n_p8 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_n_p8 ()
++{
++  poly8_t a = 1;
++  poly8x16_t b;
++  poly8_t c[16];
++  int i;
++
++  b = wrap_vdupq_n_p8 (a);
++  vst1q_p8 (c, b);
++  for (i = 0; i < 16; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++int8x8_t __attribute__ ((noinline))
++wrap_vdup_n_s8 (int8_t a)
++{
++  return vdup_n_s8 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdup_n_s8 ()
++{
++  int8_t a = 1;
++  int8x8_t b;
++  int8_t c[8];
++  int i;
++
++  b = wrap_vdup_n_s8 (a);
++  vst1_s8 (c, b);
++  for (i = 0; i < 8; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++int8x16_t __attribute__ ((noinline))
++wrap_vdupq_n_s8 (int8_t a)
++{
++  return vdupq_n_s8 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_n_s8 ()
++{
++  int8_t a = 1;
++  int8x16_t b;
++  int8_t c[16];
++  int i;
++
++  b = wrap_vdupq_n_s8 (a);
++  vst1q_s8 (c, b);
++  for (i = 0; i < 16; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++uint8x8_t __attribute__ ((noinline))
++wrap_vdup_n_u8 (uint8_t a)
++{
++  return vdup_n_u8 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdup_n_u8 ()
++{
++  uint8_t a = 1;
++  uint8x8_t b;
++  uint8_t c[8];
++  int i;
++
++  b = wrap_vdup_n_u8 (a);
++  vst1_u8 (c, b);
++  for (i = 0; i < 8; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++uint8x16_t __attribute__ ((noinline))
++wrap_vdupq_n_u8 (uint8_t a)
++{
++  return vdupq_n_u8 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_n_u8 ()
++{
++  uint8_t a = 1;
++  uint8x16_t b;
++  uint8_t 
c[16]; ++ int i; ++ ++ b = wrap_vdupq_n_u8 (a); ++ vst1q_u8 (c, b); ++ for (i = 0; i < 16; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++poly16x4_t __attribute__ ((noinline)) ++wrap_vdup_n_p16 (poly16_t a) ++{ ++ return vdup_n_p16 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdup_n_p16 () ++{ ++ poly16_t a = 1; ++ poly16x4_t b; ++ poly16_t c[4]; ++ int i; ++ ++ b = wrap_vdup_n_p16 (a); ++ vst1_p16 (c, b); ++ for (i = 0; i < 4; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++poly16x8_t __attribute__ ((noinline)) ++wrap_vdupq_n_p16 (poly16_t a) ++{ ++ return vdupq_n_p16 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdupq_n_p16 () ++{ ++ poly16_t a = 1; ++ poly16x8_t b; ++ poly16_t c[8]; ++ int i; ++ ++ b = wrap_vdupq_n_p16 (a); ++ vst1q_p16 (c, b); ++ for (i = 0; i < 8; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++int16x4_t __attribute__ ((noinline)) ++wrap_vdup_n_s16 (int16_t a) ++{ ++ return vdup_n_s16 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdup_n_s16 () ++{ ++ int16_t a = 1; ++ int16x4_t b; ++ int16_t c[4]; ++ int i; ++ ++ b = wrap_vdup_n_s16 (a); ++ vst1_s16 (c, b); ++ for (i = 0; i < 4; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++int16x8_t __attribute__ ((noinline)) ++wrap_vdupq_n_s16 (int16_t a) ++{ ++ return vdupq_n_s16 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdupq_n_s16 () ++{ ++ int16_t a = 1; ++ int16x8_t b; ++ int16_t c[8]; ++ int i; ++ ++ b = wrap_vdupq_n_s16 (a); ++ vst1q_s16 (c, b); ++ for (i = 0; i < 8; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++uint16x4_t __attribute__ ((noinline)) ++wrap_vdup_n_u16 (uint16_t a) ++{ ++ return vdup_n_u16 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdup_n_u16 () ++{ ++ uint16_t a = 1; ++ uint16x4_t b; ++ uint16_t c[4]; ++ int i; ++ ++ b = wrap_vdup_n_u16 (a); ++ vst1_u16 (c, b); ++ for (i = 0; i < 4; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++uint16x8_t __attribute__ ((noinline)) ++wrap_vdupq_n_u16 (uint16_t a) ++{ ++ return vdupq_n_u16 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdupq_n_u16 () ++{ ++ uint16_t a = 1; ++ uint16x8_t b; ++ uint16_t c[8]; ++ int i; ++ ++ b = wrap_vdupq_n_u16 (a); ++ vst1q_u16 (c, b); ++ for (i = 0; i < 8; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++int32x2_t __attribute__ ((noinline)) ++wrap_vdup_n_s32 (int32_t a) ++{ ++ return vdup_n_s32 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdup_n_s32 () ++{ ++ int32_t a = 1; ++ int32x2_t b; ++ int32_t c[2]; ++ int i; ++ ++ b = wrap_vdup_n_s32 (a); ++ vst1_s32 (c, b); ++ for (i = 0; i < 2; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++int32x4_t __attribute__ ((noinline)) ++wrap_vdupq_n_s32 (int32_t a) ++{ ++ return vdupq_n_s32 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdupq_n_s32 () ++{ ++ int32_t a = 1; ++ int32x4_t b; ++ int32_t c[4]; ++ int i; ++ ++ b = wrap_vdupq_n_s32 (a); ++ vst1q_s32 (c, b); ++ for (i = 0; i < 4; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++uint32x2_t __attribute__ ((noinline)) ++wrap_vdup_n_u32 (uint32_t a) ++{ ++ return vdup_n_u32 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdup_n_u32 () ++{ ++ uint32_t a = 1; ++ uint32x2_t b; ++ uint32_t c[2]; ++ int i; ++ ++ b = wrap_vdup_n_u32 (a); ++ vst1_u32 (c, b); ++ for (i = 0; i < 2; i++) ++ if (a != c[i]) ++ return 1; ++ return 0; ++} ++ ++uint32x4_t __attribute__ ((noinline)) ++wrap_vdupq_n_u32 (uint32_t a) ++{ ++ return vdupq_n_u32 (a); ++} ++ ++int __attribute__ ((noinline)) ++test_vdupq_n_u32 () ++{ ++ 
uint32_t a = 1;
++  uint32x4_t b;
++  uint32_t c[4];
++  int i;
++
++  b = wrap_vdupq_n_u32 (a);
++  vst1q_u32 (c, b);
++  for (i = 0; i < 4; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++int64x1_t __attribute__ ((noinline))
++wrap_vdup_n_s64 (int64_t a)
++{
++  return vdup_n_s64 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdup_n_s64 ()
++{
++  int64_t a = 1;
++  int64x1_t b;
++  int64_t c[1];
++  int i;
++
++  b = wrap_vdup_n_s64 (a);
++  vst1_s64 (c, b);
++  for (i = 0; i < 1; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++int64x2_t __attribute__ ((noinline))
++wrap_vdupq_n_s64 (int64_t a)
++{
++  return vdupq_n_s64 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_n_s64 ()
++{
++  int64_t a = 1;
++  int64x2_t b;
++  int64_t c[2];
++  int i;
++
++  b = wrap_vdupq_n_s64 (a);
++  vst1q_s64 (c, b);
++  for (i = 0; i < 2; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++uint64x1_t __attribute__ ((noinline))
++wrap_vdup_n_u64 (uint64_t a)
++{
++  return vdup_n_u64 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdup_n_u64 ()
++{
++  uint64_t a = 1;
++  uint64x1_t b;
++  uint64_t c[1];
++  int i;
++
++  b = wrap_vdup_n_u64 (a);
++  vst1_u64 (c, b);
++  for (i = 0; i < 1; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++uint64x2_t __attribute__ ((noinline))
++wrap_vdupq_n_u64 (uint64_t a)
++{
++  return vdupq_n_u64 (a);
++}
++
++int __attribute__ ((noinline))
++test_vdupq_n_u64 ()
++{
++  uint64_t a = 1;
++  uint64x2_t b;
++  uint64_t c[2];
++  int i;
++
++  b = wrap_vdupq_n_u64 (a);
++  vst1q_u64 (c, b);
++  for (i = 0; i < 2; i++)
++    if (a != c[i])
++      return 1;
++  return 0;
++}
++
++int
++main ()
++{
++  if (test_vdup_n_f32 ())
++    abort ();
++  if (test_vdup_n_f64 ())
++    abort ();
++  if (test_vdup_n_p8 ())
++    abort ();
++  if (test_vdup_n_u8 ())
++    abort ();
++  if (test_vdup_n_s8 ())
++    abort ();
++  if (test_vdup_n_p16 ())
++    abort ();
++  if (test_vdup_n_s16 ())
++    abort ();
++  if (test_vdup_n_u16 ())
++    abort ();
++  if (test_vdup_n_s32 ())
++    abort ();
++  if (test_vdup_n_u32 ())
++    abort ();
++  if (test_vdup_n_s64 ())
++    abort ();
++  if (test_vdup_n_u64 ())
++    abort ();
++  if (test_vdupq_n_f32 ())
++    abort ();
++  if (test_vdupq_n_f64 ())
++    abort ();
++  if (test_vdupq_n_p8 ())
++    abort ();
++  if (test_vdupq_n_u8 ())
++    abort ();
++  if (test_vdupq_n_s8 ())
++    abort ();
++  if (test_vdupq_n_p16 ())
++    abort ();
++  if (test_vdupq_n_s16 ())
++    abort ();
++  if (test_vdupq_n_u16 ())
++    abort ();
++  if (test_vdupq_n_s32 ())
++    abort ();
++  if (test_vdupq_n_u32 ())
++    abort ();
++  if (test_vdupq_n_s64 ())
++    abort ();
++  if (test_vdupq_n_u64 ())
++    abort ();
++  return 0;
++}
++
++/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64.
++   Cannot force floating point value in general purpose register. */
++
++/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */
++
++/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */
++
++/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */
++
++/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16. */
++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */
++
++/* Asm check for test_vdup_n_s32, test_vdup_n_u32. 
*/ ++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */ ++ ++/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32. */ ++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */ ++ ++/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out. ++ Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+" ++ are not practical. */ ++ ++/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64. */ ++/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */ ++ ++/* { dg-final { cleanup-saved-temps } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-options " -O2 " } */ ++ ++float ++f_1 (float a, float b, float c, float d) ++{ ++ if (a > 0.0) ++ return c; ++ else ++ return 2.0; ++} ++ ++double ++f_2 (double a, double b, double c, double d) ++{ ++ if (a > b) ++ return c; ++ else ++ return d; ++} ++ ++/* { dg-final { scan-assembler-times "\tfcsel" 2 } } */ +--- a/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c +@@ -0,0 +1,59 @@ ++/* { dg-options "-O2" } */ ++/* { dg-do run } */ ++ ++extern void abort (void); ++ ++typedef unsigned int __u32; ++ ++__u32 ++__rev16_32_alt (__u32 x) ++{ ++ return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) ++ | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); ++} ++ ++__u32 ++__rev16_32 (__u32 x) ++{ ++ return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) ++ | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); ++} ++ ++typedef unsigned long long __u64; ++ ++__u64 ++__rev16_64_alt (__u64 x) ++{ ++ return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8) ++ | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8); ++} ++ ++__u64 ++__rev16_64 (__u64 x) ++{ ++ return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8) ++ | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8); ++} ++ ++int ++main (void) ++{ ++ volatile __u32 in32 = 0x12345678; ++ volatile __u32 expected32 = 0x34127856; ++ volatile __u64 in64 = 0x1234567890abcdefUL; ++ volatile __u64 expected64 = 0x34127856ab90efcdUL; ++ ++ if (__rev16_32 (in32) != expected32) ++ abort (); ++ ++ if (__rev16_32_alt (in32) != expected32) ++ abort (); ++ ++ if (__rev16_64 (in64) != expected64) ++ abort (); ++ ++ if (__rev16_64_alt (in64) != expected64) ++ abort (); ++ ++ return 0; ++} +--- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c ++++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c +@@ -0,0 +1,343 @@ ++/* Test vdup_lane intrinsics work correctly. 
*/
++/* { dg-do run } */
++/* { dg-options "-O1 --save-temps" } */
++
++#include <arm_neon.h>
++
++#define force_simd(V1) asm volatile ("" \
++           : "=w"(V1) \
++           : "w"(V1) \
++           : /* No clobbers */)
++
++extern void abort (void);
++
++float32_t __attribute__ ((noinline))
++wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a)
++{
++  return vdups_lane_f32 (a, 0);
++}
++
++float32_t __attribute__ ((noinline))
++wrap_vdups_lane_f32_1 (float32x2_t a)
++{
++  return vdups_lane_f32 (a, 1);
++}
++
++int __attribute__ ((noinline))
++test_vdups_lane_f32 ()
++{
++  float32x2_t a;
++  float32_t b;
++  float32_t c[2] = { 0.0, 1.0 };
++
++  a = vld1_f32 (c);
++  b = wrap_vdups_lane_f32_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  b = wrap_vdups_lane_f32_1 (a);
++  if (c[1] != b)
++    return 1;
++  return 0;
++}
++
++float64_t __attribute__ ((noinline))
++wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a)
++{
++  return vdupd_lane_f64 (a, 0);
++}
++
++int __attribute__ ((noinline))
++test_vdupd_lane_f64 ()
++{
++  float64x1_t a;
++  float64_t b;
++  float64_t c[1] = { 0.0 };
++  a = vld1_f64 (c);
++  b = wrap_vdupd_lane_f64_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  return 0;
++}
++
++int8_t __attribute__ ((noinline))
++wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a)
++{
++  int8_t result = vdupb_lane_s8 (a, 0);
++  force_simd (result);
++  return result;
++}
++
++int8_t __attribute__ ((noinline))
++wrap_vdupb_lane_s8_1 (int8x8_t a)
++{
++  int8_t result = vdupb_lane_s8 (a, 1);
++  force_simd (result);
++  return result;
++}
++
++int __attribute__ ((noinline))
++test_vdupb_lane_s8 ()
++{
++  int8x8_t a;
++  int8_t b;
++  int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
++
++  a = vld1_s8 (c);
++  b = wrap_vdupb_lane_s8_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  b = wrap_vdupb_lane_s8_1 (a);
++  if (c[1] != b)
++    return 1;
++
++  return 0;
++}
++
++uint8_t __attribute__ ((noinline))
++wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a)
++{
++  uint8_t result = vdupb_lane_u8 (a, 0);
++  force_simd (result);
++  return result;
++}
++
++uint8_t __attribute__ ((noinline))
++wrap_vdupb_lane_u8_1 (uint8x8_t a)
++{
++  uint8_t result = vdupb_lane_u8 (a, 1);
++  force_simd (result);
++  return result;
++}
++
++int __attribute__ ((noinline))
++test_vdupb_lane_u8 ()
++{
++  uint8x8_t a;
++  uint8_t b;
++  uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
++
++  a = vld1_u8 (c);
++  b = wrap_vdupb_lane_u8_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  b = wrap_vdupb_lane_u8_1 (a);
++  if (c[1] != b)
++    return 1;
++  return 0;
++}
++
++int16_t __attribute__ ((noinline))
++wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a)
++{
++  int16_t result = vduph_lane_s16 (a, 0);
++  force_simd (result);
++  return result;
++}
++
++int16_t __attribute__ ((noinline))
++wrap_vduph_lane_s16_1 (int16x4_t a)
++{
++  int16_t result = vduph_lane_s16 (a, 1);
++  force_simd (result);
++  return result;
++}
++
++int __attribute__ ((noinline))
++test_vduph_lane_s16 ()
++{
++  int16x4_t a;
++  int16_t b;
++  int16_t c[4] = { 0, 1, 2, 3 };
++
++  a = vld1_s16 (c);
++  b = wrap_vduph_lane_s16_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  b = wrap_vduph_lane_s16_1 (a);
++  if (c[1] != b)
++    return 1;
++  return 0;
++}
++
++uint16_t __attribute__ ((noinline))
++wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a)
++{
++  uint16_t result = vduph_lane_u16 (a, 0);
++  force_simd (result);
++  return result;
++}
++
++uint16_t __attribute__ ((noinline))
++wrap_vduph_lane_u16_1 (uint16x4_t a)
++{
++  uint16_t result = vduph_lane_u16 (a, 1);
++  force_simd (result);
++  return result;
++}
++
++int __attribute__ ((noinline))
++test_vduph_lane_u16 ()
++{
++  uint16x4_t a;
++  uint16_t b;
++  uint16_t c[4] = { 0, 1, 2, 3 };
++
++  a = vld1_u16 (c);
++  b = wrap_vduph_lane_u16_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  b = wrap_vduph_lane_u16_1 (a);
++  if (c[1] != b)
++    return 1;
++  return 0;
++}
++
++int32_t __attribute__ ((noinline))
++wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a)
++{
++  int32_t result = vdups_lane_s32 (a, 0);
++  force_simd (result);
++  return result;
++}
++
++int32_t __attribute__ ((noinline))
++wrap_vdups_lane_s32_1 (int32x2_t a)
++{
++  int32_t result = vdups_lane_s32 (a, 1);
++  force_simd (result);
++  return result;
++}
++
++int __attribute__ ((noinline))
++test_vdups_lane_s32 ()
++{
++  int32x2_t a;
++  int32_t b;
++  int32_t c[2] = { 0, 1 };
++
++  a = vld1_s32 (c);
++  b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a);
++  if (c[0] != b)
++    return 1;
++  b = wrap_vdups_lane_s32_1 (a);
++  if (c[1] != b)
++    return 1;
++  return 0;
++}
++
++uint32_t __attribute__ ((noinline))
++wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a)
++{
++  uint32_t result = vdups_lane_u32 (a, 0);
++  force_simd (result);
++  return result;
++}
++
++uint32_t __attribute__ ((noinline))
++wrap_vdups_lane_u32_1 (uint32x2_t a)
++{
++  uint32_t result = vdups_lane_u32 (a, 1);
++  force_simd (result);
++  return result;
++}
++
++int __attribute__ ((noinline))
++test_vdups_lane_u32 ()
++{
++  uint32x2_t a;
++  uint32_t b;
++  uint32_t c[2] = { 0, 1 };
++  a = vld1_u32 (c);
++  b = wrap_vdups_lane_u32_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  b = wrap_vdups_lane_u32_1 (a);
++  if (c[1] != b)
++    return 1;
++  return 0;
++}
++
++uint64_t __attribute__ ((noinline))
++wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a)
++{
++  return vdupd_lane_u64 (a, 0);
++}
++
++int __attribute__ ((noinline))
++test_vdupd_lane_u64 ()
++{
++  uint64x1_t a;
++  uint64_t b;
++  uint64_t c[1] = { 0 };
++
++  a = vld1_u64 (c);
++  b = wrap_vdupd_lane_u64_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  return 0;
++}
++
++int64_t __attribute__ ((noinline))
++wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a)
++{
++  return vdupd_lane_s64 (a, 0);
++}
++
++int __attribute__ ((noinline))
++test_vdupd_lane_s64 ()
++{
++  int64x1_t a;
++  int64_t b;
++  int64_t c[1] = { 0 };
++
++  a = vld1_s64 (c);
++  b = wrap_vdupd_lane_s64_0 (a, a);
++  if (c[0] != b)
++    return 1;
++  return 0;
++}
++
++int
++main ()
++{
++  if (test_vdups_lane_f32 ())
++    abort ();
++  if (test_vdupd_lane_f64 ())
++    abort ();
++  if (test_vdupb_lane_s8 ())
++    abort ();
++  if (test_vdupb_lane_u8 ())
++    abort ();
++  if (test_vduph_lane_s16 ())
++    abort ();
++  if (test_vduph_lane_u16 ())
++    abort ();
++  if (test_vdups_lane_s32 ())
++    abort ();
++  if (test_vdups_lane_u32 ())
++    abort ();
++  if (test_vdupd_lane_s64 ())
++    abort ();
++  if (test_vdupd_lane_u64 ())
++    abort ();
++  return 0;
++}
++
++/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */
++/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */
++/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */
++
++/* Asm check for vduph_lane_s16, vduph_lane_u16. */
++/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */
++/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */
++
++/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */
++/* Can't generate "dup s, v[0]" for vdups_lane_s32 and vdups_lane_u32. */
++/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */
++/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */
++
++/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */
++/* Attempts to make the compiler generate vdupd are not practical. */
++/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } } */
++
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c
+@@ -0,0 +1,18 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++typedef void FP (int);
++
++/* { dg-final { scan-assembler "br" } } */
++/* { dg-final { scan-assembler-not "blr" } } */
++void
++f1 (FP fp, int n)
++{
++  (fp) (n);
++}
++
++void
++f2 (int n, FP fp)
++{
++  (fp) (n);
++}
+--- a/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c
+@@ -193,7 +193,6 @@
+   return b;
+ }
+ /* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 63" } } */
+-/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 1" } } */
+ 
+ Int32x1
+ test_corners_sisd_si (Int32x1 b)
+@@ -207,7 +206,6 @@
+   return b;
+ }
+ /* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 31" } } */
+-/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 1" } } */
+ 
+ 
+ 
+--- a/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c
+@@ -0,0 +1,105 @@
++/* Test vrnd_f64 works correctly. */
++/* { dg-do run } */
++/* { dg-options "--save-temps" } */
++
++#include "arm_neon.h"
++
++extern void abort (void);
++
++/* Bit offset to round mode field in FPCR. */
++#define RMODE_START 22
++
++#define FPROUNDING_ZERO 3
++
++/* Set RMODE field of FPCR control register
++   to rounding mode passed. */
++void __inline __attribute__ ((__always_inline__))
++set_rounding_mode (uint32_t mode)
++{
++  uint32_t r;
++
++  /* Read current FPCR. */
++  asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :);
++
++  /* Clear rmode. */
++  r &= ~(3 << RMODE_START);
++  /* Calculate desired FPCR. */
++  r |= mode << RMODE_START;
++
++  /* Write desired FPCR back. */
++  asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :);
++}
++
++float64x1_t __attribute__ ((noinline))
++compare_f64 (float64x1_t passed, float64_t expected)
++{
++  return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected)
++          > __DBL_EPSILON__);
++}
++
++void __attribute__ ((noinline))
++run_round_tests (float64x1_t *tests,
++                 float64_t expectations[][6])
++{
++  int i;
++
++  for (i = 0; i < 6; i++)
++    {
++      if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i]))
++        abort ();
++      if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i]))
++        abort ();
++      if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i]))
++        abort ();
++      if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i]))
++        abort ();
++      if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i]))
++        abort ();
++      if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i]))
++        abort ();
++      if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i]))
++        abort ();
++    }
++}
++
++int
++main (int argc, char **argv)
++{
++  float64x1_t tests[6] =
++    {
++      vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5. */
++      vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4. */
++      vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6. */
++      vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5. */
++      vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4. */
++      vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6. */
++    };
++
++  float64_t expectations[7][6] =
++    {
++      { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrnd - round towards zero. */
++      { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndx - round using FPCR mode. */
++      { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, /* vrndp - round to plus infinity. */
++      { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 }, /* vrndn - round ties to even. */
++      { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity. */
++      { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndi - round using FPCR mode. */
++      { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 }, /* vrnda - round ties away from 0. */
++    };
++
++  /* Set floating point control register
++     to have predictable vrndx and vrndi behaviour. */
++  set_rounding_mode (FPROUNDING_ZERO);
++
++  run_round_tests (tests, expectations);
++
++  return 0;
++}
++
++/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */
++/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c
++++ b/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c
+@@ -0,0 +1,47 @@
++/* Test vqneg_s64 intrinsics work correctly. */
++/* { dg-do run } */
++/* { dg-options "--save-temps" } */
++
++#include <arm_neon.h>
++
++extern void abort (void);
++
++int __attribute__ ((noinline))
++test_vqneg_s64 (int64x1_t passed, int64_t expected)
++{
++  return vget_lane_s64 (vqneg_s64 (passed), 0) != expected;
++}
++
++int __attribute__ ((noinline))
++test_vqnegd_s64 (int64_t passed, int64_t expected)
++{
++  return vqnegd_s64 (passed) != expected;
++}
++
++/* { dg-final { scan-assembler-times "sqneg\\td\[0-9\]+, d\[0-9\]+" 2 } } */
++
++int
++main (int argc, char **argv)
++{
++  /* Basic test. */
++  if (test_vqneg_s64 (vcreate_s64 (-1), 1))
++    abort ();
++  if (test_vqnegd_s64 (-1, 1))
++    abort ();
++
++  /* Negating max int64_t. */
++  if (test_vqneg_s64 (vcreate_s64 (0x7fffffffffffffff), 0x8000000000000001))
++    abort ();
++  if (test_vqnegd_s64 (0x7fffffffffffffff, 0x8000000000000001))
++    abort ();
++
++  /* Negating min int64_t.
++     Note, exact negation cannot be represented as int64_t. */
++  if (test_vqneg_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff))
++    abort ();
++  if (test_vqnegd_s64 (0x8000000000000000, 0x7fffffffffffffff))
++    abort ();
++
++  return 0;
++}
++/* { dg-final { cleanup-saved-temps } } */
+--- a/src/gcc/testsuite/lib/target-supports.exp
++++ b/src/gcc/testsuite/lib/target-supports.exp
+@@ -3306,6 +3306,27 @@
+     return $et_vect_shift_saved
+ }
+ 
++# Return 1 if the target supports vector bswap operations.
++ ++proc check_effective_target_vect_bswap { } { ++ global et_vect_bswap_saved ++ ++ if [info exists et_vect_bswap_saved] { ++ verbose "check_effective_target_vect_bswap: using cached result" 2 ++ } else { ++ set et_vect_bswap_saved 0 ++ if { [istarget aarch64*-*-*] ++ || ([istarget arm*-*-*] ++ && [check_effective_target_arm_neon]) ++ } { ++ set et_vect_bswap_saved 1 ++ } ++ } ++ ++ verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2 ++ return $et_vect_bswap_saved ++} ++ + # Return 1 if the target supports hardware vector shift operation for char. + + proc check_effective_target_vect_shift_char { } { +@@ -3504,8 +3525,7 @@ + } else { + set et_vect_perm_saved 0 + if { [is-effective-target arm_neon_ok] +- || ([istarget aarch64*-*-*] +- && [is-effective-target aarch64_little_endian]) ++ || [istarget aarch64*-*-*] + || [istarget powerpc*-*-*] + || [istarget spu-*-*] + || [istarget i?86-*-*] +--- a/src/gcc/testsuite/ChangeLog.linaro ++++ b/src/gcc/testsuite/ChangeLog.linaro +@@ -0,0 +1,527 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ ++2014-07-17 Yvan Roux ++ ++ Backport from trunk r211887. ++ 2014-06-23 James Greenhalgh ++ ++ * gcc.target/aarch64/scalar_shift_1.c: Fix expected assembler. ++ ++2014-07-17 Yvan Roux ++ ++ Backport from trunk r211441. ++ 2014-06-11 Kyrylo Tkachov ++ ++ * gcc.target/aarch64/acle/acle.exp: New. ++ * gcc.target/aarch64/acle/crc32b.c: New test. ++ * gcc.target/aarch64/acle/crc32cb.c: Likewise. ++ * gcc.target/aarch64/acle/crc32cd.c: Likewise. ++ * gcc.target/aarch64/acle/crc32ch.c: Likewise. ++ * gcc.target/aarch64/acle/crc32cw.c: Likewise. ++ * gcc.target/aarch64/acle/crc32d.c: Likewise. ++ * gcc.target/aarch64/acle/crc32h.c: Likewise. ++ * gcc.target/aarch64/acle/crc32w.c: Likewise. ++ ++2014-07-17 Yvan Roux ++ ++ Backport from trunk r210153. ++ 2014-05-07 Alan Lawrence ++ ++ * gcc.target/aarch64/simd/vrev16p8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev16p8.x: New file. ++ * gcc.target/aarch64/simd/vrev16qp8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev16qp8.x: New file. ++ * gcc.target/aarch64/simd/vrev16qs8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev16qs8.x: New file. ++ * gcc.target/aarch64/simd/vrev16qu8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev16qu8.x: New file. ++ * gcc.target/aarch64/simd/vrev16s8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev16s8.x: New file. ++ * gcc.target/aarch64/simd/vrev16u8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev16u8.x: New file. ++ * gcc.target/aarch64/simd/vrev32p16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32p16.x: New file. ++ * gcc.target/aarch64/simd/vrev32p8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32p8.x: New file. ++ * gcc.target/aarch64/simd/vrev32qp16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32qp16.x: New file. ++ * gcc.target/aarch64/simd/vrev32qp8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32qp8.x: New file. ++ * gcc.target/aarch64/simd/vrev32qs16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32qs16.x: New file. ++ * gcc.target/aarch64/simd/vrev32qs8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32qs8.x: New file. ++ * gcc.target/aarch64/simd/vrev32qu16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32qu16.x: New file. ++ * gcc.target/aarch64/simd/vrev32qu8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32qu8.x: New file. ++ * gcc.target/aarch64/simd/vrev32s16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32s16.x: New file. ++ * gcc.target/aarch64/simd/vrev32s8_1.c: New file. 
++ * gcc.target/aarch64/simd/vrev32s8.x: New file. ++ * gcc.target/aarch64/simd/vrev32u16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32u16.x: New file. ++ * gcc.target/aarch64/simd/vrev32u8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev32u8.x: New file. ++ * gcc.target/aarch64/simd/vrev64f32_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64f32.x: New file. ++ * gcc.target/aarch64/simd/vrev64p16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64p16.x: New file. ++ * gcc.target/aarch64/simd/vrev64p8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64p8.x: New file. ++ * gcc.target/aarch64/simd/vrev64qf32_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qf32.x: New file. ++ * gcc.target/aarch64/simd/vrev64qp16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qp16.x: New file. ++ * gcc.target/aarch64/simd/vrev64qp8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qp8.x: New file. ++ * gcc.target/aarch64/simd/vrev64qs16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qs16.x: New file. ++ * gcc.target/aarch64/simd/vrev64qs32_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qs32.x: New file. ++ * gcc.target/aarch64/simd/vrev64qs8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qs8.x: New file. ++ * gcc.target/aarch64/simd/vrev64qu16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qu16.x: New file. ++ * gcc.target/aarch64/simd/vrev64qu32_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qu32.x: New file. ++ * gcc.target/aarch64/simd/vrev64qu8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64qu8.x: New file. ++ * gcc.target/aarch64/simd/vrev64s16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64s16.x: New file. ++ * gcc.target/aarch64/simd/vrev64s32_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64s32.x: New file. ++ * gcc.target/aarch64/simd/vrev64s8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64s8.x: New file. ++ * gcc.target/aarch64/simd/vrev64u16_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64u16.x: New file. ++ * gcc.target/aarch64/simd/vrev64u32_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64u32.x: New file. ++ * gcc.target/aarch64/simd/vrev64u8_1.c: New file. ++ * gcc.target/aarch64/simd/vrev64u8.x: New file. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210148, r210151, r210422. ++ 2014-05-14 Alan Lawrence ++ ++ * gcc.target/arm/simd/vtrnqf32_1.c: New file. ++ * gcc.target/arm/simd/vtrnqp16_1.c: New file. ++ * gcc.target/arm/simd/vtrnqp8_1.c: New file. ++ * gcc.target/arm/simd/vtrnqs16_1.c: New file. ++ * gcc.target/arm/simd/vtrnqs32_1.c: New file. ++ * gcc.target/arm/simd/vtrnqs8_1.c: New file. ++ * gcc.target/arm/simd/vtrnqu16_1.c: New file. ++ * gcc.target/arm/simd/vtrnqu32_1.c: New file. ++ * gcc.target/arm/simd/vtrnqu8_1.c: New file. ++ * gcc.target/arm/simd/vtrnf32_1.c: New file. ++ * gcc.target/arm/simd/vtrnp16_1.c: New file. ++ * gcc.target/arm/simd/vtrnp8_1.c: New file. ++ * gcc.target/arm/simd/vtrns16_1.c: New file. ++ * gcc.target/arm/simd/vtrns32_1.c: New file. ++ * gcc.target/arm/simd/vtrns8_1.c: New file. ++ * gcc.target/arm/simd/vtrnu16_1.c: New file. ++ * gcc.target/arm/simd/vtrnu32_1.c: New file. ++ * gcc.target/arm/simd/vtrnu8_1.c: New file. ++ ++ 2014-05-07 Alan Lawrence ++ ++ * gcc.target/aarch64/vtrns32.c: Expect zip[12] insn rather than trn[12]. ++ * gcc.target/aarch64/vtrnu32.c: Likewise. ++ * gcc.target/aarch64/vtrnf32.c: Likewise. ++ ++ 2014-05-07 Alan Lawrence ++ ++ * gcc.target/aarch64/simd/vtrnf32_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnf32.x: New file. ++ * gcc.target/aarch64/simd/vtrnp16_1.c: New file. 
++ * gcc.target/aarch64/simd/vtrnp16.x: New file. ++ * gcc.target/aarch64/simd/vtrnp8_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnp8.x: New file. ++ * gcc.target/aarch64/simd/vtrnqf32_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqf32.x: New file. ++ * gcc.target/aarch64/simd/vtrnqp16_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqp16.x: New file. ++ * gcc.target/aarch64/simd/vtrnqp8_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqp8.x: New file. ++ * gcc.target/aarch64/simd/vtrnqs16_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqs16.x: New file. ++ * gcc.target/aarch64/simd/vtrnqs32_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqs32.x: New file. ++ * gcc.target/aarch64/simd/vtrnqs8_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqs8.x: New file. ++ * gcc.target/aarch64/simd/vtrnqu16_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqu16.x: New file. ++ * gcc.target/aarch64/simd/vtrnqu32_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqu32.x: New file. ++ * gcc.target/aarch64/simd/vtrnqu8_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnqu8.x: New file. ++ * gcc.target/aarch64/simd/vtrns16_1.c: New file. ++ * gcc.target/aarch64/simd/vtrns16.x: New file. ++ * gcc.target/aarch64/simd/vtrns32_1.c: New file. ++ * gcc.target/aarch64/simd/vtrns32.x: New file. ++ * gcc.target/aarch64/simd/vtrns8_1.c: New file. ++ * gcc.target/aarch64/simd/vtrns8.x: New file. ++ * gcc.target/aarch64/simd/vtrnu16_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnu16.x: New file. ++ * gcc.target/aarch64/simd/vtrnu32_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnu32.x: New file. ++ * gcc.target/aarch64/simd/vtrnu8_1.c: New file. ++ * gcc.target/aarch64/simd/vtrnu8.x: New file. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r209794, 209858. ++ 2014-04-25 Marek Polacek ++ ++ PR c/60114 ++ * gcc.dg/pr60114.c: New test. ++ ++ 2014-04-28 Kyrylo Tkachov ++ ++ PR c/60983 ++ * gcc.dg/pr60114.c: Use signed chars. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210861. ++ 2014-05-23 Jiong Wang ++ ++ * gcc.target/aarch64/tail_indirect_call_1.c: New. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r211314. ++ 2014-06-06 James Greenhalgh ++ ++ * gcc.dg/tree-ssa/pr42585.c: Skip for AArch64. ++ * gcc.dg/tree-ssa/sra-12.c: Likewise. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210967. ++ 2014-05-27 Kyrylo Tkachov ++ ++ * lib/target-supports.exp (check_effective_target_vect_bswap): ++ Specify arm*-*-* support. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r210152, 211059. ++ 2014-05-29 Alan Lawrence ++ ++ * gcc.target/arm/simd/vextQf32_1.c: New file. ++ * gcc.target/arm/simd/vextQp16_1.c: New file. ++ * gcc.target/arm/simd/vextQp8_1.c: New file. ++ * gcc.target/arm/simd/vextQs16_1.c: New file. ++ * gcc.target/arm/simd/vextQs32_1.c: New file. ++ * gcc.target/arm/simd/vextQs64_1.c: New file. ++ * gcc.target/arm/simd/vextQs8_1.c: New file. ++ * gcc.target/arm/simd/vextQu16_1.c: New file. ++ * gcc.target/arm/simd/vextQu32_1.c: New file. ++ * gcc.target/arm/simd/vextQu64_1.c: New file. ++ * gcc.target/arm/simd/vextQu8_1.c: New file. ++ * gcc.target/arm/simd/vextQp64_1.c: New file. ++ * gcc.target/arm/simd/vextf32_1.c: New file. ++ * gcc.target/arm/simd/vextp16_1.c: New file. ++ * gcc.target/arm/simd/vextp8_1.c: New file. ++ * gcc.target/arm/simd/vexts16_1.c: New file. ++ * gcc.target/arm/simd/vexts32_1.c: New file. ++ * gcc.target/arm/simd/vexts64_1.c: New file. ++ * gcc.target/arm/simd/vexts8_1.c: New file. ++ * gcc.target/arm/simd/vextu16_1.c: New file. 
++ * gcc.target/arm/simd/vextu32_1.c: New file. ++ * gcc.target/arm/simd/vextu64_1.c: New file. ++ * gcc.target/arm/simd/vextu8_1.c: New file. ++ * gcc.target/arm/simd/vextp64_1.c: New file. ++ ++ 2014-05-07 Alan Lawrence ++ ++ * gcc.target/aarch64/simd/ext_f32.x: New file. ++ * gcc.target/aarch64/simd/ext_f32_1.c: New file. ++ * gcc.target/aarch64/simd/ext_p16.x: New file. ++ * gcc.target/aarch64/simd/ext_p16_1.c: New file. ++ * gcc.target/aarch64/simd/ext_p8.x: New file. ++ * gcc.target/aarch64/simd/ext_p8_1.c: New file. ++ * gcc.target/aarch64/simd/ext_s16.x: New file. ++ * gcc.target/aarch64/simd/ext_s16_1.c: New file. ++ * gcc.target/aarch64/simd/ext_s32.x: New file. ++ * gcc.target/aarch64/simd/ext_s32_1.c: New file. ++ * gcc.target/aarch64/simd/ext_s64.x: New file. ++ * gcc.target/aarch64/simd/ext_s64_1.c: New file. ++ * gcc.target/aarch64/simd/ext_s8.x: New file. ++ * gcc.target/aarch64/simd/ext_s8_1.c: New file. ++ * gcc.target/aarch64/simd/ext_u16.x: New file. ++ * gcc.target/aarch64/simd/ext_u16_1.c: New file. ++ * gcc.target/aarch64/simd/ext_u32.x: New file. ++ * gcc.target/aarch64/simd/ext_u32_1.c: New file. ++ * gcc.target/aarch64/simd/ext_u64.x: New file. ++ * gcc.target/aarch64/simd/ext_u64_1.c: New file. ++ * gcc.target/aarch64/simd/ext_u8.x: New file. ++ * gcc.target/aarch64/simd/ext_u8_1.c: New file. ++ * gcc.target/aarch64/simd/ext_f64.c: New file. ++ * gcc.target/aarch64/simd/extq_f32.x: New file. ++ * gcc.target/aarch64/simd/extq_f32_1.c: New file. ++ * gcc.target/aarch64/simd/extq_p16.x: New file. ++ * gcc.target/aarch64/simd/extq_p16_1.c: New file. ++ * gcc.target/aarch64/simd/extq_p8.x: New file. ++ * gcc.target/aarch64/simd/extq_p8_1.c: New file. ++ * gcc.target/aarch64/simd/extq_s16.x: New file. ++ * gcc.target/aarch64/simd/extq_s16_1.c: New file. ++ * gcc.target/aarch64/simd/extq_s32.x: New file. ++ * gcc.target/aarch64/simd/extq_s32_1.c: New file. ++ * gcc.target/aarch64/simd/extq_s64.x: New file. ++ * gcc.target/aarch64/simd/extq_s64_1.c: New file. ++ * gcc.target/aarch64/simd/extq_s8.x: New file. ++ * gcc.target/aarch64/simd/extq_s8_1.c: New file. ++ * gcc.target/aarch64/simd/extq_u16.x: New file. ++ * gcc.target/aarch64/simd/extq_u16_1.c: New file. ++ * gcc.target/aarch64/simd/extq_u32.x: New file. ++ ++2014-07-16 Yvan Roux ++ ++ Backport from trunk r209940, r209943, r209947. ++ 2014-04-30 Alan Lawrence ++ ++ * gcc.target/arm/simd/vuzpqf32_1.c: New file. ++ * gcc.target/arm/simd/vuzpqp16_1.c: New file. ++ * gcc.target/arm/simd/vuzpqp8_1.c: New file. ++ * gcc.target/arm/simd/vuzpqs16_1.c: New file. ++ * gcc.target/arm/simd/vuzpqs32_1.c: New file. ++ * gcc.target/arm/simd/vuzpqs8_1.c: New file. ++ * gcc.target/arm/simd/vuzpqu16_1.c: New file. ++ * gcc.target/arm/simd/vuzpqu32_1.c: New file. ++ * gcc.target/arm/simd/vuzpqu8_1.c: New file. ++ * gcc.target/arm/simd/vuzpf32_1.c: New file. ++ * gcc.target/arm/simd/vuzpp16_1.c: New file. ++ * gcc.target/arm/simd/vuzpp8_1.c: New file. ++ * gcc.target/arm/simd/vuzps16_1.c: New file. ++ * gcc.target/arm/simd/vuzps32_1.c: New file. ++ * gcc.target/arm/simd/vuzps8_1.c: New file. ++ * gcc.target/arm/simd/vuzpu16_1.c: New file. ++ * gcc.target/arm/simd/vuzpu32_1.c: New file. ++ * gcc.target/arm/simd/vuzpu8_1.c: New file. ++ ++ 2014-04-30 Alan Lawrence ++ ++ * gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2. ++ * gcc.target/aarch64/vuzpu32_1.c: Likewise. ++ * gcc.target/aarch64/vuzpf32_1.c: Likewise. ++ ++ 2014-04-30 Alan Lawrence ++ ++ * gcc.target/aarch64/simd/vuzpf32_1.c: New file. 
++ * gcc.target/aarch64/simd/vuzpf32.x: New file. ++ * gcc.target/aarch64/simd/vuzpp16_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpp16.x: New file. ++ * gcc.target/aarch64/simd/vuzpp8_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpp8.x: New file. ++ * gcc.target/aarch64/simd/vuzpqf32_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqf32.x: New file. ++ * gcc.target/aarch64/simd/vuzpqp16_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqp16.x: New file. ++ * gcc.target/aarch64/simd/vuzpqp8_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqp8.x: New file. ++ * gcc.target/aarch64/simd/vuzpqs16_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqs16.x: New file. ++ * gcc.target/aarch64/simd/vuzpqs32_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqs32.x: New file. ++ * gcc.target/aarch64/simd/vuzpqs8_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqs8.x: New file. ++ * gcc.target/aarch64/simd/vuzpqu16_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqu16.x: New file. ++ * gcc.target/aarch64/simd/vuzpqu32_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqu32.x: New file. ++ * gcc.target/aarch64/simd/vuzpqu8_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpqu8.x: New file. ++ * gcc.target/aarch64/simd/vuzps16_1.c: New file. ++ * gcc.target/aarch64/simd/vuzps16.x: New file. ++ * gcc.target/aarch64/simd/vuzps32_1.c: New file. ++ * gcc.target/aarch64/simd/vuzps32.x: New file. ++ * gcc.target/aarch64/simd/vuzps8_1.c: New file. ++ * gcc.target/aarch64/simd/vuzps8.x: New file. ++ * gcc.target/aarch64/simd/vuzpu16_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpu16.x: New file. ++ * gcc.target/aarch64/simd/vuzpu32_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpu32.x: New file. ++ * gcc.target/aarch64/simd/vuzpu8_1.c: New file. ++ * gcc.target/aarch64/simd/vuzpu8.x: New file. ++ ++2014-06-25 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06-1 released. ++ ++2014-06-13 Yvan Roux ++ ++ Backport from trunk r211206. ++ 2014-06-03 Andrew Pinski ++ ++ * gcc.c-torture/compile/20140528-1.c: New testcase. ++ ++2014-06-12 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06 released. ++ ++2014-05-25 Yvan Roux ++ ++ Backport from trunk r209908. ++ 2013-04-29 Alan Lawrence ++ ++ * gcc.target/arm/simd/simd.exp: New file. ++ * gcc.target/arm/simd/vzipqf32_1.c: New file. ++ * gcc.target/arm/simd/vzipqp16_1.c: New file. ++ * gcc.target/arm/simd/vzipqp8_1.c: New file. ++ * gcc.target/arm/simd/vzipqs16_1.c: New file. ++ * gcc.target/arm/simd/vzipqs32_1.c: New file. ++ * gcc.target/arm/simd/vzipqs8_1.c: New file. ++ * gcc.target/arm/simd/vzipqu16_1.c: New file. ++ * gcc.target/arm/simd/vzipqu32_1.c: New file. ++ * gcc.target/arm/simd/vzipqu8_1.c: New file. ++ * gcc.target/arm/simd/vzipf32_1.c: New file. ++ * gcc.target/arm/simd/vzipp16_1.c: New file. ++ * gcc.target/arm/simd/vzipp8_1.c: New file. ++ * gcc.target/arm/simd/vzips16_1.c: New file. ++ * gcc.target/arm/simd/vzips32_1.c: New file. ++ * gcc.target/arm/simd/vzips8_1.c: New file. ++ * gcc.target/arm/simd/vzipu16_1.c: New file. ++ * gcc.target/arm/simd/vzipu32_1.c: New file. ++ * gcc.target/arm/simd/vzipu8_1.c: New file. ++ ++2014-05-25 Yvan Roux ++ ++ Backport from trunk r209893. ++ 2014-04-29 Alan Lawrence ++ ++ * gcc.target/aarch64/simd/simd.exp: New file. ++ * gcc.target/aarch64/simd/vzipf32_1.c: New file. ++ * gcc.target/aarch64/simd/vzipf32.x: New file. ++ * gcc.target/aarch64/simd/vzipp16_1.c: New file. ++ * gcc.target/aarch64/simd/vzipp16.x: New file. ++ * gcc.target/aarch64/simd/vzipp8_1.c: New file. ++ * gcc.target/aarch64/simd/vzipp8.x: New file. 
++ * gcc.target/aarch64/simd/vzipqf32_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqf32.x: New file. ++ * gcc.target/aarch64/simd/vzipqp16_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqp16.x: New file. ++ * gcc.target/aarch64/simd/vzipqp8_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqp8.x: New file. ++ * gcc.target/aarch64/simd/vzipqs16_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqs16.x: New file. ++ * gcc.target/aarch64/simd/vzipqs32_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqs32.x: New file. ++ * gcc.target/aarch64/simd/vzipqs8_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqs8.x: New file. ++ * gcc.target/aarch64/simd/vzipqu16_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqu16.x: New file. ++ * gcc.target/aarch64/simd/vzipqu32_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqu32.x: New file. ++ * gcc.target/aarch64/simd/vzipqu8_1.c: New file. ++ * gcc.target/aarch64/simd/vzipqu8.x: New file. ++ * gcc.target/aarch64/simd/vzips16_1.c: New file. ++ * gcc.target/aarch64/simd/vzips16.x: New file. ++ * gcc.target/aarch64/simd/vzips32_1.c: New file. ++ * gcc.target/aarch64/simd/vzips32.x: New file. ++ * gcc.target/aarch64/simd/vzips8_1.c: New file. ++ * gcc.target/aarch64/simd/vzips8.x: New file. ++ * gcc.target/aarch64/simd/vzipu16_1.c: New file. ++ * gcc.target/aarch64/simd/vzipu16.x: New file. ++ * gcc.target/aarch64/simd/vzipu32_1.c: New file. ++ * gcc.target/aarch64/simd/vzipu32.x: New file. ++ * gcc.target/aarch64/simd/vzipu8_1.c: New file. ++ * gcc.target/aarch64/simd/vzipu8.x: New file. ++ ++2014-05-25 Yvan Roux ++ ++ Backport from trunk r209808. ++ 2014-04-25 Jiong Wang ++ ++ * gcc.target/arm/tail-long-call.c: New test. ++ ++2014-05-25 Yvan Roux ++ ++ Backport from trunk r209749. ++ 2014-04-24 Alan Lawrence ++ ++ * lib/target-supports.exp (check_effective_target_vect_perm): Return ++ true for aarch64_be. ++ ++2014-05-23 Yvan Roux ++ ++ Backport from trunk r209736. ++ 2014-04-24 Kyrylo Tkachov ++ ++ * lib/target-supports.exp (check_effective_target_vect_bswap): New. ++ * gcc.dg/vect/vect-bswap16: New test. ++ * gcc.dg/vect/vect-bswap32: Likewise. ++ * gcc.dg/vect/vect-bswap64: Likewise. ++ ++2014-05-23 Yvan Roux ++ ++ Backport from trunk r209713. ++ 2014-04-23 Alex Velenko ++ ++ * gcc.target/aarch64/vdup_lane_1.c: New testcase. ++ * gcc.target/aarch64/vdup_lane_2.c: New testcase. ++ * gcc.target/aarch64/vdup_n_1.c: New testcase. ++ ++2014-05-23 Yvan Roux ++ ++ Backport from trunk r209704, 209705. ++ 2014-04-23 Kyrylo Tkachov ++ ++ * gcc.target/arm/rev16.c: New test. ++ ++ 2014-04-23 Kyrylo Tkachov ++ ++ * gcc.target/aarch64/rev16_1.c: New test. ++ ++2014-05-23 Yvan Roux ++ ++ Backport from trunk r209642. ++ 2014-04-22 Alex Velenko ++ ++ * gcc.target/aarch64/vreinterpret_f64_1.c: New. ++ ++2014-05-23 Yvan Roux ++ ++ Backport from trunk r209640. ++ 2014-04-22 Alex Velenko ++ ++ * gcc.target/aarch64/vqneg_s64_1.c: New testcase. ++ * gcc.target/aarch64/vqabs_s64_1.c: New testcase. ++ ++2014-05-23 Yvan Roux ++ ++ Backport from trunk r209613, 209614. ++ 2014-04-22 Ian Bolton ++ ++ * gcc.target/arm/anddi_notdi-1.c: New test. ++ * gcc.target/arm/iordi_notdi-1.c: New test case. ++ ++ 2014-04-22 Ian Bolton ++ ++ * gcc.target/arm/iordi_notdi-1.c: New test. ++ ++2014-05-23 Yvan Roux ++ ++ Backport from trunk r209559. ++ 2014-04-22 Alex Velenko ++ ++ * gcc.target/aarch64/vrnd_f64_1.c : New file. ++ ++2014-05-14 Yvan Roux ++ ++ GCC Linaro 4.9-2014.05 released. ++ ++2014-05-13 Yvan Roux ++ ++ Backport from trunk r209889. 
++ 2014-04-29 Zhenqiang Chen ++ ++ * gcc.target/aarch64/fcsel_1.c: New test case. ++ ++2014-04-22 Yvan Roux ++ ++ GCC Linaro 4.9-2014.04 released. +--- a/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c ++++ b/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c +@@ -0,0 +1,9 @@ ++unsigned f(unsigned flags, unsigned capabilities) ++{ ++ unsigned gfp_mask; ++ unsigned gfp_notmask = 0; ++ gfp_mask = flags & ((1 << 25) - 1); ++ if (!(capabilities & 0x00000001)) ++ gfp_mask |= 0x1000000u; ++ return (gfp_mask & ~gfp_notmask); ++} +--- a/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c +@@ -35,6 +35,6 @@ + /* Whether the structs are totally scalarized or not depends on the + MOVE_RATIO macro definition in the back end. The scalarization will + not take place when using small values for MOVE_RATIO. */ +-/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ +-/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ ++/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ ++/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ + /* { dg-final { cleanup-tree-dump "optimized" } } */ +--- a/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c +@@ -21,5 +21,5 @@ + *p = l; + } + +-/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "avr*-*-* nds32*-*-*" } } } } */ ++/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! 
"aarch64*-*-* avr*-*-* nds32*-*-*" } } } } */ + /* { dg-final { cleanup-tree-dump "release_ssa" } } */ +--- a/src/gcc/testsuite/gcc.dg/pr60114.c ++++ b/src/gcc/testsuite/gcc.dg/pr60114.c +@@ -0,0 +1,31 @@ ++/* PR c/60114 */ ++/* { dg-do compile } */ ++/* { dg-options "-Wconversion" } */ ++ ++struct S { int n, u[2]; }; ++const signed char z[] = { ++ [0] = 0x100, /* { dg-warning "9:overflow in implicit constant conversion" } */ ++ [2] = 0x101, /* { dg-warning "9:overflow in implicit constant conversion" } */ ++}; ++int A[] = { ++ 0, 0x80000000, /* { dg-warning "16:conversion of unsigned constant value to negative integer" } */ ++ 0xA, 0x80000000, /* { dg-warning "18:conversion of unsigned constant value to negative integer" } */ ++ 0xA, 0xA, 0x80000000 /* { dg-warning "23:conversion of unsigned constant value to negative integer" } */ ++ }; ++int *p = (int []) { 0x80000000 }; /* { dg-warning "21:conversion of unsigned constant value to negative integer" } */ ++union { int k; } u = { .k = 0x80000000 }; /* { dg-warning "29:conversion of unsigned constant value to negative integer" } */ ++typedef int H[]; ++void ++foo (void) ++{ ++ signed char a[][3] = { { 0x100, /* { dg-warning "28:overflow in implicit constant conversion" } */ ++ 1, 0x100 }, /* { dg-warning "24:overflow in implicit constant conversion" } */ ++ { '\0', 0x100, '\0' } /* { dg-warning "27:overflow in implicit constant conversion" } */ ++ }; ++ (const signed char []) { 0x100 }; /* { dg-warning "28:overflow in implicit constant conversion" } */ ++ (const float []) { 1e0, 1e1, 1e100 }; /* { dg-warning "32:conversion" } */ ++ struct S s1 = { 0x80000000 }; /* { dg-warning "19:conversion of unsigned constant value to negative integer" } */ ++ struct S s2 = { .n = 0x80000000 }; /* { dg-warning "24:conversion of unsigned constant value to negative integer" } */ ++ struct S s3 = { .u[1] = 0x80000000 }; /* { dg-warning "27:conversion of unsigned constant value to negative integer" } */ ++ H h = { 1, 2, 0x80000000 }; /* { dg-warning "17:conversion of unsigned constant value to negative integer" } */ ++} +--- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c +@@ -0,0 +1,44 @@ ++/* { dg-require-effective-target vect_bswap } */ ++ ++#include "tree-vect.h" ++ ++#define N 128 ++ ++volatile int y = 0; ++ ++static inline void ++vfoo32 (unsigned int* a) ++{ ++ int i = 0; ++ for (i = 0; i < N; ++i) ++ a[i] = __builtin_bswap32 (a[i]); ++} ++ ++int ++main (void) ++{ ++ unsigned int arr[N]; ++ unsigned int expect[N]; ++ int i; ++ ++ for (i = 0; i < N; ++i) ++ { ++ arr[i] = i; ++ expect[i] = __builtin_bswap32 (i); ++ if (y) /* Avoid vectorisation. */ ++ abort (); ++ } ++ ++ vfoo32 (arr); ++ ++ for (i = 0; i < N; ++i) ++ { ++ if (arr[i] != expect[i]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c +@@ -0,0 +1,44 @@ ++/* { dg-require-effective-target vect_bswap } */ ++ ++#include "tree-vect.h" ++ ++#define N 128 ++ ++volatile int y = 0; ++ ++static inline void ++vfoo16 (unsigned short int* a) ++{ ++ int i = 0; ++ for (i = 0; i < N; ++i) ++ a[i] = __builtin_bswap16 (a[i]); ++} ++ ++int ++main (void) ++{ ++ unsigned short arr[N]; ++ unsigned short expect[N]; ++ int i; ++ ++ for (i = 0; i < N; ++i) ++ { ++ arr[i] = i; ++ expect[i] = __builtin_bswap16 (i); ++ if (y) /* Avoid vectorisation. 
*/ ++ abort (); ++ } ++ ++ vfoo16 (arr); ++ ++ for (i = 0; i < N; ++i) ++ { ++ if (arr[i] != expect[i]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c ++++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c +@@ -0,0 +1,44 @@ ++/* { dg-require-effective-target vect_bswap } */ ++ ++#include "tree-vect.h" ++ ++#define N 128 ++ ++volatile int y = 0; ++ ++static inline void ++vfoo64 (unsigned long long* a) ++{ ++ int i = 0; ++ for (i = 0; i < N; ++i) ++ a[i] = __builtin_bswap64 (a[i]); ++} ++ ++int ++main (void) ++{ ++ unsigned long long arr[N]; ++ unsigned long long expect[N]; ++ int i; ++ ++ for (i = 0; i < N; ++i) ++ { ++ arr[i] = i; ++ expect[i] = __builtin_bswap64 (i); ++ if (y) /* Avoid vectorisation. */ ++ abort (); ++ } ++ ++ vfoo64 (arr); ++ ++ for (i = 0; i < N; ++i) ++ { ++ if (arr[i] != expect[i]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { cleanup-tree-dump "vect" } } */ +--- a/src/gcc/objcp/ChangeLog.linaro ++++ b/src/gcc/objcp/ChangeLog.linaro +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ ++2014-06-25 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06-1 released. ++ ++2014-06-12 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06 released. ++ ++2014-05-14 Yvan Roux ++ ++ GCC Linaro 4.9-2014.05 released. ++ ++2014-04-22 Yvan Roux ++ ++ GCC Linaro 4.9-2014.04 released. +--- a/src/gcc/cp/ChangeLog.linaro ++++ b/src/gcc/cp/ChangeLog.linaro +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ ++2014-06-25 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06-1 released. ++ ++2014-06-12 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06 released. ++ ++2014-05-14 Yvan Roux ++ ++ GCC Linaro 4.9-2014.05 released. ++ ++2014-04-22 Yvan Roux ++ ++ GCC Linaro 4.9-2014.04 released. +--- a/src/gcc/expr.c ++++ b/src/gcc/expr.c +@@ -68,22 +68,6 @@ + #include "tree-ssa-address.h" + #include "cfgexpand.h" + +-/* Decide whether a function's arguments should be processed +- from first to last or from last to first. +- +- They should if the stack and args grow in opposite directions, but +- only if we have push insns. */ +- +-#ifdef PUSH_ROUNDING +- +-#ifndef PUSH_ARGS_REVERSED +-#if defined (STACK_GROWS_DOWNWARD) != defined (ARGS_GROW_DOWNWARD) +-#define PUSH_ARGS_REVERSED /* If it's last to first. */ +-#endif +-#endif +- +-#endif +- + #ifndef STACK_PUSH_CODE + #ifdef STACK_GROWS_DOWNWARD + #define STACK_PUSH_CODE PRE_DEC +@@ -4353,11 +4337,7 @@ + /* Loop over all the words allocated on the stack for this arg. */ + /* We can do it by words, because any scalar bigger than a word + has a size a multiple of a word. */ +-#ifndef PUSH_ARGS_REVERSED +- for (i = not_stack; i < size; i++) +-#else + for (i = size - 1; i >= not_stack; i--) +-#endif + if (i >= not_stack + offset) + emit_push_insn (operand_subword_force (x, i, mode), + word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX, +--- a/src/gcc/go/ChangeLog.linaro ++++ b/src/gcc/go/ChangeLog.linaro +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ ++2014-06-25 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06-1 released. ++ ++2014-06-12 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06 released. ++ ++2014-05-14 Yvan Roux ++ ++ GCC Linaro 4.9-2014.05 released. ++ ++2014-04-22 Yvan Roux ++ ++ GCC Linaro 4.9-2014.04 released. 
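The genattrtab.c hunk below fixes how the generated bypass table is sized. process_bypasses matches each bypass pattern against the insn reservations with fnmatch, so one wildcard pattern can mark several reservations (and several patterns can match the same reservation); sizing the COND vector by the number of patterns (n_bypasses) therefore need not match the number of reservations actually marked. The hunk adds an n_bypassed counter that counts each reservation at most once (hence the added break) and sizes the vector from that instead. A minimal standalone C sketch of the distinction, not part of the patch, with invented reservation and pattern names:

#include <stdio.h>
#include <fnmatch.h>

int
main (void)
{
  const char *reservations[] = { "example_load1", "example_load2",
                                 "example_store1", "example_alu" };
  const char *bypass_patterns[] = { "example_load*" };
  size_t n_reservs = sizeof reservations / sizeof *reservations;
  size_t n_bypasses = sizeof bypass_patterns / sizeof *bypass_patterns;
  size_t n_bypassed = 0;
  size_t r, b;

  /* Count each reservation at most once, mirroring the break added to
     process_bypasses.  */
  for (r = 0; r < n_reservs; r++)
    for (b = 0; b < n_bypasses; b++)
      if (fnmatch (bypass_patterns[b], reservations[r], 0) == 0)
        {
          n_bypassed++;
          break;
        }

  /* The one wildcard pattern matches two reservations here, so a table
     sized from n_bypasses (1) would be too small for the matches (2).  */
  printf ("patterns: %zu, reservations bypassed: %zu\n",
          n_bypasses, n_bypassed);
  return 0;
}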
+--- a/src/gcc/genattrtab.c ++++ b/src/gcc/genattrtab.c +@@ -4765,6 +4765,7 @@ + + static struct bypass_list *all_bypasses; + static size_t n_bypasses; ++static size_t n_bypassed; + + static void + gen_bypass_1 (const char *s, size_t len) +@@ -4810,12 +4811,18 @@ + struct bypass_list *b; + struct insn_reserv *r; + ++ n_bypassed = 0; ++ + /* The reservation list is likely to be much longer than the bypass + list. */ + for (r = all_insn_reservs; r; r = r->next) + for (b = all_bypasses; b; b = b->next) + if (fnmatch (b->pattern, r->name, 0) == 0) +- r->bypassed = true; ++ { ++ n_bypassed++; ++ r->bypassed = true; ++ break; ++ } + } + + /* Check that attribute NAME is used in define_insn_reservation condition +@@ -5074,7 +5081,7 @@ + process_bypasses (); + + byps_exp = rtx_alloc (COND); +- XVEC (byps_exp, 0) = rtvec_alloc (n_bypasses * 2); ++ XVEC (byps_exp, 0) = rtvec_alloc (n_bypassed * 2); + XEXP (byps_exp, 1) = make_numeric_value (0); + for (decl = all_insn_reservs, i = 0; + decl; +--- a/src/gcc/ada/ChangeLog.linaro ++++ b/src/gcc/ada/ChangeLog.linaro +@@ -0,0 +1,63 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ ++2014-06-25 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06-1 released. ++ ++2014-06-12 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06 released. ++ ++2014-05-14 Yvan Roux ++ ++ GCC Linaro 4.9-2014.05 released. ++ ++2014-05-13 Yvan Roux ++ ++ Backport from trunk r209653,209866,209871. ++ ++ 2014-04-28 Richard Henderson ++ ++ * gcc-interface/Makefile.in: Support aarch64-linux. ++ ++ 2014-04-28 Eric Botcazou ++ ++ * exp_dbug.ads (Get_External_Name): Add 'False' default to Has_Suffix, ++ add 'Suffix' parameter and adjust comment. ++ (Get_External_Name_With_Suffix): Delete. ++ * exp_dbug.adb (Get_External_Name_With_Suffix): Merge into... ++ (Get_External_Name): ...here. Add 'False' default to Has_Suffix, add ++ 'Suffix' parameter. ++ (Get_Encoded_Name): Remove 2nd argument in call to Get_External_Name. ++ Call Get_External_Name instead of Get_External_Name_With_Suffix. ++ (Get_Secondary_DT_External_Name): Likewise. ++ * exp_cg.adb (Write_Call_Info): Likewise. ++ * exp_disp.adb (Export_DT): Likewise. ++ (Import_DT): Likewise. ++ * comperr.ads (Compiler_Abort): Remove Code parameter and add From_GCC ++ parameter with False default. ++ * comperr.adb (Compiler_Abort): Likewise. Adjust accordingly. ++ * types.h (Fat_Pointer): Rename into... ++ (String_Pointer): ...this. Add comment on interfacing rules. ++ * fe.h (Compiler_Abort): Adjust for above renaming. ++ (Error_Msg_N): Likewise. ++ (Error_Msg_NE): Likewise. ++ (Get_External_Name): Likewise. Add third parameter. ++ (Get_External_Name_With_Suffix): Delete. ++ * gcc-interface/decl.c (STDCALL_PREFIX): Define. ++ (create_concat_name): Adjust call to Get_External_Name, remove call to ++ Get_External_Name_With_Suffix, use STDCALL_PREFIX, adjust for renaming. ++ * gcc-interface/trans.c (post_error): Likewise. ++ (post_error_ne): Likewise. ++ * gcc-interface/misc.c (internal_error_function): Likewise. ++ ++ 2014-04-22 Richard Henderson ++ ++ * init.c [__linux__] (HAVE_GNAT_ALTERNATE_STACK): New define. ++ (__gnat_alternate_stack): Enable for all linux except ia64. ++ ++2014-04-22 Yvan Roux ++ ++ GCC Linaro 4.9-2014.04 released. +--- a/src/gcc/fortran/ChangeLog.linaro ++++ b/src/gcc/fortran/ChangeLog.linaro +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ ++2014-06-25 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06-1 released. ++ ++2014-06-12 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06 released. 
++ ++2014-05-14 Yvan Roux ++ ++ GCC Linaro 4.9-2014.05 released. ++ ++2014-04-22 Yvan Roux ++ ++ GCC Linaro 4.9-2014.04 released. +--- a/src/gcc/configure.ac ++++ b/src/gcc/configure.ac +@@ -809,7 +809,7 @@ + ) + AC_SUBST(CONFIGURE_SPECS) + +-ACX_PKGVERSION([GCC]) ++ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) + ACX_BUGURL([http://gcc.gnu.org/bugs.html]) + + # Sanity check enable_languages in case someone does not run the toplevel +--- a/src/gcc/calls.c ++++ b/src/gcc/calls.c +@@ -1104,8 +1104,6 @@ + { + CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far); + location_t loc = EXPR_LOCATION (exp); +- /* 1 if scanning parms front to back, -1 if scanning back to front. */ +- int inc; + + /* Count arg position in order args appear. */ + int argpos; +@@ -1116,22 +1114,9 @@ + args_size->var = 0; + + /* In this loop, we consider args in the order they are written. +- We fill up ARGS from the front or from the back if necessary +- so that in any case the first arg to be pushed ends up at the front. */ ++ We fill up ARGS from the back. */ + +- if (PUSH_ARGS_REVERSED) +- { +- i = num_actuals - 1, inc = -1; +- /* In this case, must reverse order of args +- so that we compute and push the last arg first. */ +- } +- else +- { +- i = 0, inc = 1; +- } +- +- /* First fill in the actual arguments in the ARGS array, splitting +- complex arguments if necessary. */ ++ i = num_actuals - 1; + { + int j = i; + call_expr_arg_iterator iter; +@@ -1140,7 +1125,7 @@ + if (struct_value_addr_value) + { + args[j].tree_value = struct_value_addr_value; +- j += inc; ++ j--; + } + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { +@@ -1152,17 +1137,17 @@ + { + tree subtype = TREE_TYPE (argtype); + args[j].tree_value = build1 (REALPART_EXPR, subtype, arg); +- j += inc; ++ j--; + args[j].tree_value = build1 (IMAGPART_EXPR, subtype, arg); + } + else + args[j].tree_value = arg; +- j += inc; ++ j--; + } + } + + /* I counts args in order (to be) pushed; ARGPOS counts in order written. */ +- for (argpos = 0; argpos < num_actuals; i += inc, argpos++) ++ for (argpos = 0; argpos < num_actuals; i--, argpos++) + { + tree type = TREE_TYPE (args[i].tree_value); + int unsignedp; +@@ -2952,9 +2937,8 @@ + + compute_argument_addresses (args, argblock, num_actuals); + +- /* If we push args individually in reverse order, perform stack alignment +- before the first push (the last arg). */ +- if (PUSH_ARGS_REVERSED && argblock == 0 ++ /* Perform stack alignment before the first push (the last arg). */ ++ if (argblock == 0 + && adjusted_args_size.constant > reg_parm_stack_space + && adjusted_args_size.constant != unadjusted_args_size) + { +@@ -3097,12 +3081,6 @@ + sibcall_failure = 1; + } + +- /* If we pushed args in forward order, perform stack alignment +- after pushing the last arg. */ +- if (!PUSH_ARGS_REVERSED && argblock == 0) +- anti_adjust_stack (GEN_INT (adjusted_args_size.constant +- - unadjusted_args_size)); +- + /* If register arguments require space on the stack and stack space + was not preallocated, allocate stack space here for arguments + passed in registers. */ +@@ -3152,8 +3130,7 @@ + if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) + { + int arg_nr = return_flags & ERF_RETURN_ARG_MASK; +- if (PUSH_ARGS_REVERSED) +- arg_nr = num_actuals - arg_nr - 1; ++ arg_nr = num_actuals - arg_nr - 1; + if (arg_nr >= 0 + && arg_nr < num_actuals + && args[arg_nr].reg +@@ -3597,7 +3574,6 @@ + isn't present here, so we default to native calling abi here. 
*/ + tree fndecl ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ + tree fntype ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ +- int inc; + int count; + rtx argblock = 0; + CUMULATIVE_ARGS args_so_far_v; +@@ -3946,22 +3922,13 @@ + argblock = push_block (GEN_INT (args_size.constant), 0, 0); + } + +- /* If we push args individually in reverse order, perform stack alignment ++ /* We push args individually in reverse order, perform stack alignment + before the first push (the last arg). */ +- if (argblock == 0 && PUSH_ARGS_REVERSED) ++ if (argblock == 0) + anti_adjust_stack (GEN_INT (args_size.constant + - original_args_size.constant)); + +- if (PUSH_ARGS_REVERSED) +- { +- inc = -1; +- argnum = nargs - 1; +- } +- else +- { +- inc = 1; +- argnum = 0; +- } ++ argnum = nargs - 1; + + #ifdef REG_PARM_STACK_SPACE + if (ACCUMULATE_OUTGOING_ARGS) +@@ -3978,7 +3945,7 @@ + + /* ARGNUM indexes the ARGVEC array in the order in which the arguments + are to be pushed. */ +- for (count = 0; count < nargs; count++, argnum += inc) ++ for (count = 0; count < nargs; count++, argnum--) + { + enum machine_mode mode = argvec[argnum].mode; + rtx val = argvec[argnum].value; +@@ -4080,17 +4047,8 @@ + } + } + +- /* If we pushed args in forward order, perform stack alignment +- after pushing the last arg. */ +- if (argblock == 0 && !PUSH_ARGS_REVERSED) +- anti_adjust_stack (GEN_INT (args_size.constant +- - original_args_size.constant)); ++ argnum = nargs - 1; + +- if (PUSH_ARGS_REVERSED) +- argnum = nargs - 1; +- else +- argnum = 0; +- + fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); + + /* Now load any reg parms into their regs. */ +@@ -4097,7 +4055,7 @@ + + /* ARGNUM indexes the ARGVEC array in the order in which the arguments + are to be pushed. */ +- for (count = 0; count < nargs; count++, argnum += inc) ++ for (count = 0; count < nargs; count++, argnum--) + { + enum machine_mode mode = argvec[argnum].mode; + rtx val = argvec[argnum].value; +--- a/src/gcc/lto/ChangeLog.linaro ++++ b/src/gcc/lto/ChangeLog.linaro +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ ++2014-06-25 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06-1 released. ++ ++2014-06-12 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06 released. ++ ++2014-05-14 Yvan Roux ++ ++ GCC Linaro 4.9-2014.05 released. ++ ++2014-04-22 Yvan Roux ++ ++ GCC Linaro 4.9-2014.04 released. +--- a/src/gcc/po/ChangeLog.linaro ++++ b/src/gcc/po/ChangeLog.linaro +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ ++2014-06-25 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06-1 released. ++ ++2014-06-12 Yvan Roux ++ ++ GCC Linaro 4.9-2014.06 released. ++ ++2014-05-14 Yvan Roux ++ ++ GCC Linaro 4.9-2014.05 released. ++ ++2014-04-22 Yvan Roux ++ ++ GCC Linaro 4.9-2014.04 released. 
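
[Editor's note] The calls.c hunks above, together with the earlier expr.c hunk, remove the PUSH_ARGS_REVERSED configuration entirely: both the normal call path and the emit_library_call path now always fill the argument array from the back (argnum = nargs - 1, decrementing), so the first argument is pushed last and ends up on top of a downward-growing stack, and stack alignment always happens before the first push. A standalone sketch of that single remaining ordering — illustrative only, not GCC source; the downward-growing stack is modelled here as a plain array:

#include <stdio.h>

#define NARGS 4

int
main (void)
{
  int vals[NARGS] = { 10, 20, 30, 40 };  /* arguments in written order */
  int stack[NARGS];
  int sp = NARGS;                        /* stack pointer, grows downward */

  /* Always push in reverse order, as the patched calls.c does: the
     last written argument is pushed first, so the first argument
     lands at the lowest address, i.e. on top of the stack.  */
  for (int argnum = NARGS - 1; argnum >= 0; argnum--)
    stack[--sp] = vals[argnum];

  for (int i = 0; i < NARGS; i++)
    printf ("stack[%d] = %d\n", i, stack[i]);  /* prints 10 20 30 40 */

  return 0;
}
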
+--- a/src/gcc/config.gcc ++++ b/src/gcc/config.gcc +@@ -311,8 +311,7 @@ + ;; + aarch64*-*-*) + cpu_type=aarch64 +- need_64bit_hwint=yes +- extra_headers="arm_neon.h" ++ extra_headers="arm_neon.h arm_acle.h" + extra_objs="aarch64-builtins.o aarch-common.o" + target_has_targetm_common=yes + ;; +--- a/src/gcc/Makefile.in ++++ b/src/gcc/Makefile.in +@@ -2798,7 +2798,7 @@ + contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \ + fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \ + implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi \ +- arm-acle-intrinsics.texi ++ arm-acle-intrinsics.texi aarch64-acle-intrinsics.texi + + # we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with + # the generated tm.texi; the latter might have a more recent timestamp, +--- a/src/gcc/config/host-linux.c ++++ b/src/gcc/config/host-linux.c +@@ -86,6 +86,8 @@ + # define TRY_EMPTY_VM_SPACE 0x60000000 + #elif defined(__mc68000__) + # define TRY_EMPTY_VM_SPACE 0x40000000 ++#elif defined(__aarch64__) && defined(__ILP32__) ++# define TRY_EMPTY_VM_SPACE 0x60000000 + #elif defined(__aarch64__) + # define TRY_EMPTY_VM_SPACE 0x1000000000 + #elif defined(__ARM_EABI__) +--- a/src/gcc/config/aarch64/aarch64-simd.md ++++ b/src/gcc/config/aarch64/aarch64-simd.md +@@ -19,8 +19,8 @@ + ;; . + + (define_expand "mov" +- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") +- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] ++ [(set (match_operand:VALL 0 "nonimmediate_operand" "") ++ (match_operand:VALL 1 "general_operand" ""))] + "TARGET_SIMD" + " + if (GET_CODE (operands[0]) == MEM) +@@ -29,8 +29,8 @@ + ) + + (define_expand "movmisalign" +- [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") +- (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] ++ [(set (match_operand:VALL 0 "nonimmediate_operand" "") ++ (match_operand:VALL 1 "general_operand" ""))] + "TARGET_SIMD" + { + /* This pattern is not permitted to fail during expansion: if both arguments +@@ -91,9 +91,9 @@ + ) + + (define_insn "*aarch64_simd_mov" +- [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" ++ [(set (match_operand:VD 0 "nonimmediate_operand" + "=w, m, w, ?r, ?w, ?r, w") +- (match_operand:VD 1 "aarch64_simd_general_operand" ++ (match_operand:VD 1 "general_operand" + "m, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) +@@ -119,9 +119,9 @@ + ) + + (define_insn "*aarch64_simd_mov" +- [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand" ++ [(set (match_operand:VQ 0 "nonimmediate_operand" + "=w, m, w, ?r, ?w, ?r, w") +- (match_operand:VQ 1 "aarch64_simd_general_operand" ++ (match_operand:VQ 1 "general_operand" + "m, w, w, w, r, r, Dn"))] + "TARGET_SIMD + && (register_operand (operands[0], mode) +@@ -286,6 +286,14 @@ + [(set_attr "type" "neon_mul_")] + ) + ++(define_insn "bswap" ++ [(set (match_operand:VDQHSD 0 "register_operand" "=w") ++ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] ++ "TARGET_SIMD" ++ "rev\\t%0., %1." ++ [(set_attr "type" "neon_rev")] ++) ++ + (define_insn "*aarch64_mul3_elt" + [(set (match_operand:VMUL 0 "register_operand" "=w") + (mult:VMUL +@@ -954,7 +962,7 @@ + dup\\t%d0, %1.d[0] + fmov\\t%d0, %1 + dup\\t%d0, %1" +- [(set_attr "type" "neon_dup,fmov,neon_dup") ++ [(set_attr "type" "neon_dup,f_mcr,neon_dup") + (set_attr "simd" "yes,*,yes") + (set_attr "fp" "*,yes,*") + (set_attr "length" "4")] +@@ -1509,7 +1517,7 @@ + ) + + ;; Vector versions of the floating-point frint patterns. 
+-;; Expands to btrunc, ceil, floor, nearbyint, rint, round. ++;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. + (define_insn "2" + [(set (match_operand:VDQF 0 "register_operand" "=w") + (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] +@@ -2316,6 +2324,15 @@ + DONE; + }) + ++(define_expand "aarch64_reinterpretdf" ++ [(match_operand:DF 0 "register_operand" "") ++ (match_operand:VD_RE 1 "register_operand" "")] ++ "TARGET_SIMD" ++{ ++ aarch64_simd_reinterpret (operands[0], operands[1]); ++ DONE; ++}) ++ + (define_expand "aarch64_reinterpretv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:VQ 1 "register_operand" "")] +@@ -2702,9 +2719,9 @@ + ;; q + + (define_insn "aarch64_s" +- [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w") +- (UNQOPS:VSDQ_I_BHSI +- (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] ++ [(set (match_operand:VSDQ_I 0 "register_operand" "=w") ++ (UNQOPS:VSDQ_I ++ (match_operand:VSDQ_I 1 "register_operand" "w")))] + "TARGET_SIMD" + "s\\t%0, %1" + [(set_attr "type" "neon_")] +@@ -3756,26 +3773,46 @@ + ))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" +- "@ +- cm\t%d0, %d, %d +- cm\t%d0, %d1, #0 +- #" +- "reload_completed +- /* We need to prevent the split from +- happening in the 'w' constraint cases. */ +- && GP_REGNUM_P (REGNO (operands[0])) +- && GP_REGNUM_P (REGNO (operands[1]))" +- [(const_int 0)] ++ "#" ++ "reload_completed" ++ [(set (match_operand:DI 0 "register_operand") ++ (neg:DI ++ (COMPARISONS:DI ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "aarch64_simd_reg_or_zero") ++ )))] + { +- enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); +- rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); +- rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); +- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); +- DONE; ++ /* If we are in the general purpose register file, ++ we split to a sequence of comparison and store. */ ++ if (GP_REGNUM_P (REGNO (operands[0])) ++ && GP_REGNUM_P (REGNO (operands[1]))) ++ { ++ enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); ++ rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); ++ rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); ++ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); ++ DONE; ++ } ++ /* Otherwise, we expand to a similar pattern which does not ++ clobber CC_REGNUM. */ + } + [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] + ) + ++(define_insn "*aarch64_cmdi" ++ [(set (match_operand:DI 0 "register_operand" "=w,w") ++ (neg:DI ++ (COMPARISONS:DI ++ (match_operand:DI 1 "register_operand" "w,w") ++ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") ++ )))] ++ "TARGET_SIMD && reload_completed" ++ "@ ++ cm\t%d0, %d, %d ++ cm\t%d0, %d1, #0" ++ [(set_attr "type" "neon_compare, neon_compare_zero")] ++) ++ + ;; cm(hs|hi) + + (define_insn "aarch64_cm" +@@ -3799,25 +3836,44 @@ + ))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" +- "@ +- cm\t%d0, %d, %d +- #" +- "reload_completed +- /* We need to prevent the split from +- happening in the 'w' constraint cases. 
*/ +- && GP_REGNUM_P (REGNO (operands[0])) +- && GP_REGNUM_P (REGNO (operands[1]))" +- [(const_int 0)] ++ "#" ++ "reload_completed" ++ [(set (match_operand:DI 0 "register_operand") ++ (neg:DI ++ (UCOMPARISONS:DI ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "aarch64_simd_reg_or_zero") ++ )))] + { +- enum machine_mode mode = CCmode; +- rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); +- rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); +- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); +- DONE; ++ /* If we are in the general purpose register file, ++ we split to a sequence of comparison and store. */ ++ if (GP_REGNUM_P (REGNO (operands[0])) ++ && GP_REGNUM_P (REGNO (operands[1]))) ++ { ++ enum machine_mode mode = CCmode; ++ rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); ++ rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); ++ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); ++ DONE; ++ } ++ /* Otherwise, we expand to a similar pattern which does not ++ clobber CC_REGNUM. */ + } +- [(set_attr "type" "neon_compare, neon_compare_zero")] ++ [(set_attr "type" "neon_compare,multiple")] + ) + ++(define_insn "*aarch64_cmdi" ++ [(set (match_operand:DI 0 "register_operand" "=w") ++ (neg:DI ++ (UCOMPARISONS:DI ++ (match_operand:DI 1 "register_operand" "w") ++ (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") ++ )))] ++ "TARGET_SIMD && reload_completed" ++ "cm\t%d0, %d, %d" ++ [(set_attr "type" "neon_compare")] ++) ++ + ;; cmtst + + (define_insn "aarch64_cmtst" +@@ -3843,23 +3899,44 @@ + (const_int 0)))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_SIMD" +- "@ +- cmtst\t%d0, %d1, %d2 +- #" +- "reload_completed +- /* We need to prevent the split from +- happening in the 'w' constraint cases. */ +- && GP_REGNUM_P (REGNO (operands[0])) +- && GP_REGNUM_P (REGNO (operands[1]))" +- [(const_int 0)] ++ "#" ++ "reload_completed" ++ [(set (match_operand:DI 0 "register_operand") ++ (neg:DI ++ (ne:DI ++ (and:DI ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "register_operand")) ++ (const_int 0))))] + { +- rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); +- enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); +- rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); +- rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); +- emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); +- DONE; ++ /* If we are in the general purpose register file, ++ we split to a sequence of comparison and store. */ ++ if (GP_REGNUM_P (REGNO (operands[0])) ++ && GP_REGNUM_P (REGNO (operands[1]))) ++ { ++ rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); ++ enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); ++ rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); ++ rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); ++ emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); ++ DONE; ++ } ++ /* Otherwise, we expand to a similar pattern which does not ++ clobber CC_REGNUM. 
*/ + } ++ [(set_attr "type" "neon_tst,multiple")] ++) ++ ++(define_insn "*aarch64_cmtstdi" ++ [(set (match_operand:DI 0 "register_operand" "=w") ++ (neg:DI ++ (ne:DI ++ (and:DI ++ (match_operand:DI 1 "register_operand" "w") ++ (match_operand:DI 2 "register_operand" "w")) ++ (const_int 0))))] ++ "TARGET_SIMD" ++ "cmtst\t%d0, %d1, %d2" + [(set_attr "type" "neon_tst")] + ) + +@@ -3950,6 +4027,17 @@ + [(set_attr "type" "neon_store2_2reg")] + ) + ++(define_insn "vec_store_lanesoi_lane" ++ [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") ++ (unspec: [(match_operand:OI 1 "register_operand" "w") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ UNSPEC_ST2_LANE))] ++ "TARGET_SIMD" ++ "st2\\t{%S1. - %T1.}[%2], %0" ++ [(set_attr "type" "neon_store3_one_lane")] ++) ++ + (define_insn "vec_load_lanesci" + [(set (match_operand:CI 0 "register_operand" "=w") + (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") +@@ -3970,6 +4058,17 @@ + [(set_attr "type" "neon_store3_3reg")] + ) + ++(define_insn "vec_store_lanesci_lane" ++ [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") ++ (unspec: [(match_operand:CI 1 "register_operand" "w") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ UNSPEC_ST3_LANE))] ++ "TARGET_SIMD" ++ "st3\\t{%S1. - %U1.}[%2], %0" ++ [(set_attr "type" "neon_store3_one_lane")] ++) ++ + (define_insn "vec_load_lanesxi" + [(set (match_operand:XI 0 "register_operand" "=w") + (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") +@@ -3990,6 +4089,17 @@ + [(set_attr "type" "neon_store4_4reg")] + ) + ++(define_insn "vec_store_lanesxi_lane" ++ [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") ++ (unspec: [(match_operand:XI 1 "register_operand" "w") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (match_operand:SI 2 "immediate_operand" "i")] ++ UNSPEC_ST4_LANE))] ++ "TARGET_SIMD" ++ "st4\\t{%S1. - %V1.}[%2], %0" ++ [(set_attr "type" "neon_store4_one_lane")] ++) ++ + ;; Reload patterns for AdvSIMD register list operands. + + (define_expand "mov" +@@ -4398,6 +4508,44 @@ + [(set_attr "type" "neon_permute")] + ) + ++;; Note immediate (third) operand is lane index not byte index. ++(define_insn "aarch64_ext" ++ [(set (match_operand:VALL 0 "register_operand" "=w") ++ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") ++ (match_operand:VALL 2 "register_operand" "w") ++ (match_operand:SI 3 "immediate_operand" "i")] ++ UNSPEC_EXT))] ++ "TARGET_SIMD" ++{ ++ operands[3] = GEN_INT (INTVAL (operands[3]) ++ * GET_MODE_SIZE (GET_MODE_INNER (mode))); ++ return "ext\\t%0., %1., %2., #%3"; ++} ++ [(set_attr "type" "neon_ext")] ++) ++ ++;; This exists solely to check the arguments to the corresponding __builtin. ++;; Used where we want an error for out-of-range indices which would otherwise ++;; be silently wrapped (e.g. the mask to a __builtin_shuffle). ++(define_expand "aarch64_im_lane_boundsi" ++ [(match_operand:SI 0 "immediate_operand" "i") ++ (match_operand:SI 1 "immediate_operand" "i")] ++ "TARGET_SIMD" ++{ ++ aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1])); ++ DONE; ++} ++) ++ ++(define_insn "aarch64_rev" ++ [(set (match_operand:VALL 0 "register_operand" "=w") ++ (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")] ++ REVERSE))] ++ "TARGET_SIMD" ++ "rev\\t%0., %1." 
++ [(set_attr "type" "neon_rev")] ++) ++ + (define_insn "aarch64_st2_dreg" + [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") + (unspec:TI [(match_operand:OI 1 "register_operand" "w") +@@ -4484,6 +4632,57 @@ + DONE; + }) + ++(define_expand "aarch64_st2_lane" ++ [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:OI 1 "register_operand" "w") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (match_operand:SI 2 "immediate_operand")] ++ "TARGET_SIMD" ++{ ++ enum machine_mode mode = mode; ++ rtx mem = gen_rtx_MEM (mode, operands[0]); ++ operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); ++ ++ emit_insn (gen_vec_store_lanesoi_lane (mem, ++ operands[1], ++ operands[2])); ++ DONE; ++}) ++ ++(define_expand "aarch64_st3_lane" ++ [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:CI 1 "register_operand" "w") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (match_operand:SI 2 "immediate_operand")] ++ "TARGET_SIMD" ++{ ++ enum machine_mode mode = mode; ++ rtx mem = gen_rtx_MEM (mode, operands[0]); ++ operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); ++ ++ emit_insn (gen_vec_store_lanesci_lane (mem, ++ operands[1], ++ operands[2])); ++ DONE; ++}) ++ ++(define_expand "aarch64_st4_lane" ++ [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:XI 1 "register_operand" "w") ++ (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ++ (match_operand:SI 2 "immediate_operand")] ++ "TARGET_SIMD" ++{ ++ enum machine_mode mode = mode; ++ rtx mem = gen_rtx_MEM (mode, operands[0]); ++ operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); ++ ++ emit_insn (gen_vec_store_lanesxi_lane (mem, ++ operands[1], ++ operands[2])); ++ DONE; ++}) ++ + (define_expand "aarch64_st1" + [(match_operand:DI 0 "register_operand") + (match_operand:VALL 1 "register_operand")] +--- a/src/gcc/config/aarch64/predicates.md ++++ b/src/gcc/config/aarch64/predicates.md +@@ -26,6 +26,10 @@ + && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) + ) + ++(define_predicate "aarch64_call_insn_operand" ++ (ior (match_code "symbol_ref") ++ (match_operand 0 "register_operand"))) ++ + (define_predicate "aarch64_simd_register" + (and (match_code "reg") + (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") +--- a/src/gcc/config/aarch64/arm_neon.h ++++ b/src/gcc/config/aarch64/arm_neon.h +@@ -2119,29 +2119,26 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vqadd_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a, +- (int8x8_t) __b); ++ return __builtin_aarch64_uqaddv8qi_uuu (__a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vqadd_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a, +- (int16x4_t) __b); ++ return __builtin_aarch64_uqaddv4hi_uuu (__a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vqadd_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a, +- (int32x2_t) __b); ++ return __builtin_aarch64_uqaddv2si_uuu (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqadd_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a, +- (int64x1_t) __b); ++ return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a, ++ (uint64_t) __b); + } + + __extension__ static 
__inline int8x16_t __attribute__ ((__always_inline__)) +@@ -2171,29 +2168,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a, +- (int8x16_t) __b); ++ return __builtin_aarch64_uqaddv16qi_uuu (__a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a, +- (int16x8_t) __b); ++ return __builtin_aarch64_uqaddv8hi_uuu (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a, +- (int32x4_t) __b); ++ return __builtin_aarch64_uqaddv4si_uuu (__a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a, +- (int64x2_t) __b); ++ return __builtin_aarch64_uqaddv2di_uuu (__a, __b); + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +@@ -2223,29 +2216,26 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vqsub_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a, +- (int8x8_t) __b); ++ return __builtin_aarch64_uqsubv8qi_uuu (__a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vqsub_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a, +- (int16x4_t) __b); ++ return __builtin_aarch64_uqsubv4hi_uuu (__a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vqsub_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a, +- (int32x2_t) __b); ++ return __builtin_aarch64_uqsubv2si_uuu (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqsub_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a, +- (int64x1_t) __b); ++ return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a, ++ (uint64_t) __b); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -2275,29 +2265,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a, +- (int8x16_t) __b); ++ return __builtin_aarch64_uqsubv16qi_uuu (__a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a, +- (int16x8_t) __b); ++ return __builtin_aarch64_uqsubv8hi_uuu (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a, +- (int32x4_t) __b); ++ return __builtin_aarch64_uqsubv4si_uuu (__a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- return (uint64x2_t) 
__builtin_aarch64_uqsubv2di ((int64x2_t) __a, +- (int64x2_t) __b); ++ return __builtin_aarch64_uqsubv2di_uuu (__a, __b); + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +@@ -2318,6 +2304,12 @@ + return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); + } + ++__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) ++vqneg_s64 (int64x1_t __a) ++{ ++ return __builtin_aarch64_sqnegdi (__a); ++} ++ + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vqnegq_s8 (int8x16_t __a) + { +@@ -2354,6 +2346,12 @@ + return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); + } + ++__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) ++vqabs_s64 (int64x1_t __a) ++{ ++ return __builtin_aarch64_sqabsdi (__a); ++} ++ + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vqabsq_s8 (int8x16_t __a) + { +@@ -2643,1352 +2641,1587 @@ + /* vreinterpret */ + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vreinterpret_p8_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv8qidf_ps (__a); ++} ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_s8 (int8x8_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_s16 (int16x4_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_s32 (int32x2_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_s64 (int64x1_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_f32 (float32x2_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_u8 (uint8x8_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_u16 (uint16x4_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_u32 (uint32x2_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_u64 (uint64x1_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_p16 (poly16x4_t __a) + { +- return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); ++ return (poly8x8_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vreinterpretq_p8_f64 (float64x2_t __a) ++{ ++ return 
(poly8x16_t) __a; ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_s8 (int8x16_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_s16 (int16x8_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_s32 (int32x4_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_s64 (int64x2_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_f32 (float32x4_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_u8 (uint8x16_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) +- __a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_u16 (uint16x8_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) +- __a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_u32 (uint32x4_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) +- __a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_u64 (uint64x2_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) +- __a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_p8_p16 (poly16x8_t __a) + { +- return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) +- __a); ++ return (poly8x16_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vreinterpret_p16_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv4hidf_ps (__a); ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_s8 (int8x8_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_s16 (int16x4_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_s32 (int32x2_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_s64 (int64x1_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_f32 (float32x2_t 
__a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_u8 (uint8x8_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_u16 (uint16x4_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_u32 (uint32x2_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_u64 (uint64x1_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) + vreinterpret_p16_p8 (poly8x8_t __a) + { +- return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); ++ return (poly16x4_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vreinterpretq_p16_f64 (float64x2_t __a) ++{ ++ return (poly16x8_t) __a; ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_s8 (int8x16_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_s16 (int16x8_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_s32 (int32x4_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_s64 (int64x2_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_f32 (float32x4_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_u8 (uint8x16_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) +- __a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_u16 (uint16x8_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_u32 (uint32x4_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_u64 (uint64x2_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); ++ return (poly16x8_t) __a; + } + + __extension__ static 
__inline poly16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_p16_p8 (poly8x16_t __a) + { +- return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) +- __a); ++ return (poly16x8_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ++vreinterpret_f32_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv2sfdf (__a); ++} ++ ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_s8 (int8x8_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_s16 (int16x4_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_s32 (int32x2_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_s64 (int64x1_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_u8 (uint8x8_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_u16 (uint16x4_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) +- __a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_u32 (uint32x2_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t) +- __a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_u64 (uint64x1_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_p8 (poly8x8_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vreinterpret_f32_p16 (poly16x4_t __a) + { +- return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) +- __a); ++ return (float32x2_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ++vreinterpretq_f32_f64 (float64x2_t __a) ++{ ++ return (float32x4_t) __a; ++} ++ ++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_s8 (int8x16_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_s16 (int16x8_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_s32 (int32x4_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si 
(__a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_s64 (int64x2_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_u8 (uint8x16_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) +- __a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_u16 (uint16x8_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) +- __a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_u32 (uint32x4_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t) +- __a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_u64 (uint64x2_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t) +- __a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_p8 (poly8x16_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) +- __a); ++ return (float32x4_t) __a; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_f32_p16 (poly16x8_t __a) + { +- return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) +- __a); ++ return (float32x4_t) __a; + } + ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_f32 (float32x2_t __a) ++{ ++ return __builtin_aarch64_reinterpretdfv2sf (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_p8 (poly8x8_t __a) ++{ ++ return __builtin_aarch64_reinterpretdfv8qi_sp (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_p16 (poly16x4_t __a) ++{ ++ return __builtin_aarch64_reinterpretdfv4hi_sp (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_s8 (int8x8_t __a) ++{ ++ return __builtin_aarch64_reinterpretdfv8qi (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_s16 (int16x4_t __a) ++{ ++ return __builtin_aarch64_reinterpretdfv4hi (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_s32 (int32x2_t __a) ++{ ++ return __builtin_aarch64_reinterpretdfv2si (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_s64 (int64x1_t __a) ++{ ++ return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0)); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_u8 (uint8x8_t __a) ++{ ++ return __builtin_aarch64_reinterpretdfv8qi_su (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_u16 (uint16x4_t __a) ++{ ++ return __builtin_aarch64_reinterpretdfv4hi_su (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_u32 (uint32x2_t __a) ++{ ++ return 
__builtin_aarch64_reinterpretdfv2si_su (__a); ++} ++ ++__extension__ static __inline float64x1_t __attribute__((__always_inline__)) ++vreinterpret_f64_u64 (uint64x1_t __a) ++{ ++ return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0)); ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_f32 (float32x4_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_p8 (poly8x16_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_p16 (poly16x8_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_s8 (int8x16_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_s16 (int16x8_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_s32 (int32x4_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_s64 (int64x2_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_u8 (uint8x16_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_u16 (uint16x8_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_u32 (uint32x4_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ ++__extension__ static __inline float64x2_t __attribute__((__always_inline__)) ++vreinterpretq_f64_u64 (uint64x2_t __a) ++{ ++ return (float64x2_t) __a; ++} ++ + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) ++vreinterpret_s64_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretdidf (__a); ++} ++ ++__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_s8 (int8x8_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_s16 (int16x4_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_s32 (int32x2_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_f32 (float32x2_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_u8 (uint8x8_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_u16 (uint16x4_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline 
int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_u32 (uint32x2_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_u64 (uint64x1_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_p8 (poly8x8_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vreinterpret_s64_p16 (poly16x4_t __a) + { +- return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); ++ return (int64x1_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ++vreinterpretq_s64_f64 (float64x2_t __a) ++{ ++ return (int64x2_t) __a; ++} ++ ++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_s8 (int8x16_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_s16 (int16x8_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_s32 (int32x4_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_f32 (float32x4_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_u8 (uint8x16_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_u16 (uint16x8_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_u32 (uint32x4_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_u64 (uint64x2_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_p8 (poly8x16_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_s64_p16 (poly16x8_t __a) + { +- return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); ++ return (int64x2_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) ++vreinterpret_u64_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretdidf_us (__a); ++} ++ ++__extension__ 
static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_s8 (int8x8_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_s16 (int16x4_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_s32 (int32x2_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_s64 (int64x1_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_f32 (float32x2_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_u8 (uint8x8_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_u16 (uint16x4_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_u32 (uint32x2_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_p8 (poly8x8_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vreinterpret_u64_p16 (poly16x4_t __a) + { +- return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); ++ return (uint64x1_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ++vreinterpretq_u64_f64 (float64x2_t __a) ++{ ++ return (uint64x2_t) __a; ++} ++ ++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_s8 (int8x16_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_s16 (int16x8_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_s32 (int32x4_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_s64 (int64x2_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_f32 (float32x4_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); ++ return (uint64x2_t) __a; + } + + 
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_u8 (uint8x16_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) +- __a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_u16 (uint16x8_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_u32 (uint32x4_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_p8 (poly8x16_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) +- __a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vreinterpretq_u64_p16 (poly16x8_t __a) + { +- return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); ++ return (uint64x2_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vreinterpret_s8_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv8qidf (__a); ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_s16 (int16x4_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_s32 (int32x2_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_s64 (int64x1_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_f32 (float32x2_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_u8 (uint8x8_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_u16 (uint16x4_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_u32 (uint32x2_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_u64 (uint64x1_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_p8 (poly8x8_t __a) + { +- return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vreinterpret_s8_p16 (poly16x4_t __a) + { +- return (int8x8_t) 
__builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); ++ return (int8x8_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vreinterpretq_s8_f64 (float64x2_t __a) ++{ ++ return (int8x16_t) __a; ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_s16 (int16x8_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_s32 (int32x4_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_s64 (int64x2_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_f32 (float32x4_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_u8 (uint8x16_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) +- __a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_u16 (uint16x8_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_u32 (uint32x4_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_u64 (uint64x2_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_p8 (poly8x16_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) +- __a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_s8_p16 (poly16x8_t __a) + { +- return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); ++ return (int8x16_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vreinterpret_s16_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv4hidf (__a); ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_s8 (int8x8_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_s32 (int32x2_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_s64 (int64x1_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_f32 (float32x2_t __a) + { +- return (int16x4_t) 
__builtin_aarch64_reinterpretv4hiv2sf (__a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_u8 (uint8x8_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_u16 (uint16x4_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_u32 (uint32x2_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_u64 (uint64x1_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_p8 (poly8x8_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vreinterpret_s16_p16 (poly16x4_t __a) + { +- return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); ++ return (int16x4_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vreinterpretq_s16_f64 (float64x2_t __a) ++{ ++ return (int16x8_t) __a; ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_s8 (int8x16_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_s32 (int32x4_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_s64 (int64x2_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_f32 (float32x4_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_u8 (uint8x16_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_u16 (uint16x8_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_u32 (uint32x4_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_u64 (uint64x2_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + 
vreinterpretq_s16_p8 (poly8x16_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_s16_p16 (poly16x8_t __a) + { +- return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); ++ return (int16x8_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vreinterpret_s32_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv2sidf (__a); ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_s8 (int8x8_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_s16 (int16x4_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_s64 (int64x1_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_f32 (float32x2_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_u8 (uint8x8_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_u16 (uint16x4_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_u32 (uint32x2_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_u64 (uint64x1_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_p8 (poly8x8_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vreinterpret_s32_p16 (poly16x4_t __a) + { +- return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); ++ return (int32x2_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vreinterpretq_s32_f64 (float64x2_t __a) ++{ ++ return (int32x4_t) __a; ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_s8 (int8x16_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_s16 (int16x8_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_s64 
(int64x2_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_f32 (float32x4_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_u8 (uint8x16_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_u16 (uint16x8_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_u32 (uint32x4_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_u64 (uint64x2_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_p8 (poly8x16_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_s32_p16 (poly16x8_t __a) + { +- return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); ++ return (int32x4_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vreinterpret_u8_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv8qidf_us (__a); ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_s8 (int8x8_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_s16 (int16x4_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_s32 (int32x2_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_s64 (int64x1_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_f32 (float32x2_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_u16 (uint16x4_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_u32 (uint32x2_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t 
__attribute__ ((__always_inline__)) + vreinterpret_u8_u64 (uint64x1_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_p8 (poly8x8_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vreinterpret_u8_p16 (poly16x4_t __a) + { +- return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); ++ return (uint8x8_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vreinterpretq_u8_f64 (float64x2_t __a) ++{ ++ return (uint8x16_t) __a; ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_s8 (int8x16_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_s16 (int16x8_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_s32 (int32x4_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_s64 (int64x2_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_f32 (float32x4_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_u16 (uint16x8_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) +- __a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_u32 (uint32x4_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) +- __a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_u64 (uint64x2_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) +- __a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_p8 (poly8x16_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) +- __a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vreinterpretq_u8_p16 (poly16x8_t __a) + { +- return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) +- __a); ++ return (uint8x16_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vreinterpret_u16_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv4hidf_us (__a); ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_s8 (int8x8_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); ++ return (uint16x4_t) __a; + } + + 
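(Illustrative note, not part of the patch: because each vreinterpret variant is now a plain cast, chains of reinterprets cost nothing and only the real data-processing instruction between them survives. A hedged sketch with assumed helper names:)

#include <arm_neon.h>

/* Byte-swap each 32-bit lane of a float vector: reinterpret to bytes,
   reverse within each 32-bit group, reinterpret back.  Both casts are
   free; only the rev32 instruction is emitted.  */
float32x4_t
bswap_lanes (float32x4_t x)
{
  uint8x16_t b = vreinterpretq_u8_f32 (x);
  b = vrev32q_u8 (b);
  return vreinterpretq_f32_u8 (b);
}
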
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_s16 (int16x4_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_s32 (int32x2_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_s64 (int64x1_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_f32 (float32x2_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_u8 (uint8x8_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_u32 (uint32x2_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_u64 (uint64x1_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_p8 (poly8x8_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vreinterpret_u16_p16 (poly16x4_t __a) + { +- return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); ++ return (uint16x4_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vreinterpretq_u16_f64 (float64x2_t __a) ++{ ++ return (uint16x8_t) __a; ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_s8 (int8x16_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_s16 (int16x8_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_s32 (int32x4_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_s64 (int64x2_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_f32 (float32x4_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_u8 (uint8x16_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi 
((int8x16_t) +- __a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_u32 (uint32x4_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_u64 (uint64x2_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_p8 (poly8x16_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) +- __a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vreinterpretq_u16_p16 (poly16x8_t __a) + { +- return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); ++ return (uint16x8_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vreinterpret_u32_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_reinterpretv2sidf_us (__a); ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_s8 (int8x8_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_s16 (int16x4_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_s32 (int32x2_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_s64 (int64x1_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_f32 (float32x2_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_u8 (uint8x8_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_u16 (uint16x4_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_u64 (uint64x1_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_p8 (poly8x8_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vreinterpret_u32_p16 (poly16x4_t __a) + { +- return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); ++ return (uint32x2_t) __a; + } + + __extension__ static __inline uint32x4_t __attribute__ 
((__always_inline__)) ++vreinterpretq_u32_f64 (float64x2_t __a) ++{ ++ return (uint32x4_t) __a; ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_u32_s8 (int8x16_t __a) + { +- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); ++ return (uint32x4_t) __a; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_u32_s16 (int16x8_t __a) + { +- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); ++ return (uint32x4_t) __a; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_u32_s32 (int32x4_t __a) + { +- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a); ++ return (uint32x4_t) __a; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_u32_s64 (int64x2_t __a) + { +- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); ++ return (uint32x4_t) __a; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_u32_f32 (float32x4_t __a) + { +- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); ++ return (uint32x4_t) __a; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_u32_u8 (uint8x16_t __a) + { +- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) +- __a); ++ return (uint32x4_t) __a; + } + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_u16 (uint16x8_t __a) { @@ -8295,18 +17599,1672 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + return (uint32x4_t) __a; } - __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) - vreinterpretq_u32_p16 (poly16x8_t __a) + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vreinterpretq_u32_p16 (poly16x8_t __a) + { +- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); ++ return (uint32x4_t) __a; + } + + #define __GET_LOW(__TYPE) \ +@@ -5414,318 +5647,6 @@ + return result; + } + +-#define vext_f32(a, b, c) \ +- __extension__ \ +- ({ \ +- float32x2_t b_ = (b); \ +- float32x2_t a_ = (a); \ +- float32x2_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_f64(a, b, c) \ +- __extension__ \ +- ({ \ +- float64x1_t b_ = (b); \ +- float64x1_t a_ = (a); \ +- float64x1_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_p8(a, b, c) \ +- __extension__ \ +- ({ \ +- poly8x8_t b_ = (b); \ +- poly8x8_t a_ = (a); \ +- poly8x8_t result; \ +- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_p16(a, b, c) \ +- __extension__ \ +- ({ \ +- poly16x4_t b_ = (b); \ +- poly16x4_t a_ = (a); \ +- poly16x4_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_s8(a, b, c) \ +- __extension__ \ +- ({ \ +- int8x8_t b_ = (b); \ +- int8x8_t a_ = (a); \ +- int8x8_t result; \ +- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define 
vext_s16(a, b, c) \ +- __extension__ \ +- ({ \ +- int16x4_t b_ = (b); \ +- int16x4_t a_ = (a); \ +- int16x4_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_s32(a, b, c) \ +- __extension__ \ +- ({ \ +- int32x2_t b_ = (b); \ +- int32x2_t a_ = (a); \ +- int32x2_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_s64(a, b, c) \ +- __extension__ \ +- ({ \ +- int64x1_t b_ = (b); \ +- int64x1_t a_ = (a); \ +- int64x1_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_u8(a, b, c) \ +- __extension__ \ +- ({ \ +- uint8x8_t b_ = (b); \ +- uint8x8_t a_ = (a); \ +- uint8x8_t result; \ +- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_u16(a, b, c) \ +- __extension__ \ +- ({ \ +- uint16x4_t b_ = (b); \ +- uint16x4_t a_ = (a); \ +- uint16x4_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_u32(a, b, c) \ +- __extension__ \ +- ({ \ +- uint32x2_t b_ = (b); \ +- uint32x2_t a_ = (a); \ +- uint32x2_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vext_u64(a, b, c) \ +- __extension__ \ +- ({ \ +- uint64x1_t b_ = (b); \ +- uint64x1_t a_ = (a); \ +- uint64x1_t result; \ +- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_f32(a, b, c) \ +- __extension__ \ +- ({ \ +- float32x4_t b_ = (b); \ +- float32x4_t a_ = (a); \ +- float32x4_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_f64(a, b, c) \ +- __extension__ \ +- ({ \ +- float64x2_t b_ = (b); \ +- float64x2_t a_ = (a); \ +- float64x2_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_p8(a, b, c) \ +- __extension__ \ +- ({ \ +- poly8x16_t b_ = (b); \ +- poly8x16_t a_ = (a); \ +- poly8x16_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_p16(a, b, c) \ +- __extension__ \ +- ({ \ +- poly16x8_t b_ = (b); \ +- poly16x8_t a_ = (a); \ +- poly16x8_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_s8(a, b, c) \ +- __extension__ \ +- ({ \ +- int8x16_t b_ = (b); \ +- int8x16_t a_ = (a); \ +- int8x16_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_s16(a, b, c) \ +- __extension__ \ +- ({ \ +- int16x8_t b_ = (b); \ +- int16x8_t a_ = (a); \ +- int16x8_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ +- : 
"=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_s32(a, b, c) \ +- __extension__ \ +- ({ \ +- int32x4_t b_ = (b); \ +- int32x4_t a_ = (a); \ +- int32x4_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_s64(a, b, c) \ +- __extension__ \ +- ({ \ +- int64x2_t b_ = (b); \ +- int64x2_t a_ = (a); \ +- int64x2_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_u8(a, b, c) \ +- __extension__ \ +- ({ \ +- uint8x16_t b_ = (b); \ +- uint8x16_t a_ = (a); \ +- uint8x16_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_u16(a, b, c) \ +- __extension__ \ +- ({ \ +- uint16x8_t b_ = (b); \ +- uint16x8_t a_ = (a); \ +- uint16x8_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_u32(a, b, c) \ +- __extension__ \ +- ({ \ +- uint32x4_t b_ = (b); \ +- uint32x4_t a_ = (a); \ +- uint32x4_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- +-#define vextq_u64(a, b, c) \ +- __extension__ \ +- ({ \ +- uint64x2_t b_ = (b); \ +- uint64x2_t a_ = (a); \ +- uint64x2_t result; \ +- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ +- : "=w"(result) \ +- : "w"(a_), "w"(b_), "i"(c) \ +- : /* No clobbers */); \ +- result; \ +- }) +- + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) + { +@@ -10628,402 +10549,6 @@ + return result; + } + +-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +-vrev16_p8 (poly8x8_t a) +-{ +- poly8x8_t result; +- __asm__ ("rev16 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +-vrev16_s8 (int8x8_t a) +-{ +- int8x8_t result; +- __asm__ ("rev16 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +-vrev16_u8 (uint8x8_t a) +-{ +- uint8x8_t result; +- __asm__ ("rev16 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +-vrev16q_p8 (poly8x16_t a) +-{ +- poly8x16_t result; +- __asm__ ("rev16 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +-vrev16q_s8 (int8x16_t a) +-{ +- int8x16_t result; +- __asm__ ("rev16 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +-vrev16q_u8 (uint8x16_t a) +-{ +- uint8x16_t result; +- __asm__ ("rev16 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +-vrev32_p8 (poly8x8_t a) +-{ 
+- poly8x8_t result; +- __asm__ ("rev32 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +-vrev32_p16 (poly16x4_t a) +-{ +- poly16x4_t result; +- __asm__ ("rev32 %0.4h,%1.4h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +-vrev32_s8 (int8x8_t a) +-{ +- int8x8_t result; +- __asm__ ("rev32 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +-vrev32_s16 (int16x4_t a) +-{ +- int16x4_t result; +- __asm__ ("rev32 %0.4h,%1.4h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +-vrev32_u8 (uint8x8_t a) +-{ +- uint8x8_t result; +- __asm__ ("rev32 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +-vrev32_u16 (uint16x4_t a) +-{ +- uint16x4_t result; +- __asm__ ("rev32 %0.4h,%1.4h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +-vrev32q_p8 (poly8x16_t a) +-{ +- poly8x16_t result; +- __asm__ ("rev32 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +-vrev32q_p16 (poly16x8_t a) +-{ +- poly16x8_t result; +- __asm__ ("rev32 %0.8h,%1.8h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +-vrev32q_s8 (int8x16_t a) +-{ +- int8x16_t result; +- __asm__ ("rev32 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +-vrev32q_s16 (int16x8_t a) +-{ +- int16x8_t result; +- __asm__ ("rev32 %0.8h,%1.8h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +-vrev32q_u8 (uint8x16_t a) +-{ +- uint8x16_t result; +- __asm__ ("rev32 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +-vrev32q_u16 (uint16x8_t a) +-{ +- uint16x8_t result; +- __asm__ ("rev32 %0.8h,%1.8h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +-vrev64_f32 (float32x2_t a) +-{ +- float32x2_t result; +- __asm__ ("rev64 %0.2s,%1.2s" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +-vrev64_p8 (poly8x8_t a) +-{ +- poly8x8_t result; +- __asm__ ("rev64 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +-vrev64_p16 (poly16x4_t a) +-{ +- poly16x4_t result; +- __asm__ ("rev64 %0.4h,%1.4h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- 
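(Illustrative note, not part of the patch: the vrev64 family whose inline-asm stubs are deleted here remains available to users; it reverses the element order within each 64-bit chunk. Assumed semantics, sketched with the public intrinsic:)

#include <arm_neon.h>

/* vrev64_u16 reverses the 16-bit lanes within each 64-bit half:
   {a, b, c, d}  ->  {d, c, b, a}.  */
uint16x4_t
reverse_in_doubleword (uint16x4_t x)
{
  return vrev64_u16 (x);
}
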
+-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +-vrev64_s8 (int8x8_t a) +-{ +- int8x8_t result; +- __asm__ ("rev64 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +-vrev64_s16 (int16x4_t a) +-{ +- int16x4_t result; +- __asm__ ("rev64 %0.4h,%1.4h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +-vrev64_s32 (int32x2_t a) +-{ +- int32x2_t result; +- __asm__ ("rev64 %0.2s,%1.2s" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +-vrev64_u8 (uint8x8_t a) +-{ +- uint8x8_t result; +- __asm__ ("rev64 %0.8b,%1.8b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +-vrev64_u16 (uint16x4_t a) +-{ +- uint16x4_t result; +- __asm__ ("rev64 %0.4h,%1.4h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +-vrev64_u32 (uint32x2_t a) +-{ +- uint32x2_t result; +- __asm__ ("rev64 %0.2s,%1.2s" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +-vrev64q_f32 (float32x4_t a) +-{ +- float32x4_t result; +- __asm__ ("rev64 %0.4s,%1.4s" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +-vrev64q_p8 (poly8x16_t a) +-{ +- poly8x16_t result; +- __asm__ ("rev64 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +-vrev64q_p16 (poly16x8_t a) +-{ +- poly16x8_t result; +- __asm__ ("rev64 %0.8h,%1.8h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +-vrev64q_s8 (int8x16_t a) +-{ +- int8x16_t result; +- __asm__ ("rev64 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +-vrev64q_s16 (int16x8_t a) +-{ +- int16x8_t result; +- __asm__ ("rev64 %0.8h,%1.8h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +-vrev64q_s32 (int32x4_t a) +-{ +- int32x4_t result; +- __asm__ ("rev64 %0.4s,%1.4s" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +-vrev64q_u8 (uint8x16_t a) +-{ +- uint8x16_t result; +- __asm__ ("rev64 %0.16b,%1.16b" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +-vrev64q_u16 (uint16x8_t a) +-{ +- uint16x8_t result; +- __asm__ ("rev64 %0.8h,%1.8h" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +-vrev64q_u32 (uint32x4_t a) +-{ +- uint32x4_t result; +- 
__asm__ ("rev64 %0.4s,%1.4s" +- : "=w"(result) +- : "w"(a) +- : /* No clobbers */); +- return result; +-} +- + #define vrshrn_high_n_s16(a, b, c) \ + __extension__ \ + ({ \ +@@ -12447,469 +11972,7 @@ + return result; + } + +-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +-vtrn1_f32 (float32x2_t a, float32x2_t b) +-{ +- float32x2_t result; +- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +-vtrn1_p8 (poly8x8_t a, poly8x8_t b) +-{ +- poly8x8_t result; +- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +-vtrn1_p16 (poly16x4_t a, poly16x4_t b) +-{ +- poly16x4_t result; +- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +-vtrn1_s8 (int8x8_t a, int8x8_t b) +-{ +- int8x8_t result; +- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +-vtrn1_s16 (int16x4_t a, int16x4_t b) +-{ +- int16x4_t result; +- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +-vtrn1_s32 (int32x2_t a, int32x2_t b) +-{ +- int32x2_t result; +- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +-vtrn1_u8 (uint8x8_t a, uint8x8_t b) +-{ +- uint8x8_t result; +- __asm__ ("trn1 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +-vtrn1_u16 (uint16x4_t a, uint16x4_t b) +-{ +- uint16x4_t result; +- __asm__ ("trn1 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +-vtrn1_u32 (uint32x2_t a, uint32x2_t b) +-{ +- uint32x2_t result; +- __asm__ ("trn1 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +-vtrn1q_f32 (float32x4_t a, float32x4_t b) +-{ +- float32x4_t result; +- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +-vtrn1q_f64 (float64x2_t a, float64x2_t b) +-{ +- float64x2_t result; +- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +-vtrn1q_p8 (poly8x16_t a, poly8x16_t b) +-{ +- poly8x16_t result; +- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) 
+-vtrn1q_p16 (poly16x8_t a, poly16x8_t b) +-{ +- poly16x8_t result; +- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +-vtrn1q_s8 (int8x16_t a, int8x16_t b) +-{ +- int8x16_t result; +- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +-vtrn1q_s16 (int16x8_t a, int16x8_t b) +-{ +- int16x8_t result; +- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +-vtrn1q_s32 (int32x4_t a, int32x4_t b) +-{ +- int32x4_t result; +- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +-vtrn1q_s64 (int64x2_t a, int64x2_t b) +-{ +- int64x2_t result; +- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +-vtrn1q_u8 (uint8x16_t a, uint8x16_t b) +-{ +- uint8x16_t result; +- __asm__ ("trn1 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +-vtrn1q_u16 (uint16x8_t a, uint16x8_t b) +-{ +- uint16x8_t result; +- __asm__ ("trn1 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +-vtrn1q_u32 (uint32x4_t a, uint32x4_t b) +-{ +- uint32x4_t result; +- __asm__ ("trn1 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +-vtrn1q_u64 (uint64x2_t a, uint64x2_t b) +-{ +- uint64x2_t result; +- __asm__ ("trn1 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +-vtrn2_f32 (float32x2_t a, float32x2_t b) +-{ +- float32x2_t result; +- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +-vtrn2_p8 (poly8x8_t a, poly8x8_t b) +-{ +- poly8x8_t result; +- __asm__ ("trn2 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +-vtrn2_p16 (poly16x4_t a, poly16x4_t b) +-{ +- poly16x4_t result; +- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +-vtrn2_s8 (int8x8_t a, int8x8_t b) +-{ +- int8x8_t result; +- __asm__ ("trn2 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +-vtrn2_s16 (int16x4_t a, int16x4_t b) +-{ +- 
int16x4_t result; +- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +-vtrn2_s32 (int32x2_t a, int32x2_t b) +-{ +- int32x2_t result; +- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +-vtrn2_u8 (uint8x8_t a, uint8x8_t b) +-{ +- uint8x8_t result; +- __asm__ ("trn2 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +-vtrn2_u16 (uint16x4_t a, uint16x4_t b) +-{ +- uint16x4_t result; +- __asm__ ("trn2 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +-vtrn2_u32 (uint32x2_t a, uint32x2_t b) +-{ +- uint32x2_t result; +- __asm__ ("trn2 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +-vtrn2q_f32 (float32x4_t a, float32x4_t b) +-{ +- float32x4_t result; +- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +-vtrn2q_f64 (float64x2_t a, float64x2_t b) +-{ +- float64x2_t result; +- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +-vtrn2q_p8 (poly8x16_t a, poly8x16_t b) +-{ +- poly8x16_t result; +- __asm__ ("trn2 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +-vtrn2q_p16 (poly16x8_t a, poly16x8_t b) +-{ +- poly16x8_t result; +- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +-vtrn2q_s8 (int8x16_t a, int8x16_t b) +-{ +- int8x16_t result; +- __asm__ ("trn2 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +-vtrn2q_s16 (int16x8_t a, int16x8_t b) +-{ +- int16x8_t result; +- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +-vtrn2q_s32 (int32x4_t a, int32x4_t b) +-{ +- int32x4_t result; +- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +-vtrn2q_s64 (int64x2_t a, int64x2_t b) +-{ +- int64x2_t result; +- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +-vtrn2q_u8 (uint8x16_t a, uint8x16_t b) +-{ +- uint8x16_t result; +- __asm__ ("trn2 
%0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +-vtrn2q_u16 (uint16x8_t a, uint16x8_t b) +-{ +- uint16x8_t result; +- __asm__ ("trn2 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +-vtrn2q_u32 (uint32x4_t a, uint32x4_t b) +-{ +- uint32x4_t result; +- __asm__ ("trn2 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +-vtrn2q_u64 (uint64x2_t a, uint64x2_t b) +-{ +- uint64x2_t result; +- __asm__ ("trn2 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vtst_p8 (poly8x8_t a, poly8x8_t b) { -- return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); -+ return (uint32x4_t) __a; - } - - #define __GET_LOW(__TYPE) \ -@@ -13414,468 +13661,6 @@ + uint8x8_t result; +@@ -12952,930 +12015,7 @@ + : /* No clobbers */); return result; } +-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +-vuzp1_f32 (float32x2_t a, float32x2_t b) +-{ +- float32x2_t result; +- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +-vuzp1_p8 (poly8x8_t a, poly8x8_t b) +-{ +- poly8x8_t result; +- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +-vuzp1_p16 (poly16x4_t a, poly16x4_t b) +-{ +- poly16x4_t result; +- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +-vuzp1_s8 (int8x8_t a, int8x8_t b) +-{ +- int8x8_t result; +- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +-vuzp1_s16 (int16x4_t a, int16x4_t b) +-{ +- int16x4_t result; +- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +-vuzp1_s32 (int32x2_t a, int32x2_t b) +-{ +- int32x2_t result; +- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +-vuzp1_u8 (uint8x8_t a, uint8x8_t b) +-{ +- uint8x8_t result; +- __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +-vuzp1_u16 (uint16x4_t a, uint16x4_t b) +-{ +- uint16x4_t result; +- __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +-vuzp1_u32 (uint32x2_t a, 
uint32x2_t b) +-{ +- uint32x2_t result; +- __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +-vuzp1q_f32 (float32x4_t a, float32x4_t b) +-{ +- float32x4_t result; +- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +-vuzp1q_f64 (float64x2_t a, float64x2_t b) +-{ +- float64x2_t result; +- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +-vuzp1q_p8 (poly8x16_t a, poly8x16_t b) +-{ +- poly8x16_t result; +- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +-vuzp1q_p16 (poly16x8_t a, poly16x8_t b) +-{ +- poly16x8_t result; +- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +-vuzp1q_s8 (int8x16_t a, int8x16_t b) +-{ +- int8x16_t result; +- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +-vuzp1q_s16 (int16x8_t a, int16x8_t b) +-{ +- int16x8_t result; +- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +-vuzp1q_s32 (int32x4_t a, int32x4_t b) +-{ +- int32x4_t result; +- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +-vuzp1q_s64 (int64x2_t a, int64x2_t b) +-{ +- int64x2_t result; +- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +-vuzp1q_u8 (uint8x16_t a, uint8x16_t b) +-{ +- uint8x16_t result; +- __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +-vuzp1q_u16 (uint16x8_t a, uint16x8_t b) +-{ +- uint16x8_t result; +- __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +-vuzp1q_u32 (uint32x4_t a, uint32x4_t b) +-{ +- uint32x4_t result; +- __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +-vuzp1q_u64 (uint64x2_t a, uint64x2_t b) +-{ +- uint64x2_t result; +- __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +-vuzp2_f32 (float32x2_t a, float32x2_t b) +-{ 
+- float32x2_t result; +- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +-vuzp2_p8 (poly8x8_t a, poly8x8_t b) +-{ +- poly8x8_t result; +- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +-vuzp2_p16 (poly16x4_t a, poly16x4_t b) +-{ +- poly16x4_t result; +- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +-vuzp2_s8 (int8x8_t a, int8x8_t b) +-{ +- int8x8_t result; +- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +-vuzp2_s16 (int16x4_t a, int16x4_t b) +-{ +- int16x4_t result; +- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +-vuzp2_s32 (int32x2_t a, int32x2_t b) +-{ +- int32x2_t result; +- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +-vuzp2_u8 (uint8x8_t a, uint8x8_t b) +-{ +- uint8x8_t result; +- __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +-vuzp2_u16 (uint16x4_t a, uint16x4_t b) +-{ +- uint16x4_t result; +- __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +-vuzp2_u32 (uint32x2_t a, uint32x2_t b) +-{ +- uint32x2_t result; +- __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +-vuzp2q_f32 (float32x4_t a, float32x4_t b) +-{ +- float32x4_t result; +- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +-vuzp2q_f64 (float64x2_t a, float64x2_t b) +-{ +- float64x2_t result; +- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +-vuzp2q_p8 (poly8x16_t a, poly8x16_t b) +-{ +- poly8x16_t result; +- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +-vuzp2q_p16 (poly16x8_t a, poly16x8_t b) +-{ +- poly16x8_t result; +- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +-vuzp2q_s8 (int8x16_t a, int8x16_t b) +-{ +- int8x16_t result; +- __asm__ ("uzp2 
%0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +-vuzp2q_s16 (int16x8_t a, int16x8_t b) +-{ +- int16x8_t result; +- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +-vuzp2q_s32 (int32x4_t a, int32x4_t b) +-{ +- int32x4_t result; +- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +-vuzp2q_s64 (int64x2_t a, int64x2_t b) +-{ +- int64x2_t result; +- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +-vuzp2q_u8 (uint8x16_t a, uint8x16_t b) +-{ +- uint8x16_t result; +- __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +-vuzp2q_u16 (uint16x8_t a, uint16x8_t b) +-{ +- uint16x8_t result; +- __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +-vuzp2q_u32 (uint32x4_t a, uint32x4_t b) +-{ +- uint32x4_t result; +- __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +-vuzp2q_u64 (uint64x2_t a, uint64x2_t b) +-{ +- uint64x2_t result; +- __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vzip1_f32 (float32x2_t a, float32x2_t b) -{ @@ -8681,4518 +19639,10556 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ - return result; -} - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vzip2q_s8 (int8x16_t a, int8x16_t b) --{ -- int8x16_t result; -- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} +-__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +-vzip2q_s8 (int8x16_t a, int8x16_t b) +-{ +- int8x16_t result; +- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +-vzip2q_s16 (int16x8_t a, int16x8_t b) +-{ +- int16x8_t result; +- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +-vzip2q_s32 (int32x4_t a, int32x4_t b) +-{ +- int32x4_t result; +- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +-vzip2q_s64 (int64x2_t a, int64x2_t b) +-{ +- int64x2_t result; +- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* 
No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +-vzip2q_u8 (uint8x16_t a, uint8x16_t b) +-{ +- uint8x16_t result; +- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +-vzip2q_u16 (uint16x8_t a, uint16x8_t b) +-{ +- uint16x8_t result; +- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +-vzip2q_u32 (uint32x4_t a, uint32x4_t b) +-{ +- uint32x4_t result; +- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- +-__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +-vzip2q_u64 (uint64x2_t a, uint64x2_t b) +-{ +- uint64x2_t result; +- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" +- : "=w"(result) +- : "w"(a), "w"(b) +- : /* No clobbers */); +- return result; +-} +- + /* End of temporary inline asm implementations. */ + + /* Start of temporary inline asm for vldn, vstn and friends. */ +@@ -14205,132 +12345,225 @@ + __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) + __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) + +-#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ +- lnsuffix, funcsuffix, Q) \ +- typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \ +- __extension__ static __inline void \ +- __attribute__ ((__always_inline__)) \ +- vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ +- intype b, const int c) \ +- { \ +- __ST2_LANE_STRUCTURE_##intype *__p = \ +- (__ST2_LANE_STRUCTURE_##intype *)ptr; \ +- __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ +- "st2 {v16." #lnsuffix ", v17." 
#lnsuffix "}[%2], %0\n\t" \ +- : "=Q"(*__p) \ +- : "Q"(b), "i"(c) \ +- : "v16", "v17"); \ +- } ++#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \ ++ mode, ptr_mode, funcsuffix, signedtype) \ ++__extension__ static __inline void \ ++__attribute__ ((__always_inline__)) \ ++vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ ++ intype __b, const int __c) \ ++{ \ ++ __builtin_aarch64_simd_oi __o; \ ++ largetype __temp; \ ++ __temp.val[0] \ ++ = vcombine_##funcsuffix (__b.val[0], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __temp.val[1] \ ++ = vcombine_##funcsuffix (__b.val[1], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __o = __builtin_aarch64_set_qregoi##mode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregoi##mode (__o, \ ++ (signedtype) __temp.val[1], 1); \ ++ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ ++ __ptr, __o, __c); \ ++} + +-__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) +-__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) +-__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) +-__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) +-__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) +-__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) +-__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) +-__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) +-__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) +-__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) +-__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) +-__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) +-__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) +-__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) +-__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) +-__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) +-__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) +-__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) +-__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) +-__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) +-__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) +-__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) +-__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) +-__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) ++__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32, ++ float32x4_t) ++__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64, ++ float64x2_t) ++__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t) ++__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16, ++ int16x8_t) ++__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t) ++__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t) ++__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t) ++__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t) ++__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t) ++__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16, ++ int16x8_t) ++__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32, ++ int32x4_t) ++__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64, ++ int64x2_t) + +-#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ +- lnsuffix, funcsuffix, Q) \ +- typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \ +- __extension__ static __inline void \ +- __attribute__ ((__always_inline__)) \ +- vst3 ## Q ## _lane_ ## 
funcsuffix (ptrtype *ptr, \ +- intype b, const int c) \ +- { \ +- __ST3_LANE_STRUCTURE_##intype *__p = \ +- (__ST3_LANE_STRUCTURE_##intype *)ptr; \ +- __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ +- "st3 {v16." #lnsuffix " - v18." #lnsuffix "}[%2], %0\n\t" \ +- : "=Q"(*__p) \ +- : "Q"(b), "i"(c) \ +- : "v16", "v17", "v18"); \ +- } ++#undef __ST2_LANE_FUNC ++#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ ++__extension__ static __inline void \ ++__attribute__ ((__always_inline__)) \ ++vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ ++ intype __b, const int __c) \ ++{ \ ++ union { intype __i; \ ++ __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ ++ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ ++ __ptr, __temp.__o, __c); \ ++} + +-__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) +-__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) +-__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) +-__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) +-__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) +-__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) +-__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) +-__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) +-__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) +-__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) +-__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) +-__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) +-__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) +-__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) +-__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) +-__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) +-__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) +-__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) +-__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) +-__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) +-__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) +-__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) +-__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) +-__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) ++__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) ++__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) ++__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) ++__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) ++__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) ++__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) ++__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) ++__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) ++__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) ++__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) ++__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) ++__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) + +-#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ +- lnsuffix, funcsuffix, Q) \ +- typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \ +- __extension__ static __inline void \ +- __attribute__ ((__always_inline__)) \ +- vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ +- intype b, const int c) \ +- { \ +- __ST4_LANE_STRUCTURE_##intype *__p = \ +- (__ST4_LANE_STRUCTURE_##intype *)ptr; \ +- __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ +- "st4 {v16." #lnsuffix " - v19." 
#lnsuffix "}[%2], %0\n\t" \ +- : "=Q"(*__p) \ +- : "Q"(b), "i"(c) \ +- : "v16", "v17", "v18", "v19"); \ +- } ++#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \ ++ mode, ptr_mode, funcsuffix, signedtype) \ ++__extension__ static __inline void \ ++__attribute__ ((__always_inline__)) \ ++vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ ++ intype __b, const int __c) \ ++{ \ ++ __builtin_aarch64_simd_ci __o; \ ++ largetype __temp; \ ++ __temp.val[0] \ ++ = vcombine_##funcsuffix (__b.val[0], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __temp.val[1] \ ++ = vcombine_##funcsuffix (__b.val[1], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __temp.val[2] \ ++ = vcombine_##funcsuffix (__b.val[2], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __o = __builtin_aarch64_set_qregci##mode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregci##mode (__o, \ ++ (signedtype) __temp.val[1], 1); \ ++ __o = __builtin_aarch64_set_qregci##mode (__o, \ ++ (signedtype) __temp.val[2], 2); \ ++ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ ++ __ptr, __o, __c); \ ++} + +-__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) +-__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) +-__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) +-__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) +-__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) +-__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) +-__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) +-__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) +-__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) +-__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) +-__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) +-__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) +-__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) +-__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) +-__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) +-__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) +-__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) +-__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) +-__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) +-__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) +-__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) +-__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) +-__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) +-__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) ++__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32, ++ float32x4_t) ++__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64, ++ float64x2_t) ++__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t) ++__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16, ++ int16x8_t) ++__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t) ++__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t) ++__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t) ++__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t) ++__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t) ++__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16, ++ int16x8_t) ++__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32, ++ int32x4_t) ++__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64, ++ int64x2_t) + ++#undef __ST3_LANE_FUNC ++#define 
__ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ ++__extension__ static __inline void \ ++__attribute__ ((__always_inline__)) \ ++vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ ++ intype __b, const int __c) \ ++{ \ ++ union { intype __i; \ ++ __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ ++ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ ++ __ptr, __temp.__o, __c); \ ++} ++ ++__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) ++__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) ++__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) ++__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) ++__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) ++__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) ++__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) ++__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) ++__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) ++__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) ++__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) ++__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) ++ ++#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \ ++ mode, ptr_mode, funcsuffix, signedtype) \ ++__extension__ static __inline void \ ++__attribute__ ((__always_inline__)) \ ++vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ ++ intype __b, const int __c) \ ++{ \ ++ __builtin_aarch64_simd_xi __o; \ ++ largetype __temp; \ ++ __temp.val[0] \ ++ = vcombine_##funcsuffix (__b.val[0], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __temp.val[1] \ ++ = vcombine_##funcsuffix (__b.val[1], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __temp.val[2] \ ++ = vcombine_##funcsuffix (__b.val[2], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __temp.val[3] \ ++ = vcombine_##funcsuffix (__b.val[3], \ ++ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ ++ __o = __builtin_aarch64_set_qregxi##mode (__o, \ ++ (signedtype) __temp.val[0], 0); \ ++ __o = __builtin_aarch64_set_qregxi##mode (__o, \ ++ (signedtype) __temp.val[1], 1); \ ++ __o = __builtin_aarch64_set_qregxi##mode (__o, \ ++ (signedtype) __temp.val[2], 2); \ ++ __o = __builtin_aarch64_set_qregxi##mode (__o, \ ++ (signedtype) __temp.val[3], 3); \ ++ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ ++ __ptr, __o, __c); \ ++} ++ ++__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32, ++ float32x4_t) ++__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64, ++ float64x2_t) ++__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t) ++__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16, ++ int16x8_t) ++__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t) ++__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t) ++__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t) ++__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t) ++__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t) ++__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16, ++ int16x8_t) ++__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32, ++ int32x4_t) ++__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64, ++ int64x2_t) ++ ++#undef __ST4_LANE_FUNC ++#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ ++__extension__ static __inline void \ ++__attribute__ ((__always_inline__)) \ 
++vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ ++ intype __b, const int __c) \ ++{ \ ++ union { intype __i; \ ++ __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ ++ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ ++ __ptr, __temp.__o, __c); \ ++} ++ ++__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) ++__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) ++__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) ++__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) ++__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) ++__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) ++__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) ++__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) ++__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) ++__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) ++__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) ++__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) ++ + __extension__ static __inline int64_t __attribute__ ((__always_inline__)) + vaddlv_s32 (int32x2_t a) + { +@@ -18489,6 +16722,292 @@ + return __aarch64_vgetq_lane_u64 (__a, __b); + } + ++/* vext */ ++ ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ++vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 2); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); ++#endif ++} ++ ++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ++vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) ++{ ++ /* The only possible index to the assembler instruction returns element 0. */ ++ __builtin_aarch64_im_lane_boundsi (__c, 1); ++ return __a; ++} ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 8); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint8x8_t) ++ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); ++#endif ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 4); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, ++ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); ++#endif ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 8); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint8x8_t) ++ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); ++#endif ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 4); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, ++ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, 
__c+3}); ++#endif ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 2); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); ++#endif ++} ++ ++__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) ++vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) ++{ ++ /* The only possible index to the assembler instruction returns element 0. */ ++ __builtin_aarch64_im_lane_boundsi (__c, 1); ++ return __a; ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 8); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint8x8_t) ++ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); ++#endif ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 4); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, ++ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); ++#endif ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 2); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); ++#endif ++} ++ ++__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) ++vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c) ++{ ++ /* The only possible index to the assembler instruction returns element 0. 
*/ ++ __builtin_aarch64_im_lane_boundsi (__c, 1); ++ return __a; ++} ++ ++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ++vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 4); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, ++ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); ++#endif ++} ++ ++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ++vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 2); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); ++#endif ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 16); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint8x16_t) ++ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, ++ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, ++ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); ++#endif ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 8); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint16x8_t) ++ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); ++#endif ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 16); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint8x16_t) ++ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, ++ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, ++ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); ++#endif ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 8); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint16x8_t) ++ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); ++#endif ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 4); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, ++ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); ++#endif ++} ++ ++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ++vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) ++{ ++ 
__builtin_aarch64_im_lane_boundsi (__c, 2); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); ++#endif ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 16); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint8x16_t) ++ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, ++ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, ++ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); ++#endif ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 8); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint16x8_t) ++ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); ++#endif ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ++vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 4); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, ++ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); ++#endif ++} ++ ++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ++vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) ++{ ++ __builtin_aarch64_im_lane_boundsi (__c, 2); ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); ++#endif ++} ++ + /* vfma_lane */ + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +@@ -20943,6 +19462,12 @@ + return (int32x1_t) __builtin_aarch64_sqabssi (__a); + } + ++__extension__ static __inline int64_t __attribute__ ((__always_inline__)) ++vqabsd_s64 (int64_t __a) ++{ ++ return __builtin_aarch64_sqabsdi (__a); ++} ++ + /* vqadd */ + + __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +@@ -20972,25 +19497,26 @@ + __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) + vqaddb_u8 (uint8x1_t __a, uint8x1_t __b) + { +- return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b); ++ return (uint8x1_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); + } + + __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) + vqaddh_u16 (uint16x1_t __a, uint16x1_t __b) + { +- return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b); ++ return (uint16x1_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); + } + + __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) + vqadds_u32 (uint32x1_t __a, uint32x1_t __b) + { +- return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b); ++ return (uint32x1_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqaddd_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b); ++ return (uint64x1_t) 
__builtin_aarch64_uqadddi_uuu ((uint64_t) __a, ++ (uint64_t) __b); + } + + /* vqdmlal */ +@@ -21555,6 +20081,12 @@ + return (int32x1_t) __builtin_aarch64_sqnegsi (__a); + } + ++__extension__ static __inline int64_t __attribute__ ((__always_inline__)) ++vqnegd_s64 (int64_t __a) ++{ ++ return __builtin_aarch64_sqnegdi (__a); ++} ++ + /* vqrdmulh */ + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +@@ -21634,25 +20166,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vqrshl_u8 (uint8x8_t __a, int8x8_t __b) + { +- return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b); ++ return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vqrshl_u16 (uint16x4_t __a, int16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b); ++ return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vqrshl_u32 (uint32x2_t __a, int32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b); ++ return __builtin_aarch64_uqrshlv2si_uus ( __a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqrshl_u64 (uint64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b); ++ return __builtin_aarch64_uqrshldi_uus ( __a, __b); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -21682,25 +20214,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) + { +- return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b); ++ return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) + { +- return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b); ++ return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b); ++ return __builtin_aarch64_uqrshlv4si_uus ( __a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) + { +- return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); ++ return __builtin_aarch64_uqrshlv2di_uus ( __a, __b); + } + + __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +@@ -21730,25 +20262,25 @@ + __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) + vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b) + { +- return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b); ++ return __builtin_aarch64_uqrshlqi_uus (__a, __b); + } + + __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) + vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b) + { +- return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b); ++ return __builtin_aarch64_uqrshlhi_uus (__a, __b); + } + + __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) + vqrshls_u32 (uint32x1_t __a, uint32x1_t __b) + { +- return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b); ++ return __builtin_aarch64_uqrshlsi_uus (__a, __b); + } + + __extension__ 
static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqrshld_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b); ++ return __builtin_aarch64_uqrshldi_uus (__a, __b); + } + + /* vqrshrn */ +@@ -21774,19 +20306,19 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vqrshrn_n_u16 (uint16x8_t __a, const int __b) + { +- return (uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b); ++ return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vqrshrn_n_u32 (uint32x4_t __a, const int __b) + { +- return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b); ++ return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vqrshrn_n_u64 (uint64x2_t __a, const int __b) + { +- return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); ++ return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b); + } + + __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +@@ -21810,19 +20342,19 @@ + __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) + vqrshrnh_n_u16 (uint16x1_t __a, const int __b) + { +- return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); ++ return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b); + } + + __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) + vqrshrns_n_u32 (uint32x1_t __a, const int __b) + { +- return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); ++ return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b); + } + + __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) + vqrshrnd_n_u64 (uint64x1_t __a, const int __b) + { +- return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); ++ return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b); + } + + /* vqrshrun */ +@@ -21892,25 +20424,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vqshl_u8 (uint8x8_t __a, int8x8_t __b) + { +- return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b); ++ return __builtin_aarch64_uqshlv8qi_uus ( __a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vqshl_u16 (uint16x4_t __a, int16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b); ++ return __builtin_aarch64_uqshlv4hi_uus ( __a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vqshl_u32 (uint32x2_t __a, int32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b); ++ return __builtin_aarch64_uqshlv2si_uus ( __a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqshl_u64 (uint64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b); ++ return __builtin_aarch64_uqshldi_uus ( __a, __b); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -21940,25 +20472,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vqshlq_u8 (uint8x16_t __a, int8x16_t __b) + { +- return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b); ++ return __builtin_aarch64_uqshlv16qi_uus ( __a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vqshlq_u16 (uint16x8_t __a, int16x8_t 
__b) + { +- return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b); ++ return __builtin_aarch64_uqshlv8hi_uus ( __a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vqshlq_u32 (uint32x4_t __a, int32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b); ++ return __builtin_aarch64_uqshlv4si_uus ( __a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vqshlq_u64 (uint64x2_t __a, int64x2_t __b) + { +- return (uint64x2_t) __builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); ++ return __builtin_aarch64_uqshlv2di_uus ( __a, __b); + } + + __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +@@ -21988,25 +20520,25 @@ + __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) + vqshlb_u8 (uint8x1_t __a, uint8x1_t __b) + { +- return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b); ++ return __builtin_aarch64_uqshlqi_uus (__a, __b); + } + + __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) + vqshlh_u16 (uint16x1_t __a, uint16x1_t __b) + { +- return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b); ++ return __builtin_aarch64_uqshlhi_uus (__a, __b); + } + + __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) + vqshls_u32 (uint32x1_t __a, uint32x1_t __b) + { +- return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b); ++ return __builtin_aarch64_uqshlsi_uus (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqshld_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b); ++ return __builtin_aarch64_uqshldi_uus (__a, __b); + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +@@ -22036,25 +20568,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vqshl_n_u8 (uint8x8_t __a, const int __b) + { +- return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b); ++ return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vqshl_n_u16 (uint16x4_t __a, const int __b) + { +- return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b); ++ return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vqshl_n_u32 (uint32x2_t __a, const int __b) + { +- return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b); ++ return __builtin_aarch64_uqshl_nv2si_uus (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqshl_n_u64 (uint64x1_t __a, const int __b) + { +- return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b); ++ return __builtin_aarch64_uqshl_ndi_uus (__a, __b); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -22084,25 +20616,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vqshlq_n_u8 (uint8x16_t __a, const int __b) + { +- return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b); ++ return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vqshlq_n_u16 (uint16x8_t __a, const int __b) + { +- return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b); ++ return 
__builtin_aarch64_uqshl_nv8hi_uus (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vqshlq_n_u32 (uint32x4_t __a, const int __b) + { +- return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b); ++ return __builtin_aarch64_uqshl_nv4si_uus (__a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vqshlq_n_u64 (uint64x2_t __a, const int __b) + { +- return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); ++ return __builtin_aarch64_uqshl_nv2di_uus (__a, __b); + } + + __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +@@ -22132,25 +20664,25 @@ + __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) + vqshlb_n_u8 (uint8x1_t __a, const int __b) + { +- return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b); ++ return __builtin_aarch64_uqshl_nqi_uus (__a, __b); + } + + __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) + vqshlh_n_u16 (uint16x1_t __a, const int __b) + { +- return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b); ++ return __builtin_aarch64_uqshl_nhi_uus (__a, __b); + } + + __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) + vqshls_n_u32 (uint32x1_t __a, const int __b) + { +- return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b); ++ return __builtin_aarch64_uqshl_nsi_uus (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqshld_n_u64 (uint64x1_t __a, const int __b) + { +- return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b); ++ return __builtin_aarch64_uqshl_ndi_uus (__a, __b); + } + + /* vqshlu */ +@@ -22158,73 +20690,73 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vqshlu_n_s8 (int8x8_t __a, const int __b) + { +- return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b); ++ return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vqshlu_n_s16 (int16x4_t __a, const int __b) + { +- return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b); ++ return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vqshlu_n_s32 (int32x2_t __a, const int __b) + { +- return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b); ++ return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqshlu_n_s64 (int64x1_t __a, const int __b) + { +- return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); ++ return __builtin_aarch64_sqshlu_ndi_uss (__a, __b); + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vqshluq_n_s8 (int8x16_t __a, const int __b) + { +- return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b); ++ return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vqshluq_n_s16 (int16x8_t __a, const int __b) + { +- return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b); ++ return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vqshluq_n_s32 (int32x4_t __a, const int __b) + { +- return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b); ++ return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b); + } + + 
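[Editorial sketch: the vqshlu changes above drop the explicit casts by switching to the _uss-suffixed builtins (unsigned result, signed operands). A minimal, hypothetical caller showing the saturating behaviour these intrinsics implement, assuming an AArch64 target with <arm_neon.h>; the lane values are illustrative, not taken from the patch:

#include <arm_neon.h>

uint8x8_t
demo_vqshlu (void)
{
  /* Saturating left shift of signed lanes to an unsigned result:
     negative lanes clamp to 0, overflowing lanes clamp to 255.  */
  int8x8_t a = { -1, 1, 2, 64, 127, -128, 0, 3 };
  return vqshlu_n_s8 (a, 1);   /* { 0, 2, 4, 128, 254, 0, 0, 6 } */
}
]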
__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vqshluq_n_s64 (int64x2_t __a, const int __b) + { +- return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); ++ return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b); + } + + __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) + vqshlub_n_s8 (int8x1_t __a, const int __b) + { +- return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b); ++ return (int8x1_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b); + } + + __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) + vqshluh_n_s16 (int16x1_t __a, const int __b) + { +- return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b); ++ return (int16x1_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b); + } + + __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) + vqshlus_n_s32 (int32x1_t __a, const int __b) + { +- return (int32x1_t) __builtin_aarch64_sqshlu_nsi (__a, __b); ++ return (int32x1_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vqshlud_n_s64 (int64x1_t __a, const int __b) + { +- return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); ++ return (int64x1_t) __builtin_aarch64_sqshlu_ndi_uss (__a, __b); + } + + /* vqshrn */ +@@ -22250,19 +20782,19 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vqshrn_n_u16 (uint16x8_t __a, const int __b) + { +- return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b); ++ return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vqshrn_n_u32 (uint32x4_t __a, const int __b) + { +- return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b); ++ return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vqshrn_n_u64 (uint64x2_t __a, const int __b) + { +- return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); ++ return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b); + } + + __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) +@@ -22286,19 +20818,19 @@ + __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) + vqshrnh_n_u16 (uint16x1_t __a, const int __b) + { +- return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b); ++ return __builtin_aarch64_uqshrn_nhi_uus (__a, __b); + } + + __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) + vqshrns_n_u32 (uint32x1_t __a, const int __b) + { +- return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b); ++ return __builtin_aarch64_uqshrn_nsi_uus (__a, __b); + } + + __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) + vqshrnd_n_u64 (uint64x1_t __a, const int __b) + { +- return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b); ++ return __builtin_aarch64_uqshrn_ndi_uus (__a, __b); + } + + /* vqshrun */ +@@ -22368,25 +20900,26 @@ + __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) + vqsubb_u8 (uint8x1_t __a, uint8x1_t __b) + { +- return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b); ++ return (uint8x1_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); + } + + __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) + vqsubh_u16 (uint16x1_t __a, uint16x1_t __b) + { +- return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b); ++ return (uint16x1_t) 
__builtin_aarch64_uqsubhi_uuu (__a, __b); + } + + __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) + vqsubs_u32 (uint32x1_t __a, uint32x1_t __b) + { +- return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b); ++ return (uint32x1_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vqsubd_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); ++ return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a, ++ (uint64_t) __b); + } + + /* vrecpe */ +@@ -22467,6 +21000,234 @@ + return __builtin_aarch64_frecpxdf (__a); + } + ++ ++/* vrev */ ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vrev16_p8 (poly8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vrev16_s8 (int8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vrev16_u8 (uint8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vrev16q_p8 (poly8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vrev16q_s8 (int8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vrev16q_u8 (uint8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); ++} ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vrev32_p8 (poly8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vrev32_p16 (poly16x4_t a) ++{ ++ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vrev32_s8 (int8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vrev32_s16 (int16x4_t a) ++{ ++ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vrev32_u8 (uint8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vrev32_u16 (uint16x4_t a) ++{ ++ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vrev32q_p8 (poly8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vrev32q_p16 (poly16x8_t a) ++{ ++ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vrev32q_s8 
(int8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vrev32q_s16 (int16x8_t a) ++{ ++ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vrev32q_u8 (uint8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vrev32q_u16 (uint16x8_t a) ++{ ++ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); ++} ++ ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ++vrev64_f32 (float32x2_t a) ++{ ++ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); ++} ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vrev64_p8 (poly8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vrev64_p16 (poly16x4_t a) ++{ ++ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vrev64_s8 (int8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vrev64_s16 (int16x4_t a) ++{ ++ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vrev64_s32 (int32x2_t a) ++{ ++ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vrev64_u8 (uint8x8_t a) ++{ ++ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vrev64_u16 (uint16x4_t a) ++{ ++ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vrev64_u32 (uint32x2_t a) ++{ ++ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); ++} ++ ++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ++vrev64q_f32 (float32x4_t a) ++{ ++ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vrev64q_p8 (poly8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vrev64q_p16 (poly16x8_t a) ++{ ++ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vrev64q_s8 (int8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vrev64q_s16 (int16x8_t a) ++{ ++ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vrev64q_s32 (int32x4_t a) ++{ ++ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); ++} 
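[Aside: the vrev family above is open-coded with GCC's generic __builtin_shuffle and a constant index vector, so the permutation is visible to the middle end while the AArch64 backend can still match it back to a single REV instruction. A small usage sketch, illustrative only and assuming an AArch64 target:

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint8x8_t v = {0, 1, 2, 3, 4, 5, 6, 7};
  uint8_t buf[8];

  /* vrev64_u8 reverses the bytes within each 64-bit doubleword,
     i.e. the {7, 6, 5, 4, 3, 2, 1, 0} shuffle above.  */
  vst1_u8 (buf, vrev64_u8 (v));
  for (int i = 0; i < 8; i++)
    printf ("%u ", buf[i]);   /* 7 6 5 4 3 2 1 0  */
  printf ("\n");

  /* vrev16_u8 swaps the bytes within each 16-bit halfword,
     i.e. the {1, 0, 3, 2, 5, 4, 7, 6} shuffle.  */
  vst1_u8 (buf, vrev16_u8 (v));
  for (int i = 0; i < 8; i++)
    printf ("%u ", buf[i]);   /* 1 0 3 2 5 4 7 6  */
  printf ("\n");
  return 0;
}
]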
++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vrev64q_u8 (uint8x16_t a) ++{ ++ return __builtin_shuffle (a, ++ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vrev64q_u16 (uint16x8_t a) ++{ ++ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ++vrev64q_u32 (uint32x4_t a) ++{ ++ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); ++} ++ + /* vrnd */ + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +@@ -22475,6 +21236,12 @@ + return __builtin_aarch64_btruncv2sf (__a); + } + ++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ++vrnd_f64 (float64x1_t __a) ++{ ++ return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); ++} ++ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vrndq_f32 (float32x4_t __a) + { +@@ -22495,6 +21262,12 @@ + return __builtin_aarch64_roundv2sf (__a); + } + ++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ++vrnda_f64 (float64x1_t __a) ++{ ++ return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); ++} ++ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vrndaq_f32 (float32x4_t __a) + { +@@ -22515,6 +21288,12 @@ + return __builtin_aarch64_nearbyintv2sf (__a); + } + ++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ++vrndi_f64 (float64x1_t __a) ++{ ++ return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); ++} ++ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vrndiq_f32 (float32x4_t __a) + { +@@ -22535,6 +21314,12 @@ + return __builtin_aarch64_floorv2sf (__a); + } + ++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ++vrndm_f64 (float64x1_t __a) ++{ ++ return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); ++} ++ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vrndmq_f32 (float32x4_t __a) + { +@@ -22554,6 +21339,13 @@ + { + return __builtin_aarch64_frintnv2sf (__a); + } ++ ++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ++vrndn_f64 (float64x1_t __a) ++{ ++ return __builtin_aarch64_frintndf (__a); ++} ++ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vrndnq_f32 (float32x4_t __a) + { +@@ -22574,6 +21366,12 @@ + return __builtin_aarch64_ceilv2sf (__a); + } + ++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ++vrndp_f64 (float64x1_t __a) ++{ ++ return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); ++} ++ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vrndpq_f32 (float32x4_t __a) + { +@@ -22594,6 +21392,12 @@ + return __builtin_aarch64_rintv2sf (__a); + } + ++__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) ++vrndx_f64 (float64x1_t __a) ++{ ++ return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); ++} ++ + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vrndxq_f32 (float32x4_t __a) + { +@@ -22635,25 +21439,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vrshl_u8 (uint8x8_t __a, int8x8_t __b) + { +- return 
(uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b); ++ return __builtin_aarch64_urshlv8qi_uus (__a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vrshl_u16 (uint16x4_t __a, int16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b); ++ return __builtin_aarch64_urshlv4hi_uus (__a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vrshl_u32 (uint32x2_t __a, int32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b); ++ return __builtin_aarch64_urshlv2si_uus (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vrshl_u64 (uint64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b); ++ return __builtin_aarch64_urshldi_uus (__a, __b); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -22683,25 +21487,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vrshlq_u8 (uint8x16_t __a, int8x16_t __b) + { +- return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b); ++ return __builtin_aarch64_urshlv16qi_uus (__a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vrshlq_u16 (uint16x8_t __a, int16x8_t __b) + { +- return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b); ++ return __builtin_aarch64_urshlv8hi_uus (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vrshlq_u32 (uint32x4_t __a, int32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b); ++ return __builtin_aarch64_urshlv4si_uus (__a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vrshlq_u64 (uint64x2_t __a, int64x2_t __b) + { +- return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b); ++ return __builtin_aarch64_urshlv2di_uus (__a, __b); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +@@ -22713,7 +21517,7 @@ + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vrshld_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b); ++ return __builtin_aarch64_urshldi_uus (__a, __b); + } + + /* vrshr */ +@@ -22745,25 +21549,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vrshr_n_u8 (uint8x8_t __a, const int __b) + { +- return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b); ++ return __builtin_aarch64_urshr_nv8qi_uus (__a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vrshr_n_u16 (uint16x4_t __a, const int __b) + { +- return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b); ++ return __builtin_aarch64_urshr_nv4hi_uus (__a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vrshr_n_u32 (uint32x2_t __a, const int __b) + { +- return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b); ++ return __builtin_aarch64_urshr_nv2si_uus (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vrshr_n_u64 (uint64x1_t __a, const int __b) + { +- return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b); ++ return __builtin_aarch64_urshr_ndi_uus (__a, __b); + } + + __extension__ static __inline 
int8x16_t __attribute__ ((__always_inline__)) +@@ -22793,25 +21597,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vrshrq_n_u8 (uint8x16_t __a, const int __b) + { +- return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b); ++ return __builtin_aarch64_urshr_nv16qi_uus (__a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vrshrq_n_u16 (uint16x8_t __a, const int __b) + { +- return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b); ++ return __builtin_aarch64_urshr_nv8hi_uus (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vrshrq_n_u32 (uint32x4_t __a, const int __b) + { +- return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b); ++ return __builtin_aarch64_urshr_nv4si_uus (__a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vrshrq_n_u64 (uint64x2_t __a, const int __b) + { +- return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b); ++ return __builtin_aarch64_urshr_nv2di_uus (__a, __b); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +@@ -22823,7 +21627,7 @@ + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vrshrd_n_u64 (uint64x1_t __a, const int __b) + { +- return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b); ++ return __builtin_aarch64_urshr_ndi_uus (__a, __b); + } + + /* vrsra */ +@@ -22855,29 +21659,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) + { +- return (uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a, +- (int8x8_t) __b, __c); ++ return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) + { +- return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a, +- (int16x4_t) __b, __c); ++ return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) + { +- return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a, +- (int32x2_t) __b, __c); ++ return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) + { +- return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a, +- (int64x1_t) __b, __c); ++ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -22907,29 +21707,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) + { +- return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a, +- (int8x16_t) __b, __c); ++ return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) + { +- return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a, +- (int16x8_t) __b, __c); ++ return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint32x4_t 
__attribute__ ((__always_inline__)) + vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) + { +- return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a, +- (int32x4_t) __b, __c); ++ return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) + { +- return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a, +- (int64x2_t) __b, __c); ++ return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +@@ -22941,7 +21737,7 @@ + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) + { +- return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c); ++ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); + } + + #ifdef __ARM_FEATURE_CRYPTO +@@ -23134,109 +21930,109 @@ + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vshl_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b); ++ return __builtin_aarch64_sshlv8qi (__a, __b); + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vshl_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b); ++ return __builtin_aarch64_sshlv4hi (__a, __b); + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vshl_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b); ++ return __builtin_aarch64_sshlv2si (__a, __b); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vshl_s64 (int64x1_t __a, int64x1_t __b) + { +- return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); ++ return __builtin_aarch64_sshldi (__a, __b); + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vshl_u8 (uint8x8_t __a, int8x8_t __b) + { +- return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b); ++ return __builtin_aarch64_ushlv8qi_uus (__a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vshl_u16 (uint16x4_t __a, int16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b); ++ return __builtin_aarch64_ushlv4hi_uus (__a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vshl_u32 (uint32x2_t __a, int32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b); ++ return __builtin_aarch64_ushlv2si_uus (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vshl_u64 (uint64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b); ++ return __builtin_aarch64_ushldi_uus (__a, __b); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vshlq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b); ++ return __builtin_aarch64_sshlv16qi (__a, __b); + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vshlq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b); ++ return __builtin_aarch64_sshlv8hi (__a, __b); + } + + __extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) + vshlq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b); ++ return __builtin_aarch64_sshlv4si (__a, __b); + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vshlq_s64 (int64x2_t __a, int64x2_t __b) + { +- return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b); ++ return __builtin_aarch64_sshlv2di (__a, __b); + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vshlq_u8 (uint8x16_t __a, int8x16_t __b) + { +- return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b); ++ return __builtin_aarch64_ushlv16qi_uus (__a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vshlq_u16 (uint16x8_t __a, int16x8_t __b) + { +- return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b); ++ return __builtin_aarch64_ushlv8hi_uus (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vshlq_u32 (uint32x4_t __a, int32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b); ++ return __builtin_aarch64_ushlv4si_uus (__a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vshlq_u64 (uint64x2_t __a, int64x2_t __b) + { +- return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b); ++ return __builtin_aarch64_ushlv2di_uus (__a, __b); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vshld_s64 (int64x1_t __a, int64x1_t __b) + { +- return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); ++ return __builtin_aarch64_sshldi (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vshld_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b); ++ return __builtin_aarch64_ushldi_uus (__a, __b); + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +@@ -23296,19 +22092,19 @@ + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vshll_n_u8 (uint8x8_t __a, const int __b) + { +- return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b); ++ return __builtin_aarch64_ushll_nv8qi_uus (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vshll_n_u16 (uint16x4_t __a, const int __b) + { +- return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b); ++ return __builtin_aarch64_ushll_nv4hi_uus (__a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vshll_n_u32 (uint32x2_t __a, const int __b) + { +- return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b); ++ return __builtin_aarch64_ushll_nv2si_uus (__a, __b); + } + + /* vshr */ +@@ -23450,29 +22246,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) + { +- return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a, +- (int8x8_t) __b, __c); ++ return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) + { +- return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a, +- (int16x4_t) __b, __c); ++ return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint32x2_t 
__attribute__ ((__always_inline__)) + vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) + { +- return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a, +- (int32x2_t) __b, __c); ++ return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) + { +- return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a, +- (int64x1_t) __b, __c); ++ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -23502,29 +22294,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) + { +- return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a, +- (int8x16_t) __b, __c); ++ return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) + { +- return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a, +- (int16x8_t) __b, __c); ++ return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) + { +- return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a, +- (int32x4_t) __b, __c); ++ return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) + { +- return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a, +- (int64x2_t) __b, __c); ++ return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +@@ -23536,7 +22324,7 @@ + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) + { +- return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c); ++ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); + } + + /* vsqadd */ +@@ -23544,80 +22332,73 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vsqadd_u8 (uint8x8_t __a, int8x8_t __b) + { +- return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a, +- (int8x8_t) __b); ++ return __builtin_aarch64_usqaddv8qi_uus (__a, __b); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vsqadd_u16 (uint16x4_t __a, int16x4_t __b) + { +- return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a, +- (int16x4_t) __b); ++ return __builtin_aarch64_usqaddv4hi_uus (__a, __b); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vsqadd_u32 (uint32x2_t __a, int32x2_t __b) + { +- return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a, +- (int32x2_t) __b); ++ return __builtin_aarch64_usqaddv2si_uus (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vsqadd_u64 (uint64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); ++ return __builtin_aarch64_usqadddi_uus (__a, __b); + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) + { +- 
return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a, +- (int8x16_t) __b); ++ return __builtin_aarch64_usqaddv16qi_uus (__a, __b); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) + { +- return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a, +- (int16x8_t) __b); ++ return __builtin_aarch64_usqaddv8hi_uus (__a, __b); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) + { +- return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a, +- (int32x4_t) __b); ++ return __builtin_aarch64_usqaddv4si_uus (__a, __b); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) + { +- return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a, +- (int64x2_t) __b); ++ return __builtin_aarch64_usqaddv2di_uus (__a, __b); + } + + __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) + vsqaddb_u8 (uint8x1_t __a, int8x1_t __b) + { +- return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b); ++ return __builtin_aarch64_usqaddqi_uus (__a, __b); + } + + __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) + vsqaddh_u16 (uint16x1_t __a, int16x1_t __b) + { +- return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b); ++ return __builtin_aarch64_usqaddhi_uus (__a, __b); + } + + __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) + vsqadds_u32 (uint32x1_t __a, int32x1_t __b) + { +- return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b); ++ return __builtin_aarch64_usqaddsi_uus (__a, __b); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vsqaddd_u64 (uint64x1_t __a, int64x1_t __b) + { +- return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); ++ return __builtin_aarch64_usqadddi_uus (__a, __b); + } + + /* vsqrt */ +@@ -23668,29 +22449,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) + { +- return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a, +- (int8x8_t) __b, __c); ++ return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) + { +- return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a, +- (int16x4_t) __b, __c); ++ return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) + { +- return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a, +- (int32x2_t) __b, __c); ++ return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) + { +- return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a, +- (int64x1_t) __b, __c); ++ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -23720,29 +22497,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) + { +- return 
(uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a, +- (int8x16_t) __b, __c); ++ return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) + { +- return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a, +- (int16x8_t) __b, __c); ++ return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) + { +- return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a, +- (int32x4_t) __b, __c); ++ return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) + { +- return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a, +- (int64x2_t) __b, __c); ++ return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +@@ -23754,7 +22527,7 @@ + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) + { +- return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c); ++ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); + } + + /* vsri */ +@@ -23786,29 +22559,25 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) + { +- return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a, +- (int8x8_t) __b, __c); ++ return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) + { +- return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a, +- (int16x4_t) __b, __c); ++ return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) + { +- return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a, +- (int32x2_t) __b, __c); ++ return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c); + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) + { +- return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a, +- (int64x1_t) __b, __c); ++ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +@@ -23838,29 +22607,25 @@ + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) + { +- return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a, +- (int8x16_t) __b, __c); ++ return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) + { +- return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a, +- (int16x8_t) __b, __c); ++ return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c); + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsriq_n_u32 (uint32x4_t __a, 
uint32x4_t __b, const int __c) + { +- return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a, +- (int32x4_t) __b, __c); ++ return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c); + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) + { +- return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a, +- (int64x2_t) __b, __c); ++ return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +@@ -23872,7 +22637,7 @@ + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) + { +- return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c); ++ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); + } + + /* vst1 */ +@@ -24976,6 +23741,438 @@ + + /* vtrn */ + ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ++vtrn1_f32 (float32x2_t __a, float32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vtrn1_p8 (poly8x8_t __a, poly8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); ++#endif ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vtrn1_p16 (poly16x4_t __a, poly16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); ++#endif ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vtrn1_s8 (int8x8_t __a, int8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); ++#endif ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vtrn1_s16 (int16x4_t __a, int16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); ++#endif ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vtrn1_s32 (int32x2_t __a, int32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vtrn1_u8 (uint8x8_t __a, uint8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); ++#endif ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vtrn1_u16 (uint16x4_t __a, uint16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); ++#endif ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vtrn1_u32 
(uint32x2_t __a, uint32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ++vtrn1q_f32 (float32x4_t __a, float32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); ++#endif ++} ++ ++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ++vtrn1q_f64 (float64x2_t __a, float64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); ++#endif ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); ++#endif ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vtrn1q_s8 (int8x16_t __a, int8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); ++#endif ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vtrn1q_s16 (int16x8_t __a, int16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); ++#endif ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vtrn1q_s32 (int32x4_t __a, int32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); ++#endif ++} ++ ++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ++vtrn1q_s64 (int64x2_t __a, int64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); ++#endif ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 
7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); ++#endif ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ++vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); ++#endif ++} ++ ++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ++vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ++vtrn2_f32 (float32x2_t __a, float32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vtrn2_p8 (poly8x8_t __a, poly8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vtrn2_p16 (poly16x4_t __a, poly16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vtrn2_s8 (int8x8_t __a, int8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vtrn2_s16 (int16x4_t __a, int16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vtrn2_s32 (int32x2_t __a, int32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vtrn2_u8 (uint8x8_t __a, uint8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vtrn2_u16 (uint16x4_t __a, uint16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vtrn2_u32 (uint32x2_t __a, uint32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static 
__inline float32x4_t __attribute__ ((__always_inline__)) ++vtrn2q_f32 (float32x4_t __a, float32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ++vtrn2q_f64 (float64x2_t __a, float64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); ++#endif ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vtrn2q_s8 (int8x16_t __a, int8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); ++#endif ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vtrn2q_s16 (int16x8_t __a, int16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vtrn2q_s32 (int32x4_t __a, int32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ++vtrn2q_s64 (int64x2_t __a, int64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); ++#endif ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ++vtrn2q_u32 (uint32x4_t __a, uint32x4_t 
__b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ++vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ + __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) + vtrn_f32 (float32x2_t a, float32x2_t b) + { +@@ -25206,73 +24403,73 @@ + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vuqadd_s8 (int8x8_t __a, uint8x8_t __b) + { +- return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); ++ return __builtin_aarch64_suqaddv8qi_ssu (__a, __b); + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vuqadd_s16 (int16x4_t __a, uint16x4_t __b) + { +- return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); ++ return __builtin_aarch64_suqaddv4hi_ssu (__a, __b); + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vuqadd_s32 (int32x2_t __a, uint32x2_t __b) + { +- return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); ++ return __builtin_aarch64_suqaddv2si_ssu (__a, __b); + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vuqadd_s64 (int64x1_t __a, uint64x1_t __b) + { +- return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); ++ return __builtin_aarch64_suqadddi_ssu (__a, __b); + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) + { +- return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); ++ return __builtin_aarch64_suqaddv16qi_ssu (__a, __b); + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) + { +- return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); ++ return __builtin_aarch64_suqaddv8hi_ssu (__a, __b); + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) + { +- return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); ++ return __builtin_aarch64_suqaddv4si_ssu (__a, __b); + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) + { +- return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); ++ return __builtin_aarch64_suqaddv2di_ssu (__a, __b); + } + + __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) + vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) + { +- return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); ++ return __builtin_aarch64_suqaddqi_ssu (__a, __b); + } + + __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) + vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) + { +- return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); ++ return __builtin_aarch64_suqaddhi_ssu (__a, __b); + } + + __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) + vuqadds_s32 (int32x1_t __a, uint32x1_t __b) + { +- return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); ++ return __builtin_aarch64_suqaddsi_ssu (__a, __b); + } + + __extension__ static __inline 
int64x1_t __attribute__ ((__always_inline__)) + vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) + { +- return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); ++ return __builtin_aarch64_suqadddi_ssu (__a, __b); + } + + #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ +@@ -25306,10 +24503,880 @@ + + /* vuzp */ + ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ++vuzp1_f32 (float32x2_t __a, float32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); ++#endif ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); ++#endif ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vuzp1_s8 (int8x8_t __a, int8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); ++#endif ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vuzp1_s16 (int16x4_t __a, int16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); ++#endif ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vuzp1_s32 (int32x2_t __a, int32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); ++#endif ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); ++#endif ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ++vuzp1q_f32 (float32x4_t __a, float32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); ++#endif ++} ++ ++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ++vuzp1q_f64 
(float64x2_t __a, float64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); ++#endif ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); ++#endif ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vuzp1q_s8 (int8x16_t __a, int8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); ++#endif ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vuzp1q_s16 (int16x8_t __a, int16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); ++#endif ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vuzp1q_s32 (int32x4_t __a, int32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); ++#endif ++} ++ ++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ++vuzp1q_s64 (int64x2_t __a, int64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); ++#endif ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); ++#endif ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ++vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); ++#endif ++} ++ ++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ++vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, 
(uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ++vuzp2_f32 (float32x2_t __a, float32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); ++#endif ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); ++#endif ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vuzp2_s8 (int8x8_t __a, int8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); ++#endif ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vuzp2_s16 (int16x4_t __a, int16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); ++#endif ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vuzp2_s32 (int32x2_t __a, int32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); ++#endif ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); ++#endif ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ++vuzp2q_f32 (float32x4_t __a, float32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); ++#endif ++} ++ ++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ++vuzp2q_f64 (float64x2_t __a, float64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static 
__inline poly8x16_t __attribute__ ((__always_inline__)) ++vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); ++#endif ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); ++#endif ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vuzp2q_s8 (int8x16_t __a, int8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); ++#else ++ return __builtin_shuffle (__a, __b, ++ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); ++#endif ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vuzp2q_s16 (int16x8_t __a, int16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); ++#endif ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vuzp2q_s32 (int32x4_t __a, int32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); ++#endif ++} ++ ++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ++vuzp2q_s64 (int64x2_t __a, int64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); ++#endif ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); ++#endif ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ++vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); ++#endif ++} ++ ++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ++vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ + __INTERLEAVE_LIST (uzp) + + /* vzip */ + ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) 
++vzip1_f32 (float32x2_t __a, float32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vzip1_p8 (poly8x8_t __a, poly8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); ++#endif ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vzip1_p16 (poly16x4_t __a, poly16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); ++#endif ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vzip1_s8 (int8x8_t __a, int8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); ++#endif ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vzip1_s16 (int16x4_t __a, int16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); ++#endif ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vzip1_s32 (int32x2_t __a, int32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vzip1_u8 (uint8x8_t __a, uint8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); ++#endif ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vzip1_u16 (uint16x4_t __a, uint16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); ++#endif ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vzip1_u32 (uint32x2_t __a, uint32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ++vzip1q_f32 (float32x4_t __a, float32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); ++#endif ++} ++ ++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ++vzip1q_f64 (float64x2_t __a, float64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ 
{24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); ++#endif ++} ++ ++__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) ++ {12, 4, 13, 5, 14, 6, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); ++#endif ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vzip1q_s8 (int8x16_t __a, int8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); ++#endif ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vzip1q_s16 (int16x8_t __a, int16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) ++ {12, 4, 13, 5, 14, 6, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); ++#endif ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vzip1q_s32 (int32x4_t __a, int32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); ++#endif ++} ++ ++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ++vzip1q_s64 (int64x2_t __a, int64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); ++#endif ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) ++ {12, 4, 13, 5, 14, 6, 15, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); ++#endif ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ++vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); ++#endif ++} ++ ++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ++vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); ++#endif ++} ++ ++__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) ++vzip2_f32 (float32x2_t __a, float32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ 
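The shuffle masks above simply select alternating lanes of the two inputs; the #ifdef __AARCH64EB__ variants renumber the indices so the architectural ZIP1/ZIP2 lane order is preserved on big-endian. For reference, a minimal C sketch of what the new vzip intrinsics compute (editorial illustration, not part of the patch; assumes an arm_neon.h that already carries these definitions):

    #include <arm_neon.h>
    #include <stdio.h>

    int main (void)            /* illustrative test only */
    {
      int32x2_t a = {1, 2};
      int32x2_t b = {3, 4};
      int32x2_t lo = vzip1_s32 (a, b);   /* interleaves the low halves:  {1, 3} */
      int32x2_t hi = vzip2_s32 (a, b);   /* interleaves the high halves: {2, 4} */
      printf ("%d %d / %d %d\n",
              vget_lane_s32 (lo, 0), vget_lane_s32 (lo, 1),
              vget_lane_s32 (hi, 0), vget_lane_s32 (hi, 1));
      return 0;
    }
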
++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vzip2_p8 (poly8x8_t __a, poly8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) ++vzip2_p16 (poly16x4_t __a, poly16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) ++vzip2_s8 (int8x8_t __a, int8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) ++vzip2_s16 (int16x4_t __a, int16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) ++vzip2_s32 (int32x2_t __a, int32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) ++vzip2_u8 (uint8x8_t __a, uint8x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) ++vzip2_u16 (uint16x4_t __a, uint16x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) ++vzip2_u32 (uint32x2_t __a, uint32x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) ++vzip2q_f32 (float32x4_t __a, float32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) ++vzip2q_f64 (float64x2_t __a, float64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) ++vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); ++#endif ++} ++ ++__extension__ 
static __inline poly16x8_t __attribute__ ((__always_inline__)) ++vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) ++ {4, 12, 5, 13, 6, 14, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) ++vzip2q_s8 (int8x16_t __a, int8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); ++#endif ++} ++ ++__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) ++vzip2q_s16 (int16x8_t __a, int16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) ++ {4, 12, 5, 13, 6, 14, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) ++vzip2q_s32 (int32x4_t __a, int32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) ++vzip2q_s64 (int64x2_t __a, int64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ ++__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) ++vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); ++#else ++ return __builtin_shuffle (__a, __b, (uint8x16_t) ++ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); ++#endif ++} ++ ++__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) ++vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); ++#else ++ return __builtin_shuffle (__a, __b, (uint16x8_t) ++ {4, 12, 5, 13, 6, 14, 7, 15}); ++#endif ++} ++ ++__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) ++vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); ++#else ++ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); ++#endif ++} ++ ++__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) ++vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) ++{ ++#ifdef __AARCH64EB__ ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); ++#else ++ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); ++#endif ++} ++ + __INTERLEAVE_LIST (zip) + + #undef __INTERLEAVE_LIST +--- a/src/gcc/config/aarch64/t-aarch64-linux ++++ b/src/gcc/config/aarch64/t-aarch64-linux +@@ -22,10 +22,7 @@ + LIB1ASMFUNCS = _aarch64_sync_cache_range + + AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be) +-MULTILIB_OSDIRNAMES = .=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) ++MULTILIB_OSDIRNAMES = mabi.lp64=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) + MULTIARCH_DIRNAME = $(call 
if_multiarch,aarch64$(AARCH_BE)-linux-gnu) + +-# Disable the multilib for linux-gnu targets for the time being; focus +-# on the baremetal targets. +-MULTILIB_OPTIONS = +-MULTILIB_DIRNAMES = ++MULTILIB_OSDIRNAMES += mabi.ilp32=../libilp32 +--- a/src/gcc/config/aarch64/aarch64.md ++++ b/src/gcc/config/aarch64/aarch64.md +@@ -68,6 +68,14 @@ + (define_c_enum "unspec" [ + UNSPEC_CASESI + UNSPEC_CLS ++ UNSPEC_CRC32B ++ UNSPEC_CRC32CB ++ UNSPEC_CRC32CH ++ UNSPEC_CRC32CW ++ UNSPEC_CRC32CX ++ UNSPEC_CRC32H ++ UNSPEC_CRC32W ++ UNSPEC_CRC32X + UNSPEC_FRECPE + UNSPEC_FRECPS + UNSPEC_FRECPX +@@ -98,15 +106,24 @@ + UNSPEC_ST2 + UNSPEC_ST3 + UNSPEC_ST4 ++ UNSPEC_ST2_LANE ++ UNSPEC_ST3_LANE ++ UNSPEC_ST4_LANE + UNSPEC_TLS + UNSPEC_TLSDESC + UNSPEC_USHL_2S + UNSPEC_USHR64 + UNSPEC_VSTRUCTDUMMY ++ UNSPEC_SP_SET ++ UNSPEC_SP_TEST + ]) + + (define_c_enum "unspecv" [ + UNSPECV_EH_RETURN ; Represent EH_RETURN ++ UNSPECV_GET_FPCR ; Represent fetch of FPCR content. ++ UNSPECV_SET_FPCR ; Represent assign of FPCR content. ++ UNSPECV_GET_FPSR ; Represent fetch of FPSR content. ++ UNSPECV_SET_FPSR ; Represent assign of FPSR content. + ] + ) + +@@ -514,6 +531,10 @@ + (use (match_operand 2 "" ""))])] + "" + { ++ if (!REG_P (XEXP (operands[0], 0)) ++ && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) ++ XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0)); ++ + if (operands[2] == NULL_RTX) + operands[2] = const0_rtx; + } +@@ -527,6 +548,10 @@ + (use (match_operand 3 "" ""))])] + "" + { ++ if (!REG_P (XEXP (operands[1], 0)) ++ && (GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF)) ++ XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); ++ + if (operands[3] == NULL_RTX) + operands[3] = const0_rtx; + } +@@ -533,25 +558,28 @@ + ) + + (define_insn "*sibcall_insn" +- [(call (mem:DI (match_operand:DI 0 "" "X")) ++ [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) + (match_operand 1 "" "")) + (return) + (use (match_operand 2 "" ""))] +- "GET_CODE (operands[0]) == SYMBOL_REF" +- "b\\t%a0" +- [(set_attr "type" "branch")] +- ++ "SIBLING_CALL_P (insn)" ++ "@ ++ br\\t%0 ++ b\\t%a0" ++ [(set_attr "type" "branch, branch")] + ) + + (define_insn "*sibcall_value_insn" + [(set (match_operand 0 "" "") +- (call (mem:DI (match_operand 1 "" "X")) ++ (call (mem:DI (match_operand 1 "aarch64_call_insn_operand" "Ucs, Usf")) + (match_operand 2 "" ""))) + (return) + (use (match_operand 3 "" ""))] +- "GET_CODE (operands[1]) == SYMBOL_REF" +- "b\\t%a1" +- [(set_attr "type" "branch")] ++ "SIBLING_CALL_P (insn)" ++ "@ ++ br\\t%1 ++ b\\t%a1" ++ [(set_attr "type" "branch, branch")] + ) + + ;; Call subroutine returning any type. 
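The widened sibcall patterns above accept either a register operand ("Ucs", emitted as br) or a direct symbol ("Usf", emitted as b), so sibling calls through a function pointer no longer need a blr plus ret sequence. A hedged C sketch of the kind of code this affects (names are hypothetical; assumes -O2 so tail calls are performed):

    /* Illustrative only: with the register alternative above, the tail
       call through f can be compiled to a single "br" instruction.  */
    typedef long (*callback_t) (long);

    long dispatch (callback_t f, long x)
    {
      return f (x);   /* call in tail position, eligible for a sibcall */
    }
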
+@@ -669,7 +697,7 @@
+    fmov\\t%w0, %s1
+    fmov\\t%s0, %s1"
+   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+-                     adr,adr,fmov,fmov,fmov")
++                     adr,adr,f_mcr,f_mrc,fmov")
+    (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+ )
+
+@@ -694,7 +722,7 @@
+    fmov\\t%d0, %d1
+    movi\\t%d0, %1"
+   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\
+-                     adr,adr,fmov,fmov,fmov,fmov")
++                     adr,adr,f_mcr,f_mrc,fmov,fmov")
+    (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+    (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+ )
+@@ -789,7 +817,7 @@
+    str\\t%w1, %0
+    mov\\t%w0, %w1"
+   [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\
+-                     f_loads,f_stores,f_loads,f_stores,fmov")]
++                     f_loads,f_stores,f_loads,f_stores,mov_reg")]
+ )
+
+ (define_insn "*movdf_aarch64"
+@@ -863,6 +891,24 @@
+ }
+ )
+
++;; 0 is dst
++;; 1 is src
++;; 2 is size of move in bytes
++;; 3 is alignment
++
++(define_expand "movmemdi"
++  [(match_operand:BLK 0 "memory_operand")
++   (match_operand:BLK 1 "memory_operand")
++   (match_operand:DI 2 "immediate_operand")
++   (match_operand:DI 3 "immediate_operand")]
++  "!STRICT_ALIGNMENT"
++{
++  if (aarch64_expand_movmem (operands))
++    DONE;
++  FAIL;
++}
++)
++
+ ;; Operands 1 and 3 are tied together by the final condition; so we allow
+ ;; fairly lax checking on the second memory operation.
+ (define_insn "load_pair<mode>"
+@@ -1063,16 +1109,18 @@
+
+ (define_insn "*addsi3_aarch64"
+   [(set
+-    (match_operand:SI 0 "register_operand" "=rk,rk,rk")
++    (match_operand:SI 0 "register_operand" "=rk,rk,w,rk")
+     (plus:SI
+-     (match_operand:SI 1 "register_operand" "%rk,rk,rk")
+-     (match_operand:SI 2 "aarch64_plus_operand" "I,r,J")))]
++     (match_operand:SI 1 "register_operand" "%rk,rk,w,rk")
++     (match_operand:SI 2 "aarch64_plus_operand" "I,r,w,J")))]
+   ""
+   "@
+   add\\t%w0, %w1, %2
+   add\\t%w0, %w1, %w2
++  add\\t%0.2s, %1.2s, %2.2s
+   sub\\t%w0, %w1, #%n2"
+-  [(set_attr "type" "alu_imm,alu_reg,alu_imm")]
++  [(set_attr "type" "alu_imm,alu_reg,neon_add,alu_imm")
++   (set_attr "simd" "*,*,yes,*")]
+ )
+
+ ;; zero_extend version of above
+@@ -1106,7 +1154,26 @@
+    (set_attr "simd" "*,*,*,yes")]
+ )
+
+-(define_insn "*add<mode>3_compare0"
++(define_expand "addti3"
++  [(set (match_operand:TI 0 "register_operand" "")
++        (plus:TI (match_operand:TI 1 "register_operand" "")
++                 (match_operand:TI 2 "register_operand" "")))]
++  ""
++{
++  rtx low = gen_reg_rtx (DImode);
++  emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]),
++                                  gen_lowpart (DImode, operands[2])));
++
++  rtx high = gen_reg_rtx (DImode);
++  emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]),
++                                 gen_highpart (DImode, operands[2])));
++
++  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
++  emit_move_insn (gen_highpart (DImode, operands[0]), high);
++  DONE;
++})
++
++(define_insn "add<mode>3_compare0"
+   [(set (reg:CC_NZ CC_REGNUM)
+        (compare:CC_NZ
+         (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r")
+@@ -1390,7 +1457,7 @@
+   [(set_attr "type" "alu_ext")]
+ )
+
+-(define_insn "*add<mode>3_carryin"
++(define_insn "add<mode>3_carryin"
+   [(set
+     (match_operand:GPI 0 "register_operand" "=r")
+     (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0))
+@@ -1558,8 +1625,26 @@
+    (set_attr "simd" "*,yes")]
+ )
+
++(define_expand "subti3"
++  [(set (match_operand:TI 0 "register_operand" "")
++        (minus:TI (match_operand:TI 1 "register_operand" "")
++                  (match_operand:TI 2 "register_operand" "")))]
++  ""
++{
++  rtx low = gen_reg_rtx (DImode);
++  emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]),
++                                  gen_lowpart (DImode, operands[2])));
+
+-(define_insn "*sub<mode>3_compare0"
++  rtx high = gen_reg_rtx (DImode);
++  emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]),
++                                 gen_highpart (DImode, operands[2])));
++
++  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
++  emit_move_insn (gen_highpart (DImode, operands[0]), high);
++  DONE;
++})
++
++(define_insn "sub<mode>3_compare0"
+   [(set (reg:CC_NZ CC_REGNUM)
+        (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r")
+                                  (match_operand:GPI 2 "register_operand" "r"))
+@@ -1706,7 +1791,7 @@
+   [(set_attr "type" "alu_ext")]
+ )
+
+-(define_insn "*sub<mode>3_carryin"
++(define_insn "sub<mode>3_carryin"
+   [(set
+    (match_operand:GPI 0 "register_operand" "=r")
+    (minus:GPI (minus:GPI
+@@ -1935,7 +2020,7 @@
+   [(set_attr "type" "mul")]
+ )
+
+-(define_insn "*madd<mode>"
++(define_insn "madd<mode>"
+   [(set (match_operand:GPI 0 "register_operand" "=r")
+        (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
+                            (match_operand:GPI 2 "register_operand" "r"))
+@@ -2045,6 +2130,48 @@
+   [(set_attr "type" "mull")]
+ )
+
++(define_expand "<su_optab>mulditi3"
++  [(set (match_operand:TI 0 "register_operand")
++        (mult:TI (ANY_EXTEND:TI (match_operand:DI 1 "register_operand"))
++                 (ANY_EXTEND:TI (match_operand:DI 2 "register_operand"))))]
++  ""
++{
++  rtx low = gen_reg_rtx (DImode);
++  emit_insn (gen_muldi3 (low, operands[1], operands[2]));
++
++  rtx high = gen_reg_rtx (DImode);
++  emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2]));
++
++  emit_move_insn (gen_lowpart (DImode, operands[0]), low);
++  emit_move_insn (gen_highpart (DImode, operands[0]), high);
++  DONE;
++})
++
++;; The default expansion of multi3 using umuldi3_highpart will perform
++;; the additions in an order that fails to combine into two madd insns.
++(define_expand "multi3"
++  [(set (match_operand:TI 0 "register_operand")
++        (mult:TI (match_operand:TI 1 "register_operand")
++                 (match_operand:TI 2 "register_operand")))]
++  ""
++{
++  rtx l0 = gen_reg_rtx (DImode);
++  rtx l1 = gen_lowpart (DImode, operands[1]);
++  rtx l2 = gen_lowpart (DImode, operands[2]);
++  rtx h0 = gen_reg_rtx (DImode);
++  rtx h1 = gen_highpart (DImode, operands[1]);
++  rtx h2 = gen_highpart (DImode, operands[2]);
++
++  emit_insn (gen_muldi3 (l0, l1, l2));
++  emit_insn (gen_umuldi3_highpart (h0, l1, l2));
++  emit_insn (gen_madddi (h0, h1, l2, h0));
++  emit_insn (gen_madddi (h0, l1, h2, h0));
++
++  emit_move_insn (gen_lowpart (DImode, operands[0]), l0);
++  emit_move_insn (gen_highpart (DImode, operands[0]), h0);
++  DONE;
++})
++
+ (define_insn "muldi3_highpart"
+   [(set (match_operand:DI 0 "register_operand" "=r")
+        (truncate:DI
+@@ -2345,6 +2472,42 @@
+ }
+ )
+
++(define_expand "mov<mode>cc"
++  [(set (match_operand:GPF 0 "register_operand" "")
++        (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "")
++                          (match_operand:GPF 2 "register_operand" "")
++                          (match_operand:GPF 3 "register_operand" "")))]
++  ""
++  {
++    rtx ccreg;
++    enum rtx_code code = GET_CODE (operands[1]);
++
++    if (code == UNEQ || code == LTGT)
++      FAIL;
++
++    ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
++                                     XEXP (operands[1], 1));
++    operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
++  }
++)
++
++
++;; CRC32 instructions.
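These patterns back the __crc32* intrinsics that the new arm_acle.h further down in this patch declares. A small usage sketch (editorial, not part of the patch; buf_crc32 is a hypothetical name, and it assumes compiling with -march=armv8-a+crc so that __ARM_FEATURE_CRC32 is defined):

    #include <stdint.h>
    #include <stddef.h>
    #include <arm_acle.h>

    /* Illustrative only: accumulate a CRC-32 over a byte buffer; each
       __crc32b call should become a single crc32b instruction.  */
    uint32_t
    buf_crc32 (uint32_t crc, const uint8_t *buf, size_t len)
    {
      for (size_t i = 0; i < len; i++)
        crc = __crc32b (crc, buf[i]);
      return crc;
    }
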
++(define_insn "aarch64_" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand: 2 "register_operand" "r")] ++ CRC))] ++ "TARGET_CRC32" ++ { ++ if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64) ++ return "\\t%w0, %w1, %x2"; ++ else ++ return "\\t%w0, %w1, %w2"; ++ } ++ [(set_attr "type" "crc")] ++) ++ + (define_insn "*csinc2_insn" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator" +@@ -2486,7 +2649,18 @@ + [(set_attr "type" "logic_shift_imm")] + ) + +-;; zero_extend version of above ++(define_insn "*_rol3" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (LOGICAL:GPI (rotate:GPI ++ (match_operand:GPI 1 "register_operand" "r") ++ (match_operand:QI 2 "aarch64_shift_imm_" "n")) ++ (match_operand:GPI 3 "register_operand" "r")))] ++ "" ++ "\\t%0, %3, %1, ror ( - %2)" ++ [(set_attr "type" "logic_shift_imm")] ++) ++ ++;; zero_extend versions of above + (define_insn "*_si3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI +@@ -2499,6 +2673,18 @@ + [(set_attr "type" "logic_shift_imm")] + ) + ++(define_insn "*_rolsi3_uxtw" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (LOGICAL:SI (rotate:SI ++ (match_operand:SI 1 "register_operand" "r") ++ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) ++ (match_operand:SI 3 "register_operand" "r"))))] ++ "" ++ "\\t%w0, %w3, %w1, ror (32 - %2)" ++ [(set_attr "type" "logic_shift_imm")] ++) ++ + (define_insn "one_cmpl2" + [(set (match_operand:GPI 0 "register_operand" "=r") + (not:GPI (match_operand:GPI 1 "register_operand" "r")))] +@@ -3179,6 +3365,38 @@ + [(set_attr "type" "rev")] + ) + ++;; There are no canonicalisation rules for the position of the lshiftrt, ashift ++;; operations within an IOR/AND RTX, therefore we have two patterns matching ++;; each valid permutation. ++ ++(define_insn "rev162" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") ++ (const_int 8)) ++ (match_operand:GPI 3 "const_int_operand" "n")) ++ (and:GPI (lshiftrt:GPI (match_dup 1) ++ (const_int 8)) ++ (match_operand:GPI 2 "const_int_operand" "n"))))] ++ "aarch_rev16_shleft_mask_imm_p (operands[3], mode) ++ && aarch_rev16_shright_mask_imm_p (operands[2], mode)" ++ "rev16\\t%0, %1" ++ [(set_attr "type" "rev")] ++) ++ ++(define_insn "rev162_alt" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r") ++ (const_int 8)) ++ (match_operand:GPI 2 "const_int_operand" "n")) ++ (and:GPI (ashift:GPI (match_dup 1) ++ (const_int 8)) ++ (match_operand:GPI 3 "const_int_operand" "n"))))] ++ "aarch_rev16_shleft_mask_imm_p (operands[3], mode) ++ && aarch_rev16_shright_mask_imm_p (operands[2], mode)" ++ "rev16\\t%0, %1" ++ [(set_attr "type" "rev")] ++) ++ + ;; zero_extend version of above + (define_insn "*bswapsi2_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") +@@ -3193,7 +3411,7 @@ + ;; ------------------------------------------------------------------- + + ;; frint floating-point round to integral standard patterns. +-;; Expands to btrunc, ceil, floor, nearbyint, rint, round. ++;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. 
+
+ (define_insn "<frint_pattern><mode>2"
+   [(set (match_operand:GPF 0 "register_operand" "=w")
+@@ -3489,7 +3707,7 @@
+        (truncate:DI (match_operand:TI 1 "register_operand" "w"))))]
+   "reload_completed || reload_in_progress"
+   "fmov\\t%d0, %d1"
+-  [(set_attr "type" "f_mcr")
++  [(set_attr "type" "fmov")
+    (set_attr "length" "4")
+   ])
+
+@@ -3587,36 +3805,63 @@
+   [(set_attr "type" "call")
+    (set_attr "length" "16")])
+
+-(define_insn "tlsie_small"
+-  [(set (match_operand:DI 0 "register_operand" "=r")
+-       (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")]
++(define_insn "tlsie_small_<mode>"
++  [(set (match_operand:PTR 0 "register_operand" "=r")
++       (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")]
+                   UNSPEC_GOTSMALLTLS))]
+   ""
+-  "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]"
++  "adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, #%L1]"
+   [(set_attr "type" "load1")
+    (set_attr "length" "8")]
+ )
+
+-(define_insn "tlsle_small"
++(define_insn "tlsie_small_sidi"
+   [(set (match_operand:DI 0 "register_operand" "=r")
+-       (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+-                   (match_operand:DI 2 "aarch64_tls_le_symref" "S")]
++       (zero_extend:DI
++         (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")]
++                     UNSPEC_GOTSMALLTLS)))]
++  ""
++  "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]"
++  [(set_attr "type" "load1")
++   (set_attr "length" "8")]
++)
++
++(define_expand "tlsle_small"
++  [(set (match_operand 0 "register_operand" "=r")
++       (unspec [(match_operand 1 "register_operand" "r")
++                (match_operand 2 "aarch64_tls_le_symref" "S")]
++        UNSPEC_GOTSMALLTLS))]
++  ""
++{
++  enum machine_mode mode = GET_MODE (operands[0]);
++  emit_insn ((mode == DImode
++             ? gen_tlsle_small_di
++             : gen_tlsle_small_si) (operands[0],
++                                    operands[1],
++                                    operands[2]));
++  DONE;
++})
++
++(define_insn "tlsle_small_<mode>"
++  [(set (match_operand:P 0 "register_operand" "=r")
++       (unspec:P [(match_operand:P 1 "register_operand" "r")
++                  (match_operand 2 "aarch64_tls_le_symref" "S")]
+                   UNSPEC_GOTSMALLTLS))]
+   ""
+-  "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2"
++  "add\\t%<w>0, %<w>1, #%G2\;add\\t%<w>0, %<w>0, #%L2"
+   [(set_attr "type" "alu_reg")
+    (set_attr "length" "8")]
+ )
+
+-(define_insn "tlsdesc_small"
+-  [(set (reg:DI R0_REGNUM)
+-       (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")]
++(define_insn "tlsdesc_small_<mode>"
++  [(set (reg:PTR R0_REGNUM)
++       (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
+                   UNSPEC_TLSDESC))
+    (clobber (reg:DI LR_REGNUM))
+    (clobber (reg:CC CC_REGNUM))
+    (clobber (match_scratch:DI 1 "=r"))]
+   "TARGET_TLS_DESC"
+-  "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
++  "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
+   [(set_attr "type" "call")
+    (set_attr "length" "16")])
+
+@@ -3641,6 +3886,98 @@
+   DONE;
+ })
+
++;; Named patterns for stack smashing protection.
++(define_expand "stack_protect_set"
++  [(match_operand 0 "memory_operand")
++   (match_operand 1 "memory_operand")]
++  ""
++{
++  enum machine_mode mode = GET_MODE (operands[0]);
++
++  emit_insn ((mode == DImode
++             ? gen_stack_protect_set_di
++             : gen_stack_protect_set_si) (operands[0], operands[1]));
++  DONE;
++})
++
++(define_insn "stack_protect_set_<mode>"
++  [(set (match_operand:PTR 0 "memory_operand" "=m")
++       (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")]
++        UNSPEC_SP_SET))
++   (set (match_scratch:PTR 2 "=&r") (const_int 0))]
++  ""
++  "ldr\\t%<w>2, %1\;str\\t%<w>2, %0\;mov\t%<w>2,0"
++  [(set_attr "length" "12")
++   (set_attr "type" "multiple")])
++
++(define_expand "stack_protect_test"
++  [(match_operand 0 "memory_operand")
++   (match_operand 1 "memory_operand")
++   (match_operand 2)]
++  ""
++{
++  rtx result;
++  enum machine_mode mode = GET_MODE (operands[0]);
++
++  result = gen_reg_rtx(mode);
++
++  emit_insn ((mode == DImode
++             ? gen_stack_protect_test_di
++             : gen_stack_protect_test_si) (result,
++                                           operands[0],
++                                           operands[1]));
++
++  if (mode == DImode)
++    emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
++                                    result, const0_rtx, operands[2]));
++  else
++    emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx),
++                                    result, const0_rtx, operands[2]));
++  DONE;
++})
++
++(define_insn "stack_protect_test_<mode>"
++  [(set (match_operand:PTR 0 "register_operand")
++       (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")
++                    (match_operand:PTR 2 "memory_operand" "m")]
++        UNSPEC_SP_TEST))
++   (clobber (match_scratch:PTR 3 "=&r"))]
++  ""
++  "ldr\t%<w>3, %x1\;ldr\t%<w>0, %x2\;eor\t%<w>0, %<w>3, %<w>0"
++  [(set_attr "length" "12")
++   (set_attr "type" "multiple")])
++
++;; Write Floating-point Control Register.
++(define_insn "set_fpcr"
++  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)]
++  ""
++  "msr\\tfpcr, %0\;isb"
++  [(set_attr "type" "mrs")])
++
++;; Read Floating-point Control Register.
++(define_insn "get_fpcr"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))]
++  ""
++  "mrs\\t%0, fpcr"
++  [(set_attr "type" "mrs")])
++
++;; Write Floating-point Status Register.
++(define_insn "set_fpsr"
++  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)]
++  ""
++  "msr\\tfpsr, %0"
++  [(set_attr "type" "mrs")])
++
++;; Read Floating-point Status Register.
++(define_insn "get_fpsr"
++  [(set (match_operand:SI 0 "register_operand" "=r")
++        (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))]
++  ""
++  "mrs\\t%0, fpsr"
++  [(set_attr "type" "mrs")])
++
++
+ ;; AdvSIMD Stuff
+ (include "aarch64-simd.md")
+
+--- a/src/gcc/config/aarch64/arm_acle.h
++++ b/src/gcc/config/aarch64/arm_acle.h
+@@ -0,0 +1,90 @@
++/* AArch64 Non-NEON ACLE intrinsics include file.
++
++   Copyright (C) 2014 Free Software Foundation, Inc.
++   Contributed by ARM Ltd.
++
++   This file is part of GCC.
++
++   GCC is free software; you can redistribute it and/or modify it
++   under the terms of the GNU General Public License as published
++   by the Free Software Foundation; either version 3, or (at your
++   option) any later version.
++
++   GCC is distributed in the hope that it will be useful, but WITHOUT
++   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
++   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
++   License for more details.
++
++   Under Section 7 of GPL version 3, you are granted additional
++   permissions described in the GCC Runtime Library Exception, version
++   3.1, as published by the Free Software Foundation.
++
++   You should have received a copy of the GNU General Public License and
++   a copy of the GCC Runtime Library Exception along with this program;
++   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
++   <http://www.gnu.org/licenses/>.  */
++
++#ifndef _GCC_ARM_ACLE_H
++#define _GCC_ARM_ACLE_H
++
++#include <stdint.h>
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#ifdef __ARM_FEATURE_CRC32
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++__crc32b (uint32_t __a, uint8_t __b)
++{
++  return __builtin_aarch64_crc32b (__a, __b);
++}
++
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++__crc32cb (uint32_t __a, uint8_t __b)
++{
++  return __builtin_aarch64_crc32cb (__a, __b);
++}
++
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++__crc32ch (uint32_t __a, uint16_t __b)
++{
++  return __builtin_aarch64_crc32ch (__a, __b);
++}
++
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++__crc32cw (uint32_t __a, uint32_t __b)
++{
++  return __builtin_aarch64_crc32cw (__a, __b);
++}
++
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++__crc32cd (uint32_t __a, uint64_t __b)
++{
++  return __builtin_aarch64_crc32cx (__a, __b);
++}
++
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++__crc32h (uint32_t __a, uint16_t __b)
++{
++  return __builtin_aarch64_crc32h (__a, __b);
++}
++
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++__crc32w (uint32_t __a, uint32_t __b)
++{
++  return __builtin_aarch64_crc32w (__a, __b);
++}
++
++__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
++__crc32d (uint32_t __a, uint64_t __b)
++{
++  return __builtin_aarch64_crc32x (__a, __b);
++}
++
++#endif
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
+--- a/src/gcc/config/aarch64/aarch64-builtins.c
++++ b/src/gcc/config/aarch64/aarch64-builtins.c
+@@ -147,16 +147,44 @@
+   = { qualifier_unsigned, qualifier_unsigned };
+ #define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
+ #define TYPES_CREATE (aarch64_types_unop_qualifiers)
+-#define TYPES_REINTERP (aarch64_types_unop_qualifiers)
++#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers)
+ static enum aarch64_type_qualifiers
++aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS]
++  = { qualifier_none, qualifier_unsigned };
++#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers)
++static enum aarch64_type_qualifiers
++aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
++  = { qualifier_none, qualifier_poly };
++#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers)
++static enum aarch64_type_qualifiers
++aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS]
++  = { qualifier_unsigned, qualifier_none };
++#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers)
++static enum aarch64_type_qualifiers
++aarch64_types_unop_ps_qualifiers[SIMD_MAX_BUILTIN_ARGS]
++  = { qualifier_poly, qualifier_none };
++#define TYPES_REINTERP_PS (aarch64_types_unop_ps_qualifiers)
++static enum aarch64_type_qualifiers
+ aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+   = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
+ #define TYPES_BINOP (aarch64_types_binop_qualifiers)
+ static enum aarch64_type_qualifiers
++aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
++  = { qualifier_void, qualifier_none, qualifier_none };
++#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
++static enum aarch64_type_qualifiers
+ 
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; + #define TYPES_BINOPU (aarch64_types_binopu_qualifiers) + static enum aarch64_type_qualifiers ++aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_unsigned, qualifier_unsigned, qualifier_none }; ++#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers) ++static enum aarch64_type_qualifiers ++aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_none, qualifier_none, qualifier_unsigned }; ++#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers) ++static enum aarch64_type_qualifiers + aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_poly, qualifier_poly }; + #define TYPES_BINOPP (aarch64_types_binopp_qualifiers) +@@ -183,9 +211,14 @@ + #define TYPES_GETLANE (aarch64_types_getlane_qualifiers) + #define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) + static enum aarch64_type_qualifiers ++aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_unsigned, qualifier_none, qualifier_immediate }; ++#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers) ++static enum aarch64_type_qualifiers + aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; + #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) ++ + static enum aarch64_type_qualifiers + aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; +@@ -194,6 +227,13 @@ + #define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) + + static enum aarch64_type_qualifiers ++aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, ++ qualifier_immediate }; ++#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers) ++ ++ ++static enum aarch64_type_qualifiers + aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none }; + #define TYPES_COMBINE (aarch64_types_combine_qualifiers) +@@ -230,6 +270,11 @@ + = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; + #define TYPES_STORE1 (aarch64_types_store1_qualifiers) + #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) ++static enum aarch64_type_qualifiers ++aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] ++ = { qualifier_void, qualifier_pointer_map_mode, ++ qualifier_none, qualifier_none }; ++#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers) + + #define CF0(N, X) CODE_FOR_aarch64_##N##X + #define CF1(N, X) CODE_FOR_##N##X##1 +@@ -311,6 +356,8 @@ + VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) + #define BUILTIN_VDQF(T, N, MAP) \ + VAR3 (T, N, MAP, v2sf, v4sf, v2df) ++#define BUILTIN_VDQF_DF(T, N, MAP) \ ++ VAR4 (T, N, MAP, v2sf, v4sf, v2df, df) + #define BUILTIN_VDQH(T, N, MAP) \ + VAR2 (T, N, MAP, v4hi, v8hi) + #define BUILTIN_VDQHS(T, N, MAP) \ +@@ -364,6 +411,28 @@ + #include "aarch64-simd-builtins.def" + }; + ++/* There's only 8 CRC32 builtins. Probably not worth their own .def file. 
*/ ++#define AARCH64_CRC32_BUILTINS \ ++ CRC32_BUILTIN (crc32b, QI) \ ++ CRC32_BUILTIN (crc32h, HI) \ ++ CRC32_BUILTIN (crc32w, SI) \ ++ CRC32_BUILTIN (crc32x, DI) \ ++ CRC32_BUILTIN (crc32cb, QI) \ ++ CRC32_BUILTIN (crc32ch, HI) \ ++ CRC32_BUILTIN (crc32cw, SI) \ ++ CRC32_BUILTIN (crc32cx, DI) ++ ++typedef struct ++{ ++ const char *name; ++ enum machine_mode mode; ++ const enum insn_code icode; ++ unsigned int fcode; ++} aarch64_crc_builtin_datum; ++ ++#define CRC32_BUILTIN(N, M) \ ++ AARCH64_BUILTIN_##N, ++ + #undef VAR1 + #define VAR1(T, N, MAP, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, +@@ -371,13 +440,32 @@ + enum aarch64_builtins + { + AARCH64_BUILTIN_MIN, ++ ++ AARCH64_BUILTIN_GET_FPCR, ++ AARCH64_BUILTIN_SET_FPCR, ++ AARCH64_BUILTIN_GET_FPSR, ++ AARCH64_BUILTIN_SET_FPSR, ++ + AARCH64_SIMD_BUILTIN_BASE, + #include "aarch64-simd-builtins.def" + AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE + + ARRAY_SIZE (aarch64_simd_builtin_data), ++ AARCH64_CRC32_BUILTIN_BASE, ++ AARCH64_CRC32_BUILTINS ++ AARCH64_CRC32_BUILTIN_MAX, + AARCH64_BUILTIN_MAX + }; + ++#undef CRC32_BUILTIN ++#define CRC32_BUILTIN(N, M) \ ++ {"__builtin_aarch64_"#N, M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N}, ++ ++static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { ++ AARCH64_CRC32_BUILTINS ++}; ++ ++#undef CRC32_BUILTIN ++ + static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; + + #define NUM_DREG_TYPES 6 +@@ -749,11 +837,49 @@ + } + } + ++static void ++aarch64_init_crc32_builtins () ++{ ++ tree usi_type = aarch64_build_unsigned_type (SImode); ++ unsigned int i = 0; ++ ++ for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i) ++ { ++ aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i]; ++ tree argtype = aarch64_build_unsigned_type (d->mode); ++ tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); ++ tree fndecl = add_builtin_function (d->name, ftype, d->fcode, ++ BUILT_IN_MD, NULL, NULL_TREE); ++ ++ aarch64_builtin_decls[d->fcode] = fndecl; ++ } ++} ++ + void + aarch64_init_builtins (void) + { ++ tree ftype_set_fpr ++ = build_function_type_list (void_type_node, unsigned_type_node, NULL); ++ tree ftype_get_fpr ++ = build_function_type_list (unsigned_type_node, NULL); ++ ++ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] ++ = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, ++ AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); ++ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] ++ = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, ++ AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); ++ aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] ++ = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, ++ AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); ++ aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] ++ = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, ++ AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); ++ + if (TARGET_SIMD) + aarch64_init_simd_builtins (); ++ if (TARGET_CRC32) ++ aarch64_init_crc32_builtins (); + } + + tree +@@ -953,6 +1079,41 @@ + SIMD_ARG_STOP); + } + ++rtx ++aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target) ++{ ++ rtx pat; ++ aarch64_crc_builtin_datum *d ++ = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)]; ++ enum insn_code icode = d->icode; ++ tree arg0 = CALL_EXPR_ARG (exp, 0); ++ tree arg1 = CALL_EXPR_ARG (exp, 1); ++ rtx op0 = expand_normal (arg0); ++ rtx op1 = expand_normal (arg1); ++ enum 
machine_mode tmode = insn_data[icode].operand[0].mode; ++ enum machine_mode mode0 = insn_data[icode].operand[1].mode; ++ enum machine_mode mode1 = insn_data[icode].operand[2].mode; ++ ++ if (! target ++ || GET_MODE (target) != tmode ++ || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ ++ gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) ++ && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); ++ ++ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) ++ op0 = copy_to_mode_reg (mode0, op0); ++ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) ++ op1 = copy_to_mode_reg (mode1, op1); ++ ++ pat = GEN_FCN (icode) (target, op0, op1); ++ if (! pat) ++ return 0; ++ emit_insn (pat); ++ return target; ++} ++ + /* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient. */ + rtx +@@ -964,9 +1125,41 @@ + { + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + int fcode = DECL_FUNCTION_CODE (fndecl); ++ int icode; ++ rtx pat, op0; ++ tree arg0; + +- if (fcode >= AARCH64_SIMD_BUILTIN_BASE) ++ switch (fcode) ++ { ++ case AARCH64_BUILTIN_GET_FPCR: ++ case AARCH64_BUILTIN_SET_FPCR: ++ case AARCH64_BUILTIN_GET_FPSR: ++ case AARCH64_BUILTIN_SET_FPSR: ++ if ((fcode == AARCH64_BUILTIN_GET_FPCR) ++ || (fcode == AARCH64_BUILTIN_GET_FPSR)) ++ { ++ icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? ++ CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; ++ target = gen_reg_rtx (SImode); ++ pat = GEN_FCN (icode) (target); ++ } ++ else ++ { ++ target = NULL_RTX; ++ icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ? ++ CODE_FOR_set_fpsr : CODE_FOR_set_fpcr; ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ pat = GEN_FCN (icode) (op0); ++ } ++ emit_insn (pat); ++ return target; ++ } ++ ++ if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) + return aarch64_simd_expand_builtin (fcode, exp, target); ++ else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX) ++ return aarch64_crc32_expand_builtin (fcode, exp, target); + + return NULL_RTX; + } +@@ -1086,7 +1279,29 @@ + + return aarch64_builtin_decls[builtin]; + } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vzip2q_s16 (int16x8_t a, int16x8_t b) --{ -- int16x8_t result; -- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} ++ case BUILT_IN_BSWAP16: ++#undef AARCH64_CHECK_BUILTIN_MODE ++#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ ++ (out_mode == N##Imode && out_n == C \ ++ && in_mode == N##Imode && in_n == C) ++ if (AARCH64_CHECK_BUILTIN_MODE (4, H)) ++ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; ++ else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) ++ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; ++ else ++ return NULL_TREE; ++ case BUILT_IN_BSWAP32: ++ if (AARCH64_CHECK_BUILTIN_MODE (2, S)) ++ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; ++ else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) ++ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; ++ else ++ return NULL_TREE; ++ case BUILT_IN_BSWAP64: ++ if (AARCH64_CHECK_BUILTIN_MODE (2, D)) ++ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; ++ else ++ return NULL_TREE; + default: + return NULL_TREE; + } +@@ -1127,6 +1342,25 @@ + return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); + break; + } ++ VAR1 (REINTERP_SS, reinterpretdi, 0, 
df) ++ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) ++ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) ++ VAR1 (REINTERP_SS, reinterpretv2si, 0, df) ++ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) ++ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) ++ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) ++ VAR1 (REINTERP_US, reinterpretdi, 0, df) ++ VAR1 (REINTERP_US, reinterpretv8qi, 0, df) ++ VAR1 (REINTERP_US, reinterpretv4hi, 0, df) ++ VAR1 (REINTERP_US, reinterpretv2si, 0, df) ++ VAR1 (REINTERP_US, reinterpretv2sf, 0, df) ++ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) ++ VAR1 (REINTERP_PS, reinterpretdi, 0, df) ++ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) ++ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) ++ VAR1 (REINTERP_PS, reinterpretv2si, 0, df) ++ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) ++ return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]); + VAR1 (UNOP, floatv2si, 2, v2sf) + VAR1 (UNOP, floatv4si, 2, v4sf) + VAR1 (UNOP, floatv2di, 2, v2df) +@@ -1196,6 +1430,106 @@ + return changed; + } + ++void ++aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) ++{ ++ const unsigned AARCH64_FE_INVALID = 1; ++ const unsigned AARCH64_FE_DIVBYZERO = 2; ++ const unsigned AARCH64_FE_OVERFLOW = 4; ++ const unsigned AARCH64_FE_UNDERFLOW = 8; ++ const unsigned AARCH64_FE_INEXACT = 16; ++ const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID ++ | AARCH64_FE_DIVBYZERO ++ | AARCH64_FE_OVERFLOW ++ | AARCH64_FE_UNDERFLOW ++ | AARCH64_FE_INEXACT); ++ const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8; ++ tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr; ++ tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr; ++ tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr; ++ tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv; ++ ++ /* Generate the equivalence of : ++ unsigned int fenv_cr; ++ fenv_cr = __builtin_aarch64_get_fpcr (); ++ ++ unsigned int fenv_sr; ++ fenv_sr = __builtin_aarch64_get_fpsr (); ++ ++ Now set all exceptions to non-stop ++ unsigned int mask_cr ++ = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT); ++ unsigned int masked_cr; ++ masked_cr = fenv_cr & mask_cr; ++ ++ And clear all exception flags ++ unsigned int maske_sr = ~AARCH64_FE_ALL_EXCEPT; ++ unsigned int masked_cr; ++ masked_sr = fenv_sr & mask_sr; ++ ++ __builtin_aarch64_set_cr (masked_cr); ++ __builtin_aarch64_set_sr (masked_sr); */ ++ ++ fenv_cr = create_tmp_var (unsigned_type_node, NULL); ++ fenv_sr = create_tmp_var (unsigned_type_node, NULL); ++ ++ get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]; ++ set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]; ++ get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]; ++ set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]; ++ ++ mask_cr = build_int_cst (unsigned_type_node, ++ ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT)); ++ mask_sr = build_int_cst (unsigned_type_node, ++ ~(AARCH64_FE_ALL_EXCEPT)); ++ ++ ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node, ++ fenv_cr, build_call_expr (get_fpcr, 0)); ++ ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node, ++ fenv_sr, build_call_expr (get_fpsr, 0)); ++ ++ masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr); ++ masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr); ++ ++ hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr); ++ hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr); ++ ++ hold_fnclex = build2 (COMPOUND_EXPR, 
void_type_node, hold_fnclex_cr, ++ hold_fnclex_sr); ++ masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr, ++ masked_fenv_sr); ++ ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr); ++ ++ *hold = build2 (COMPOUND_EXPR, void_type_node, ++ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), ++ hold_fnclex); ++ ++ /* Store the value of masked_fenv to clear the exceptions: ++ __builtin_aarch64_set_fpsr (masked_fenv_sr); */ ++ ++ *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr); ++ ++ /* Generate the equivalent of : ++ unsigned int new_fenv_var; ++ new_fenv_var = __builtin_aarch64_get_fpsr (); ++ ++ __builtin_aarch64_set_fpsr (fenv_sr); ++ ++ __atomic_feraiseexcept (new_fenv_var); */ ++ ++ new_fenv_var = create_tmp_var (unsigned_type_node, NULL); ++ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, ++ new_fenv_var, build_call_expr (get_fpsr, 0)); ++ restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); ++ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); ++ update_call = build_call_expr (atomic_feraiseexcept, 1, ++ fold_convert (integer_type_node, new_fenv_var)); ++ *update = build2 (COMPOUND_EXPR, void_type_node, ++ build2 (COMPOUND_EXPR, void_type_node, ++ reload_fenv, restore_fnenv), update_call); ++} ++ ++ + #undef AARCH64_CHECK_BUILTIN_MODE + #undef AARCH64_FIND_FRINT_VARIANT + #undef BUILTIN_DX +--- a/src/gcc/config/aarch64/aarch64-protos.h ++++ b/src/gcc/config/aarch64/aarch64-protos.h +@@ -108,9 +108,22 @@ + cost models and vectors for address cost calculations, register + move costs and memory move costs. */ + ++/* Scaled addressing modes can vary cost depending on the mode of the ++ value to be loaded/stored. QImode values cannot use scaled ++ addressing modes. */ ++ ++struct scale_addr_mode_cost ++{ ++ const int hi; ++ const int si; ++ const int di; ++ const int ti; ++}; ++ + /* Additional cost for addresses. 
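To make the new addr_scale_costs member concrete: it lets a core charge a scaled index differently per access width. A minimal sketch of the lookup a consumer might perform (scale_cost_for_size is a made-up helper name; in this patch the real lookup is open-coded inside aarch64_address_cost further down):

   static int
   scale_cost_for_size (const struct cpu_addrcost_table *table, int bits)
   {
     // QImode accesses never use scaled addressing, so there is no qi case;
     // 128-bit and unrecognised sizes fall back to the ti entry.
     switch (bits)
       {
       case 16: return table->addr_scale_costs.hi;
       case 32: return table->addr_scale_costs.si;
       case 64: return table->addr_scale_costs.di;
       default: return table->addr_scale_costs.ti;
       }
   }
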
*/ + struct cpu_addrcost_table + { ++ const struct scale_addr_mode_cost addr_scale_costs; + const int pre_modify; + const int post_modify; + const int register_offset; +@@ -167,6 +180,7 @@ + enum aarch64_symbol_type + aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); + bool aarch64_constant_address_p (rtx); ++bool aarch64_expand_movmem (rtx *); + bool aarch64_float_const_zero_rtx_p (rtx); + bool aarch64_function_arg_regno_p (unsigned); + bool aarch64_gen_movmemqi (rtx *); +@@ -175,6 +189,8 @@ + bool aarch64_is_long_call_p (rtx); + bool aarch64_label_mentioned_p (rtx); + bool aarch64_legitimate_pic_operand_p (rtx); ++bool aarch64_modes_tieable_p (enum machine_mode mode1, ++ enum machine_mode mode2); + bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); + bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, + enum machine_mode); +@@ -200,6 +216,8 @@ + enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); + enum reg_class aarch64_regno_regclass (unsigned); + int aarch64_asm_preferred_eh_data_format (int, int); ++enum machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, ++ enum machine_mode); + int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode); + int aarch64_hard_regno_nregs (unsigned, enum machine_mode); + int aarch64_simd_attr_length_move (rtx); +@@ -289,4 +307,5 @@ + extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); + extern bool + aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); ++void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); + #endif /* GCC_AARCH64_PROTOS_H */ +--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def ++++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def +@@ -51,32 +51,43 @@ + VAR1 (GETLANE, get_lane, 0, di) + BUILTIN_VALL (GETLANE, be_checked_get_lane, 0) + +- BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) +- BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) +- BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) +- BUILTIN_VDC (REINTERP, reinterpretv2si, 0) +- BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) +- BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) +- BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) +- BUILTIN_VQ (REINTERP, reinterpretv4si, 0) +- BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) +- BUILTIN_VQ (REINTERP, reinterpretv2di, 0) +- BUILTIN_VQ (REINTERP, reinterpretv2df, 0) ++ VAR1 (REINTERP_SS, reinterpretdi, 0, df) ++ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) ++ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) ++ VAR1 (REINTERP_SS, reinterpretv2si, 0, df) ++ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) ++ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) + ++ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) ++ ++ VAR1 (REINTERP_US, reinterpretdi, 0, df) ++ VAR1 (REINTERP_US, reinterpretv8qi, 0, df) ++ VAR1 (REINTERP_US, reinterpretv4hi, 0, df) ++ VAR1 (REINTERP_US, reinterpretv2si, 0, df) ++ VAR1 (REINTERP_US, reinterpretv2sf, 0, df) ++ ++ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) ++ ++ VAR1 (REINTERP_PS, reinterpretdi, 0, df) ++ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) ++ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) ++ VAR1 (REINTERP_PS, reinterpretv2si, 0, df) ++ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) ++ + BUILTIN_VDQ_I (BINOP, dup_lane, 0) + /* Implemented by aarch64_qshl. */ + BUILTIN_VSDQ_I (BINOP, sqshl, 0) +- BUILTIN_VSDQ_I (BINOP, uqshl, 0) ++ BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0) + BUILTIN_VSDQ_I (BINOP, sqrshl, 0) +- BUILTIN_VSDQ_I (BINOP, uqrshl, 0) ++ BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0) + /* Implemented by aarch64_. 
*/ + BUILTIN_VSDQ_I (BINOP, sqadd, 0) +- BUILTIN_VSDQ_I (BINOP, uqadd, 0) ++ BUILTIN_VSDQ_I (BINOPU, uqadd, 0) + BUILTIN_VSDQ_I (BINOP, sqsub, 0) +- BUILTIN_VSDQ_I (BINOP, uqsub, 0) ++ BUILTIN_VSDQ_I (BINOPU, uqsub, 0) + /* Implemented by aarch64_qadd. */ +- BUILTIN_VSDQ_I (BINOP, suqadd, 0) +- BUILTIN_VSDQ_I (BINOP, usqadd, 0) ++ BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0) ++ BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0) + + /* Implemented by aarch64_get_dreg. */ + BUILTIN_VDC (GETLANE, get_dregoi, 0) +@@ -107,6 +118,10 @@ + BUILTIN_VQ (STORESTRUCT, st3, 0) + BUILTIN_VQ (STORESTRUCT, st4, 0) + ++ BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0) ++ BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0) ++ BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0) ++ + BUILTIN_VQW (BINOP, saddl2, 0) + BUILTIN_VQW (BINOP, uaddl2, 0) + BUILTIN_VQW (BINOP, ssubl2, 0) +@@ -142,8 +157,8 @@ + BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) + BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) + /* Implemented by aarch64_s. */ +- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) +- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) ++ BUILTIN_VSDQ_I (UNOP, sqabs, 0) ++ BUILTIN_VSDQ_I (UNOP, sqneg, 0) + + BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) + BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) +@@ -186,9 +201,9 @@ + BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) + /* Implemented by aarch64_shl. */ + BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) +- BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) ++ BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0) + BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) +- BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) ++ BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0) + + BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) + VAR1 (SHIFTIMM, ashr_simd, 0, di) +@@ -196,15 +211,15 @@ + VAR1 (USHIFTIMM, lshr_simd, 0, di) + /* Implemented by aarch64_shr_n. */ + BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) +- BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) ++ BUILTIN_VSDQ_I_DI (USHIFTIMM, urshr_n, 0) + /* Implemented by aarch64_sra_n. */ + BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) +- BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) ++ BUILTIN_VSDQ_I_DI (USHIFTACC, usra_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) +- BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) ++ BUILTIN_VSDQ_I_DI (USHIFTACC, ursra_n, 0) + /* Implemented by aarch64_shll_n. */ + BUILTIN_VDW (SHIFTIMM, sshll_n, 0) +- BUILTIN_VDW (SHIFTIMM, ushll_n, 0) ++ BUILTIN_VDW (USHIFTIMM, ushll_n, 0) + /* Implemented by aarch64_shll2_n. */ + BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) + BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) +@@ -212,18 +227,18 @@ + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) +- BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) ++ BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0) + BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) +- BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) ++ BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0) + /* Implemented by aarch64_si_n. */ + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) +- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) ++ BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0) + BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) +- BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) ++ BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0) + /* Implemented by aarch64_qshl_n. */ +- BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) ++ BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) +- BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) ++ BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0) + + /* Implemented by aarch64_cm. 
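A note on the qualifier suffixes introduced above (BINOPU, BINOP_UUS, BINOP_SSU, USHIFTIMM and friends): they spell out the signedness of each position, result first. uqshl becoming BINOP_UUS, for instance, matches its user-visible ACLE intrinsic, where the data is unsigned but the per-lane shift count stays signed:

   uint32x4_t vqshlq_u32 (uint32x4_t __a, int32x4_t __b);   // u result, u data, s shift

Presumably the point of retyping the builtins this way is that arm_neon.h can call them directly without casting through signed vector types.
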
*/ + BUILTIN_VALLDI (BINOP, cmeq, 0) +@@ -265,7 +280,7 @@ + BUILTIN_VDQF (UNOP, nearbyint, 2) + BUILTIN_VDQF (UNOP, rint, 2) + BUILTIN_VDQF (UNOP, round, 2) +- BUILTIN_VDQF (UNOP, frintn, 2) ++ BUILTIN_VDQF_DF (UNOP, frintn, 2) + + /* Implemented by l2. */ + VAR1 (UNOP, lbtruncv2sf, 2, v2si) +@@ -330,6 +345,8 @@ + VAR1 (UNOP, floatunsv4si, 2, v4sf) + VAR1 (UNOP, floatunsv2di, 2, v2df) + ++ VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) ++ + /* Implemented by + aarch64_. */ + BUILTIN_VALL (BINOP, zip1, 0) +@@ -393,3 +410,6 @@ + /* Implemented by aarch64_crypto_pmull. */ + VAR1 (BINOPP, crypto_pmull, 0, di) + VAR1 (BINOPP, crypto_pmull, 0, v2di) ++ ++ /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin. */ ++ VAR1 (BINOPV, im_lane_bound, 0, si) +--- a/src/gcc/config/aarch64/constraints.md ++++ b/src/gcc/config/aarch64/constraints.md +@@ -21,6 +21,9 @@ + (define_register_constraint "k" "STACK_REG" + "@internal The stack register.") + ++(define_register_constraint "Ucs" "CALLER_SAVE_REGS" ++ "@internal The caller save registers.") ++ + (define_register_constraint "w" "FP_REGS" + "Floating point and SIMD vector registers.") + +@@ -92,6 +95,10 @@ + (and (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) ival < 64"))) + ++(define_constraint "Usf" ++ "@internal Usf is a symbol reference." ++ (match_code "symbol_ref")) ++ + (define_constraint "UsM" + "@internal + A constraint that matches the immediate constant -1." +--- a/src/gcc/config/aarch64/aarch64.c ++++ b/src/gcc/config/aarch64/aarch64.c +@@ -63,6 +63,7 @@ + #include "cfgloop.h" + #include "tree-vectorizer.h" + #include "config/arm/aarch-cost-tables.h" ++#include "dumpfile.h" + + /* Defined for convenience. */ + #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) +@@ -141,6 +142,7 @@ + + static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, + const unsigned char *sel); ++static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool); + + /* The processor for which instructions should be scheduled. */ + enum aarch64_processor aarch64_tune = cortexa53; +@@ -171,6 +173,15 @@ + #endif + static const struct cpu_addrcost_table generic_addrcost_table = + { ++#if HAVE_DESIGNATED_INITIALIZERS ++ .addr_scale_costs = ++#endif ++ { ++ NAMED_PARAM (qi, 0), ++ NAMED_PARAM (hi, 0), ++ NAMED_PARAM (si, 0), ++ NAMED_PARAM (ti, 0), ++ }, + NAMED_PARAM (pre_modify, 0), + NAMED_PARAM (post_modify, 0), + NAMED_PARAM (register_offset, 0), +@@ -181,6 +192,27 @@ + #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 + __extension__ + #endif ++static const struct cpu_addrcost_table cortexa57_addrcost_table = ++{ ++#if HAVE_DESIGNATED_INITIALIZERS ++ .addr_scale_costs = ++#endif ++ { ++ NAMED_PARAM (qi, 0), ++ NAMED_PARAM (hi, 1), ++ NAMED_PARAM (si, 0), ++ NAMED_PARAM (ti, 1), ++ }, ++ NAMED_PARAM (pre_modify, 0), ++ NAMED_PARAM (post_modify, 0), ++ NAMED_PARAM (register_offset, 0), ++ NAMED_PARAM (register_extend, 0), ++ NAMED_PARAM (imm_offset, 0), ++}; ++ ++#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 ++__extension__ ++#endif + static const struct cpu_regmove_cost generic_regmove_cost = + { + NAMED_PARAM (GP2GP, 1), +@@ -212,9 +244,29 @@ + NAMED_PARAM (cond_not_taken_branch_cost, 1) + }; + ++/* Generic costs for vector insn classes. 
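All of these tables fill their fields through NAMED_PARAM under a HAVE_DESIGNATED_INITIALIZERS guard. The macro body is not shown in this hunk, but it presumably follows the usual pattern of degrading to a plain positional initialiser when C99 designated initialisers are unavailable:

   #if HAVE_DESIGNATED_INITIALIZERS
   #define NAMED_PARAM(NAME, VAL) .NAME = VAL
   #else
   #define NAMED_PARAM(NAME, VAL) VAL
   #endif

Either way, the entries must be listed in struct declaration order, since pre-C99 hosts only get the positional form.
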
*/
+ #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+ __extension__
+ #endif
++static const struct cpu_vector_cost cortexa57_vector_cost =
++{
++ NAMED_PARAM (scalar_stmt_cost, 1),
++ NAMED_PARAM (scalar_load_cost, 4),
++ NAMED_PARAM (scalar_store_cost, 1),
++ NAMED_PARAM (vec_stmt_cost, 3),
++ NAMED_PARAM (vec_to_scalar_cost, 8),
++ NAMED_PARAM (scalar_to_vec_cost, 8),
++ NAMED_PARAM (vec_align_load_cost, 5),
++ NAMED_PARAM (vec_unalign_load_cost, 5),
++ NAMED_PARAM (vec_unalign_store_cost, 1),
++ NAMED_PARAM (vec_store_cost, 1),
++ NAMED_PARAM (cond_taken_branch_cost, 1),
++ NAMED_PARAM (cond_not_taken_branch_cost, 1)
++};
++
++#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
++__extension__
++#endif
+ static const struct tune_params generic_tunings =
+ {
+ &cortexa57_extra_costs,
+@@ -238,9 +290,9 @@
+ static const struct tune_params cortexa57_tunings =
+ {
+ &cortexa57_extra_costs,
+- &generic_addrcost_table,
++ &cortexa57_addrcost_table,
+ &generic_regmove_cost,
+- &generic_vector_cost,
++ &cortexa57_vector_cost,
+ NAMED_PARAM (memmov_cost, 4),
+ NAMED_PARAM (issue_rate, 3)
+ };
+@@ -424,6 +476,24 @@
+ return 0;
+ }
+
++/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
++enum machine_mode
++aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
++ enum machine_mode mode)
++{
++ /* Handle modes that fit within single registers. */
++ if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
++ {
++ if (GET_MODE_SIZE (mode) >= 4)
++ return mode;
++ else
++ return SImode;
++ }
++ /* Fall back to generic for multi-reg and very large modes. */
++ else
++ return choose_hard_reg_mode (regno, nregs, false);
++}
++
+ /* Return true if calls to DECL should be treated as
+ long-calls (ie called via a register). */
+ static bool
+@@ -444,7 +514,7 @@
+ represent an expression that matches an extend operation. The
+ operands represent the paramters from
+
+- (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
++ (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
+ bool
+ aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
+ rtx extract_imm)
+@@ -636,12 +706,24 @@
+
+ case SYMBOL_SMALL_TLSDESC:
+ {
+- rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
++ enum machine_mode mode = GET_MODE (dest);
++ rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
+ rtx tp;
+
+- emit_insn (gen_tlsdesc_small (imm));
++ gcc_assert (mode == Pmode || mode == ptr_mode);
++
++ /* In ILP32, the got entry is always of SImode size. Unlike
++ small GOT, the dest is fixed at reg 0. */
++ if (TARGET_ILP32)
++ emit_insn (gen_tlsdesc_small_si (imm));
++ else
++ emit_insn (gen_tlsdesc_small_di (imm));
+ tp = aarch64_load_tp (NULL);
+- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
++
++ if (mode != Pmode)
++ tp = gen_lowpart (mode, tp);
++
++ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
+ return;
+ }
+@@ -648,10 +730,34 @@
+
+ case SYMBOL_SMALL_GOTTPREL:
+ {
+- rtx tmp_reg = gen_reg_rtx (Pmode);
++ /* In ILP32, the mode of dest can be either SImode or DImode,
++ while the got entry is always of SImode size. The mode of
++ dest depends on how dest is used: if dest is assigned to a
++ pointer (e.g. in the memory), it has SImode; it may have
++ DImode if dest is dereferenced to access the memory.
++ This is why we have to handle three different tlsie_small
++ patterns here (two patterns for ILP32).
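As a concrete illustration of when this case fires (the names here are invented for the example): an initial-exec TLS access such as

   __thread int counter;

   int *
   counter_addr (void)
   {
     return &counter;   // dest holds a pointer: SImode under ILP32
   }

compiled with -mabi=ilp32 loads a 32-bit GOT slot while the thread pointer remains a 64-bit value, which is why a mixed-mode tlsie_small_sidi pattern is needed below alongside the _si and _di forms.
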
*/ ++ enum machine_mode mode = GET_MODE (dest); ++ rtx tmp_reg = gen_reg_rtx (mode); + rtx tp = aarch64_load_tp (NULL); +- emit_insn (gen_tlsie_small (tmp_reg, imm)); +- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg))); ++ ++ if (mode == ptr_mode) ++ { ++ if (mode == DImode) ++ emit_insn (gen_tlsie_small_di (tmp_reg, imm)); ++ else ++ { ++ emit_insn (gen_tlsie_small_si (tmp_reg, imm)); ++ tp = gen_lowpart (mode, tp); ++ } ++ } ++ else ++ { ++ gcc_assert (mode == Pmode); ++ emit_insn (gen_tlsie_small_sidi (tmp_reg, imm)); ++ } ++ ++ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg))); + set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); + return; + } +@@ -1162,18 +1268,10 @@ + } + + static bool +-aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) ++aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, ++ tree exp ATTRIBUTE_UNUSED) + { +- /* Indirect calls are not currently supported. */ +- if (decl == NULL) +- return false; - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vzip2q_s32 (int32x4_t a, int32x4_t b) --{ -- int32x4_t result; -- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} +- /* Cannot tail-call to long-calls, since these are outside of the +- range of a branch instruction (we could handle this if we added +- support for indirect tail-calls. */ +- if (aarch64_decl_is_long_call_p (decl)) +- return false; - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vzip2q_s64 (int64x2_t a, int64x2_t b) --{ -- int64x2_t result; -- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} ++ /* Currently, always true. */ + return true; + } + +@@ -1716,8 +1814,6 @@ + if (reload_completed && cfun->machine->frame.laid_out) + return; + +- cfun->machine->frame.fp_lr_offset = 0; - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vzip2q_u8 (uint8x16_t a, uint8x16_t b) --{ -- uint8x16_t result; -- __asm__ ("zip2 %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} + /* First mark all the registers that really need to be saved... */ + for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) + cfun->machine->frame.reg_offset[regno] = -1; +@@ -1767,7 +1863,6 @@ + { + cfun->machine->frame.reg_offset[R29_REGNUM] = offset; + offset += UNITS_PER_WORD; +- cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD; + } + + if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1) +@@ -1774,7 +1869,6 @@ + { + cfun->machine->frame.reg_offset[R30_REGNUM] = offset; + offset += UNITS_PER_WORD; +- cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD; + } + + cfun->machine->frame.padding0 = +@@ -1819,7 +1913,6 @@ + rtx (*gen_mem_ref)(enum machine_mode, rtx) + = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM; + - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vzip2q_u16 (uint16x8_t a, uint16x8_t b) --{ -- uint16x8_t result; -- __asm__ ("zip2 %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} + for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) + { + if (aarch64_register_saved_on_entry (regno)) +@@ -1837,10 +1930,12 @@ + { + /* Empty loop. */ + } ++ + if (regno2 <= V31_REGNUM && + aarch64_register_saved_on_entry (regno2)) + { + rtx mem2; ++ + /* Next highest register to be saved. 
*/ + mem2 = gen_mem_ref (DFmode, + plus_constant +@@ -1866,10 +1961,10 @@ + gen_rtx_REG (DFmode, regno2)); + } + +- /* The first part of a frame-related parallel insn +- is always assumed to be relevant to the frame +- calculations; subsequent parts, are only +- frame-related if explicitly marked. */ ++ /* The first part of a frame-related parallel insn is ++ always assumed to be relevant to the frame ++ calculations; subsequent parts, are only ++ frame-related if explicitly marked. */ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + regno = regno2; + start_offset += increment * 2; +@@ -1882,7 +1977,7 @@ + { + insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); + add_reg_note (insn, REG_CFA_RESTORE, +- gen_rtx_REG (DImode, regno)); ++ gen_rtx_REG (DFmode, regno)); + } + start_offset += increment; + } +@@ -1889,7 +1984,6 @@ + RTX_FRAME_RELATED_P (insn) = 1; + } + } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vzip2q_u32 (uint32x4_t a, uint32x4_t b) --{ -- uint32x4_t result; -- __asm__ ("zip2 %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} + } + + +@@ -1897,7 +1991,7 @@ + restore's have to happen. */ + static void + aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, +- bool restore) ++ bool restore) + { + rtx insn; + rtx base_rtx = stack_pointer_rtx; +@@ -1929,6 +2023,7 @@ + aarch64_register_saved_on_entry (regno2)) + { + rtx mem2; ++ + /* Next highest register to be saved. */ + mem2 = gen_mem_ref (Pmode, + plus_constant +@@ -1952,12 +2047,11 @@ + add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); + } + +- /* The first part of a frame-related parallel insn +- is always assumed to be relevant to the frame +- calculations; subsequent parts, are only +- frame-related if explicitly marked. */ +- RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, +- 1)) = 1; ++ /* The first part of a frame-related parallel insn is ++ always assumed to be relevant to the frame ++ calculations; subsequent parts, are only ++ frame-related if explicitly marked. 
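Restated as the idiom the surrounding code follows for a paired save (gen_store_pair is a stand-in name for whichever pair pattern is actually used): marking the insn covers only the first SET of the PARALLEL, so the second SET needs its own flag before the CFI machinery will record both registers:

   insn = emit_insn (gen_store_pair (mem, reg1, mem2, reg2));
   RTX_FRAME_RELATED_P (insn) = 1;                            // implies part 0
   RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;  // part 1, explicit
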
*/ ++ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; + regno = regno2; + start_offset += increment * 2; + } +@@ -1977,7 +2071,6 @@ + } + + aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vzip2q_u64 (uint64x2_t a, uint64x2_t b) --{ -- uint64x2_t result; -- __asm__ ("zip2 %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} + } + + /* AArch64 stack frames generated by this compiler look like: +@@ -1986,37 +2079,35 @@ + | | + | incoming stack arguments | + | | +- +-------------------------------+ <-- arg_pointer_rtx +- | | ++ +-------------------------------+ ++ | | <-- incoming stack pointer (aligned) + | callee-allocated save area | + | for register varargs | + | | +- +-------------------------------+ <-- frame_pointer_rtx ++ +-------------------------------+ ++ | local variables | <-- frame_pointer_rtx + | | +- | local variables | +- | | + +-------------------------------+ + | padding0 | \ + +-------------------------------+ | +- | | | +- | | | + | callee-saved registers | | frame.saved_regs_size +- | | | + +-------------------------------+ | + | LR' | | + +-------------------------------+ | +- | FP' | / +- P +-------------------------------+ <-- hard_frame_pointer_rtx ++ | FP' | / <- hard_frame_pointer_rtx (aligned) ++ +-------------------------------+ + | dynamic allocation | + +-------------------------------+ +- | | +- | outgoing stack arguments | +- | | +- +-------------------------------+ <-- stack_pointer_rtx ++ | padding | ++ +-------------------------------+ ++ | outgoing stack arguments | <-- arg_pointer ++ | | ++ +-------------------------------+ ++ | | <-- stack_pointer_rtx (aligned) + +- Dynamic stack allocations such as alloca insert data at point P. +- They decrease stack_pointer_rtx but leave frame_pointer_rtx and +- hard_frame_pointer_rtx unchanged. */ ++ Dynamic stack allocations via alloca() decrease stack_pointer_rtx ++ but leave frame_pointer_rtx and hard_frame_pointer_rtx ++ unchanged. */ + + /* Generate the prologue instructions for entry into a function. + Establish the stack frame by decreasing the stack pointer with a +@@ -2449,12 +2540,22 @@ + - 2 * UNITS_PER_WORD)); + } + +-/* Output code to build up a constant in a register. */ +-static void +-aarch64_build_constant (int regnum, HOST_WIDE_INT val) ++/* Possibly output code to build up a constant in a register. For ++ the benefit of the costs infrastructure, returns the number of ++ instructions which would be emitted. GENERATE inhibits or ++ enables code generation. */ ++ ++static int ++aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate) + { ++ int insns = 0; ++ + if (aarch64_bitmask_imm (val, DImode)) +- emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); ++ { ++ if (generate) ++ emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); ++ insns = 1; ++ } + else + { + int i; +@@ -2485,15 +2586,19 @@ + the same. 
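A worked example of the choice being made below, with an arbitrary constant: for val = 0x0000cafe00001234, only one of the three upper 16-bit chunks is non-zero (zcount = 1) while all three differ from 0xffff (ncount = 3), so the zero-start sequence wins and, when GENERATE is true, the function emits in effect

   mov  x9, #0x1234              // movz; untouched chunks stay zero
   movk x9, #0xcafe, lsl #32     // one movk per differing chunk

while a call with GENERATE false walks the same logic purely to report insns = 2 to the cost code.
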
*/ + if (ncount < zcount) + { +- emit_move_insn (gen_rtx_REG (Pmode, regnum), +- GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); ++ if (generate) ++ emit_move_insn (gen_rtx_REG (Pmode, regnum), ++ GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); + tval = 0xffff; ++ insns++; + } + else + { +- emit_move_insn (gen_rtx_REG (Pmode, regnum), +- GEN_INT (val & 0xffff)); ++ if (generate) ++ emit_move_insn (gen_rtx_REG (Pmode, regnum), ++ GEN_INT (val & 0xffff)); + tval = 0; ++ insns++; + } + + val >>= 16; +@@ -2501,11 +2606,17 @@ + for (i = 16; i < 64; i += 16) + { + if ((val & 0xffff) != tval) +- emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), +- GEN_INT (i), GEN_INT (val & 0xffff))); ++ { ++ if (generate) ++ emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), ++ GEN_INT (i), ++ GEN_INT (val & 0xffff))); ++ insns++; ++ } + val >>= 16; + } + } ++ return insns; + } + + static void +@@ -2520,7 +2631,7 @@ + + if (mdelta >= 4096 * 4096) + { +- aarch64_build_constant (scratchreg, delta); ++ (void) aarch64_build_constant (scratchreg, delta, true); + emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); + } + else if (mdelta > 0) +@@ -2594,7 +2705,7 @@ + addr = plus_constant (Pmode, temp0, vcall_offset); + else + { +- aarch64_build_constant (IP1_REGNUM, vcall_offset); ++ (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true); + addr = gen_rtx_PLUS (Pmode, temp0, temp1); + } + +@@ -3046,11 +3157,11 @@ + enum rtx_code code = GET_CODE (x); + rtx op0, op1; + bool allow_reg_index_p = +- outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16; - - /* End of temporary inline asm implementations. */ ++ outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16 ++ || aarch64_vector_mode_supported_p (mode)); + /* Don't support anything other than POST_INC or REG addressing for + AdvSIMD. 
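Here the struct modes are presumably the multi-register OI/CI/XI forms used by the ld2/st2 family, which only exist behind a plain register or post-increment address, e.g.

   ld2  {v0.4s, v1.4s}, [x0]         // REG
   ld2  {v0.4s, v1.4s}, [x0], #32    // POST_INC

Ordinary 128-bit vector modes, by contrast, are exactly what the relaxed allow_reg_index_p condition above now lets through to the richer addressing forms.
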
*/ +- if (aarch64_vector_mode_p (mode) ++ if (aarch64_vect_struct_mode_p (mode) + && (code != POST_INC && code != REG)) + return false; + +@@ -3839,34 +3950,34 @@ + if (addr.offset == const0_rtx) + asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); + else +- asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + + case ADDRESS_REG_REG: + if (addr.shift == 0) +- asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)], + reg_names [REGNO (addr.offset)]); + else +- asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)], + reg_names [REGNO (addr.offset)], addr.shift); + return; + + case ADDRESS_REG_UXTW: + if (addr.shift == 0) +- asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM); + else +- asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM, addr.shift); + return; + + case ADDRESS_REG_SXTW: + if (addr.shift == 0) +- asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM); + else +- asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)], + REGNO (addr.offset) - R0_REGNUM, addr.shift); + return; + +@@ -3874,27 +3985,27 @@ + switch (GET_CODE (x)) + { + case PRE_INC: +- asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case POST_INC: +- asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case PRE_DEC: +- asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case POST_DEC: +- asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], + GET_MODE_SIZE (aarch64_memory_reference_mode)); + return; + case PRE_MODIFY: +- asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + case POST_MODIFY: +- asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)], ++ asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)], + INTVAL (addr.offset)); + return; + default: +@@ -3903,7 +4014,7 @@ + break; - /* Start of temporary inline asm for vldn, vstn and friends. 
*/ -@@ -14205,132 +13990,225 @@ - __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) - __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) + case ADDRESS_LO_SUM: +- asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]); ++ asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]); + output_addr_const (f, addr.offset); + asm_fprintf (f, "]"); + return; +@@ -3980,8 +4091,8 @@ + { + rtx x = *x_p; --#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST2_LANE_STRUCTURE_##intype *__p = \ -- (__ST2_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ -- "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17"); \ -- } -+#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_oi __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregoi##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregoi##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} +- /* Do not allow mem (plus (reg, const)) if vector mode. */ +- if (aarch64_vector_mode_p (mode) ++ /* Do not allow mem (plus (reg, const)) if vector struct mode. 
*/ ++ if (aarch64_vect_struct_mode_p (mode) + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) + && CONST_INT_P (XEXP (x, 1))) +@@ -4150,32 +4261,31 @@ + + crtl->outgoing_args_size + + cfun->machine->saved_varargs_size); --__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) --__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) --__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) --__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) --__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) --__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) --__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) --__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) --__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) --__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) --__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) --__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) --__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) --__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) --__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) --__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) --__ST2_LANE_FUNC (int8x16x2_t, int8_t, 16b, b, s8, q) --__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) --__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) --__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) --__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) --__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) --__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) --__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) -+__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t) -+__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t) -+__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64, -+ int64x2_t) +- frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); +- offset = frame_size; ++ frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); ++ offset = frame_size; --#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST3_LANE_STRUCTURE_##intype *__p = \ -- (__ST3_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ -- "st3 {v16." #lnsuffix " - v18." 
#lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17", "v18"); \ -- } -+#undef __ST2_LANE_FUNC -+#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ -+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} +- if (to == HARD_FRAME_POINTER_REGNUM) +- { +- if (from == ARG_POINTER_REGNUM) +- return offset - crtl->outgoing_args_size; ++ if (to == HARD_FRAME_POINTER_REGNUM) ++ { ++ if (from == ARG_POINTER_REGNUM) ++ return offset - crtl->outgoing_args_size; --__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) --__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) --__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) --__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) --__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) --__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) --__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) --__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) --__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) --__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) --__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) --__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) --__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) --__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) --__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) --__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) --__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) --__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) --__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) --__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) --__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) --__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) --__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) --__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) -+__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) -+__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) -+__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) -+__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) -+__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) -+__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) -+__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) -+__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) -+__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) -+__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) -+__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) -+__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) +- if (from == FRAME_POINTER_REGNUM) +- return cfun->machine->frame.saved_regs_size + get_frame_size (); +- } ++ if (from == FRAME_POINTER_REGNUM) ++ return cfun->machine->frame.saved_regs_size + get_frame_size (); ++ } + +- if (to == STACK_POINTER_REGNUM) +- { +- if (from == FRAME_POINTER_REGNUM) +- { +- HOST_WIDE_INT elim = crtl->outgoing_args_size +- + cfun->machine->frame.saved_regs_size +- + get_frame_size () +- - cfun->machine->frame.fp_lr_offset; +- elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); +- return elim; +- } +- } ++ if (to == STACK_POINTER_REGNUM) ++ { ++ if (from == FRAME_POINTER_REGNUM) ++ { ++ HOST_WIDE_INT elim = crtl->outgoing_args_size ++ + 
cfun->machine->frame.saved_regs_size ++ + get_frame_size (); ++ elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); ++ return elim; ++ } ++ } + +- return offset; ++ return offset; + } + + +@@ -4242,6 +4352,7 @@ + { + switch (regclass) + { ++ case CALLER_SAVE_REGS: + case CORE_REGS: + case POINTER_REGS: + case GENERAL_REGS: +@@ -4443,9 +4554,13 @@ + { + rtx op = x; + ++ /* We accept both ROTATERT and ROTATE: since the RHS must be a constant ++ we can convert both to ROR during final output. */ + if ((GET_CODE (op) == ASHIFT + || GET_CODE (op) == ASHIFTRT +- || GET_CODE (op) == LSHIFTRT) ++ || GET_CODE (op) == LSHIFTRT ++ || GET_CODE (op) == ROTATERT ++ || GET_CODE (op) == ROTATE) + && CONST_INT_P (XEXP (op, 1))) + return XEXP (op, 0); --#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ -- lnsuffix, funcsuffix, Q) \ -- typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \ -- __extension__ static __inline void \ -- __attribute__ ((__always_inline__)) \ -- vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ -- intype b, const int c) \ -- { \ -- __ST4_LANE_STRUCTURE_##intype *__p = \ -- (__ST4_LANE_STRUCTURE_##intype *)ptr; \ -- __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ -- "st4 {v16." #lnsuffix " - v19." #lnsuffix "}[%2], %0\n\t" \ -- : "=Q"(*__p) \ -- : "Q"(b), "i"(c) \ -- : "v16", "v17", "v18", "v19"); \ -- } -+#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_ci __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[2] \ -+ = vcombine_##funcsuffix (__b.val[2], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregci##mode (__o, \ -+ (signedtype) __temp.val[2], 2); \ -+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} +@@ -4457,12 +4572,12 @@ + return x; + } --__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) --__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) --__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) --__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) --__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) --__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) --__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) --__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) --__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) --__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) --__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) --__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) --__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) --__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) --__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) --__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) --__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) --__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) --__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) --__ST4_LANE_FUNC (int64x2x4_t, 
int64_t, 2d, d, s64, q) --__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) --__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) --__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) --__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) -+__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t) -+__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t) -+__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64, -+ int64x2_t) +-/* Helper function for rtx cost calculation. Strip a shift or extend ++/* Helper function for rtx cost calculation. Strip an extend + expression from X. Returns the inner operand if successful, or the + original expression on failure. We deal with a number of possible + canonicalization variations here. */ + static rtx +-aarch64_strip_shift_or_extend (rtx x) ++aarch64_strip_extend (rtx x) + { + rtx op = x; -+#undef __ST3_LANE_FUNC -+#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ -+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} +@@ -4469,6 +4584,7 @@ + /* Zero and sign extraction of a widened value. */ + if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) + && XEXP (op, 2) == const0_rtx ++ && GET_CODE (XEXP (op, 0)) == MULT + && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1), + XEXP (op, 1))) + return XEXP (XEXP (op, 0), 0); +@@ -4497,9 +4613,316 @@ + if (op != x) + return op; + +- return aarch64_strip_shift (x); ++ return x; + } + ++/* Helper function for rtx cost calculation. Calculate the cost of ++ a MULT, which may be part of a multiply-accumulate rtx. Return ++ the calculated cost of the expression, recursing manually in to ++ operands where needed. 
*/ + -+__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) -+__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) -+__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) -+__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) -+__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) -+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) -+__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) -+__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) -+__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) -+__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) -+__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) -+__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) ++static int ++aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) ++{ ++ rtx op0, op1; ++ const struct cpu_cost_table *extra_cost ++ = aarch64_tune_params->insn_extra_cost; ++ int cost = 0; ++ bool maybe_fma = (outer == PLUS || outer == MINUS); ++ enum machine_mode mode = GET_MODE (x); + -+#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \ -+ mode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_xi __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[2] \ -+ = vcombine_##funcsuffix (__b.val[2], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[3] \ -+ = vcombine_##funcsuffix (__b.val[3], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[2], 2); \ -+ __o = __builtin_aarch64_set_qregxi##mode (__o, \ -+ (signedtype) __temp.val[3], 3); \ -+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} ++ gcc_checking_assert (code == MULT); + -+__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32, -+ float32x4_t) -+__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64, -+ float64x2_t) -+__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t) -+__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16, -+ int16x8_t) -+__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t) -+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t) -+__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t) -+__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t) -+__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t) -+__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16, -+ int16x8_t) -+__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32, -+ int32x4_t) -+__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64, -+ int64x2_t) ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ ++ if (VECTOR_MODE_P (mode)) ++ mode = GET_MODE_INNER (mode); ++ ++ /* Integer multiply/fma. */ ++ if (GET_MODE_CLASS (mode) == MODE_INT) ++ { ++ /* The multiply will be canonicalized as a shift, cost it as such. 
*/ ++ if (CONST_INT_P (op1) ++ && exact_log2 (INTVAL (op1)) > 0) ++ { ++ if (speed) ++ { ++ if (maybe_fma) ++ /* ADD (shifted register). */ ++ cost += extra_cost->alu.arith_shift; ++ else ++ /* LSL (immediate). */ ++ cost += extra_cost->alu.shift; ++ } ++ ++ cost += rtx_cost (op0, GET_CODE (op0), 0, speed); ++ ++ return cost; ++ } ++ ++ /* Integer multiplies or FMAs have zero/sign extending variants. */ ++ if ((GET_CODE (op0) == ZERO_EXTEND ++ && GET_CODE (op1) == ZERO_EXTEND) ++ || (GET_CODE (op0) == SIGN_EXTEND ++ && GET_CODE (op1) == SIGN_EXTEND)) ++ { ++ cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed) ++ + rtx_cost (XEXP (op1, 0), MULT, 1, speed); ++ ++ if (speed) ++ { ++ if (maybe_fma) ++ /* MADD/SMADDL/UMADDL. */ ++ cost += extra_cost->mult[0].extend_add; ++ else ++ /* MUL/SMULL/UMULL. */ ++ cost += extra_cost->mult[0].extend; ++ } ++ ++ return cost; ++ } ++ ++ /* This is either an integer multiply or an FMA. In both cases ++ we want to recurse and cost the operands. */ ++ cost += rtx_cost (op0, MULT, 0, speed) ++ + rtx_cost (op1, MULT, 1, speed); ++ ++ if (speed) ++ { ++ if (maybe_fma) ++ /* MADD. */ ++ cost += extra_cost->mult[mode == DImode].add; ++ else ++ /* MUL. */ ++ cost += extra_cost->mult[mode == DImode].simple; ++ } + -+#undef __ST4_LANE_FUNC -+#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ static __inline void \ -+__attribute__ ((__always_inline__)) \ -+vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ -+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} ++ return cost; ++ } ++ else ++ { ++ if (speed) ++ { ++ /* Floating-point FMA/FMUL can also support negations of the ++ operands. */ ++ if (GET_CODE (op0) == NEG) ++ op0 = XEXP (op0, 0); ++ if (GET_CODE (op1) == NEG) ++ op1 = XEXP (op1, 0); + -+__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) -+__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) -+__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) -+__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) -+__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) -+__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) -+__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) -+__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) -+__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) -+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) -+__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) -+__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) ++ if (maybe_fma) ++ /* FMADD/FNMADD/FNMSUB/FMSUB. */ ++ cost += extra_cost->fp[mode == DFmode].fma; ++ else ++ /* FMUL/FNMUL. 
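Stepping back to the integer half of this function for a moment: the extend_add case above is what lets a widening multiply-accumulate written in plain C be costed as the single SMADDL it becomes, rather than as separate extends plus a multiply (example mine):

   long long
   mac (long long acc, int a, int b)
   {
     return acc + (long long) a * b;   // folds to smaddl on AArch64
   }
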
*/ ++ cost += extra_cost->fp[mode == DFmode].mult; ++ } + - __extension__ static __inline int64_t __attribute__ ((__always_inline__)) - vaddlv_s32 (int32x2_t a) - { -@@ -20943,6 +20821,12 @@ - return (int32x1_t) __builtin_aarch64_sqabssi (__a); - } - -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vqabsd_s64 (int64_t __a) -+{ -+ return __builtin_aarch64_sqabsdi (__a); ++ cost += rtx_cost (op0, MULT, 0, speed) ++ + rtx_cost (op1, MULT, 1, speed); ++ return cost; ++ } +} + - /* vqadd */ - - __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) -@@ -21561,6 +21445,12 @@ - return (int32x1_t) __builtin_aarch64_sqnegsi (__a); - } - -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vqnegd_s64 (int64_t __a) ++static int ++aarch64_address_cost (rtx x, ++ enum machine_mode mode, ++ addr_space_t as ATTRIBUTE_UNUSED, ++ bool speed) +{ -+ return __builtin_aarch64_sqnegdi (__a); -+} ++ enum rtx_code c = GET_CODE (x); ++ const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; ++ struct aarch64_address_info info; ++ int cost = 0; ++ info.shift = 0; + - /* vqrdmulh */ - - __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -@@ -22481,6 +22371,12 @@ - return __builtin_aarch64_btruncv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrnd_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); -+} ++ if (!aarch64_classify_address (&info, x, mode, c, false)) ++ { ++ if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF) ++ { ++ /* This is a CONST or SYMBOL ref which will be split ++ in a different way depending on the code model in use. ++ Cost it through the generic infrastructure. */ ++ int cost_symbol_ref = rtx_cost (x, MEM, 1, speed); ++ /* Divide through by the cost of one instruction to ++ bring it to the same units as the address costs. */ ++ cost_symbol_ref /= COSTS_N_INSNS (1); ++ /* The cost is then the cost of preparing the address, ++ followed by an immediate (possibly 0) offset. */ ++ return cost_symbol_ref + addr_cost->imm_offset; ++ } ++ else ++ { ++ /* This is most likely a jump table from a case ++ statement. 
*/ ++ return addr_cost->register_offset; ++ } ++ } + - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndq_f32 (float32x4_t __a) - { -@@ -22501,6 +22397,12 @@ - return __builtin_aarch64_roundv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrnda_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); -+} ++ switch (info.type) ++ { ++ case ADDRESS_LO_SUM: ++ case ADDRESS_SYMBOLIC: ++ case ADDRESS_REG_IMM: ++ cost += addr_cost->imm_offset; ++ break; + - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndaq_f32 (float32x4_t __a) - { -@@ -22521,6 +22423,12 @@ - return __builtin_aarch64_nearbyintv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndi_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); -+} ++ case ADDRESS_REG_WB: ++ if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) ++ cost += addr_cost->pre_modify; ++ else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) ++ cost += addr_cost->post_modify; ++ else ++ gcc_unreachable (); + - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndiq_f32 (float32x4_t __a) - { -@@ -22541,6 +22449,12 @@ - return __builtin_aarch64_floorv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndm_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); -+} ++ break; + - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndmq_f32 (float32x4_t __a) - { -@@ -22560,6 +22474,13 @@ - { - return __builtin_aarch64_frintnv2sf (__a); - } ++ case ADDRESS_REG_REG: ++ cost += addr_cost->register_offset; ++ break; + -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndn_f64 (float64x1_t __a) -+{ -+ return __builtin_aarch64_frintndf (__a); -+} ++ case ADDRESS_REG_UXTW: ++ case ADDRESS_REG_SXTW: ++ cost += addr_cost->register_extend; ++ break; + - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndnq_f32 (float32x4_t __a) - { -@@ -22580,6 +22501,12 @@ - return __builtin_aarch64_ceilv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndp_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); -+} ++ default: ++ gcc_unreachable (); ++ } + - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndpq_f32 (float32x4_t __a) - { -@@ -22600,6 +22527,12 @@ - return __builtin_aarch64_rintv2sf (__a); - } - -+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+vrndx_f64 (float64x1_t __a) -+{ -+ return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); -+} + - __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) - vrndxq_f32 (float32x4_t __a) - { -@@ -25316,6 +25249,444 @@ - - /* vzip */ - -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vzip1_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} ++ if (info.shift > 0) ++ { ++ /* For the sake of calculating the cost of the shifted register ++ component, we can treat same sized 
modes in the same way. */ ++ switch (GET_MODE_BITSIZE (mode)) ++ { ++ case 16: ++ cost += addr_cost->addr_scale_costs.hi; ++ break; + -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vzip1_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} ++ case 32: ++ cost += addr_cost->addr_scale_costs.si; ++ break; ++ ++ case 64: ++ cost += addr_cost->addr_scale_costs.di; ++ break; ++ ++ /* We can't tell, or this is a 128-bit vector. */ ++ default: ++ cost += addr_cost->addr_scale_costs.ti; ++ break; ++ } ++ } + -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vzip1_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif ++ return cost; +} + -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vzip1_s8 (int8x8_t __a, int8x8_t __b) ++/* Return true if the RTX X in mode MODE is a zero or sign extract ++ usable in an ADD or SUB (extended register) instruction. */ ++static bool ++aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode) +{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} ++ /* Catch add with a sign extract. ++ This is add__multp2. */ ++ if (GET_CODE (x) == SIGN_EXTRACT ++ || GET_CODE (x) == ZERO_EXTRACT) ++ { ++ rtx op0 = XEXP (x, 0); ++ rtx op1 = XEXP (x, 1); ++ rtx op2 = XEXP (x, 2); + -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vzip1_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif -+} ++ if (GET_CODE (op0) == MULT ++ && CONST_INT_P (op1) ++ && op2 == const0_rtx ++ && CONST_INT_P (XEXP (op0, 1)) ++ && aarch64_is_extend_from_extract (mode, ++ XEXP (op0, 1), ++ op1)) ++ { ++ return true; ++ } ++ } + -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vzip1_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif ++ return false; +} + -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vzip1_u8 (uint8x8_t __a, uint8x8_t __b) ++/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)), ++ storing it in *COST. Result is true if the total cost of the operation ++ has now been calculated. 
*/ ++static bool ++aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) +{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} ++ rtx inner; ++ rtx comparator; ++ enum rtx_code cmpcode; + -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vzip1_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif -+} ++ if (COMPARISON_P (op0)) ++ { ++ inner = XEXP (op0, 0); ++ comparator = XEXP (op0, 1); ++ cmpcode = GET_CODE (op0); ++ } ++ else ++ { ++ inner = op0; ++ comparator = const0_rtx; ++ cmpcode = NE; ++ } + -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vzip1_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} ++ if (GET_CODE (op1) == PC || GET_CODE (op2) == PC) ++ { ++ /* Conditional branch. */ ++ if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) ++ return true; ++ else ++ { ++ if (cmpcode == NE || cmpcode == EQ) ++ { ++ if (comparator == const0_rtx) ++ { ++ /* TBZ/TBNZ/CBZ/CBNZ. */ ++ if (GET_CODE (inner) == ZERO_EXTRACT) ++ /* TBZ/TBNZ. */ ++ *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT, ++ 0, speed); ++ else ++ /* CBZ/CBNZ. */ ++ *cost += rtx_cost (inner, cmpcode, 0, speed); + -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vzip1q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} ++ return true; ++ } ++ } ++ else if (cmpcode == LT || cmpcode == GE) ++ { ++ /* TBZ/TBNZ. */ ++ if (comparator == const0_rtx) ++ return true; ++ } ++ } ++ } ++ else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) ++ { ++ /* It's a conditional operation based on the status flags, ++ so it must be some flavor of CSEL. */ + -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vzip1q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} ++ /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. 
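The branch-costing logic above refers to AArch64's compact compare-and-branch encodings. For illustration, sources like the following typically select those forms at -O2, though instruction choice is compiler- and tuning-dependent, so the mappings are indicative only:

extern void hit (void);

void
f (long x)
{
  if (x == 0)    /* usually CBZ x0, ...  */
    hit ();
}

void
g (long x)
{
  if (x & 8)     /* usually TBNZ x0, #3, ...  */
    hit ();
}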
*/ ++ if (GET_CODE (op1) == NEG ++ || GET_CODE (op1) == NOT ++ || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx)) ++ op1 = XEXP (op1, 0); + -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} ++ *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed); ++ *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed); ++ return true; ++ } + -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif ++ /* We don't know what this is, cost all operands. */ ++ return false; +} + -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vzip1q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} + /* Calculate the cost of calculating X, storing it in *COST. Result + is true if the total cost of the operation has now been calculated. */ + static bool +@@ -4506,13 +4929,31 @@ + aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, + int param ATTRIBUTE_UNUSED, int *cost, bool speed) + { +- rtx op0, op1; ++ rtx op0, op1, op2; + const struct cpu_cost_table *extra_cost + = aarch64_tune_params->insn_extra_cost; ++ enum machine_mode mode = GET_MODE (x); + ++ /* By default, assume that everything has equivalent cost to the ++ cheapest instruction. Any additional costs are applied as a delta ++ above this default. */ ++ *cost = COSTS_N_INSNS (1); + -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vzip1q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} ++ /* TODO: The cost infrastructure currently does not handle ++ vector operations. Assume that all vector operations ++ are equally expensive. */ ++ if (VECTOR_MODE_P (mode)) ++ { ++ if (speed) ++ *cost += extra_cost->vect.alu; ++ return true; ++ } ++ + switch (code) + { + case SET: ++ /* The cost depends entirely on the operands to SET. 
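The rewritten cost function seeds every expression with COSTS_N_INSNS (1) and then layers per-CPU deltas from the extra-cost tables on top. A toy restatement of that convention with hypothetical table values (in GCC's rtl.h, COSTS_N_INSNS (N) expands to (N) * 4):

#define COSTS_N_INSNS(n) ((n) * 4)

struct toy_extra_cost { int alu_arith; int vect_alu; };
static const struct toy_extra_cost toy_table = { 1, 3 };

static int
toy_rtx_cost (int is_vector, int speed)
{
  int cost = COSTS_N_INSNS (1);  /* baseline: one cheap instruction */
  if (is_vector && speed)
    cost += toy_table.vect_alu;  /* delta taken from the cost table */
  return cost;
}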
*/ ++ *cost = 0; + op0 = SET_DEST (x); + op1 = SET_SRC (x); + +@@ -4520,25 +4961,47 @@ + { + case MEM: + if (speed) +- *cost += extra_cost->ldst.store; ++ { ++ rtx address = XEXP (op0, 0); ++ if (GET_MODE_CLASS (mode) == MODE_INT) ++ *cost += extra_cost->ldst.store; ++ else if (mode == SFmode) ++ *cost += extra_cost->ldst.storef; ++ else if (mode == DFmode) ++ *cost += extra_cost->ldst.stored; + +- if (op1 != const0_rtx) +- *cost += rtx_cost (op1, SET, 1, speed); ++ *cost += ++ COSTS_N_INSNS (aarch64_address_cost (address, mode, ++ 0, speed)); ++ } + -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vzip1q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} ++ *cost += rtx_cost (op1, SET, 1, speed); + return true; + + case SUBREG: + if (! REG_P (SUBREG_REG (op0))) + *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed); + -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vzip1q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} + /* Fall through. */ + case REG: +- /* Cost is just the cost of the RHS of the set. */ +- *cost += rtx_cost (op1, SET, 1, true); ++ /* const0_rtx is in general free, but we will use an ++ instruction to set a register to 0. */ ++ if (REG_P (op1) || op1 == const0_rtx) ++ { ++ /* The cost is 1 per register copied. */ ++ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) ++ / UNITS_PER_WORD; ++ *cost = COSTS_N_INSNS (n_minus_1 + 1); ++ } ++ else ++ /* Cost is just the cost of the RHS of the set. */ ++ *cost += rtx_cost (op1, SET, 1, speed); + return true; + +- case ZERO_EXTRACT: /* Bit-field insertion. */ ++ case ZERO_EXTRACT: + case SIGN_EXTRACT: +- /* Strip any redundant widening of the RHS to meet the width of +- the target. */ ++ /* Bit-field insertion. Strip any redundant widening of ++ the RHS to meet the width of the target. */ + if (GET_CODE (op1) == SUBREG) + op1 = SUBREG_REG (op1); + if ((GET_CODE (op1) == ZERO_EXTEND +@@ -4547,25 +5010,139 @@ + && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) + >= INTVAL (XEXP (op0, 1)))) + op1 = XEXP (op1, 0); +- *cost += rtx_cost (op1, SET, 1, speed); + -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} ++ if (CONST_INT_P (op1)) ++ { ++ /* MOV immediate is assumed to always be cheap. */ ++ *cost = COSTS_N_INSNS (1); ++ } ++ else ++ { ++ /* BFM. */ ++ if (speed) ++ *cost += extra_cost->alu.bfi; ++ *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed); ++ } + -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} + return true; + + default: ++ /* We can't make sense of this, assume default cost. 
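For a plain register-to-register SET the code above charges one instruction per machine word moved. Distilled into a stand-alone helper (UNITS_PER_WORD is 8 on AArch64):

#define UNITS_PER_WORD 8
#define COSTS_N_INSNS(n) ((n) * 4)

static int
reg_copy_cost (int mode_size_bytes)
{
  int n_minus_1 = (mode_size_bytes - 1) / UNITS_PER_WORD;
  return COSTS_N_INSNS (n_minus_1 + 1);
}
/* reg_copy_cost (8)  -> COSTS_N_INSNS (1): one MOV.
   reg_copy_cost (16) -> COSTS_N_INSNS (2): e.g. a TImode pair.  */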
*/ ++ *cost = COSTS_N_INSNS (1); + break; + } + return false; + ++ case CONST_INT: ++ /* If an instruction can incorporate a constant within the ++ instruction, the instruction's expression avoids calling ++ rtx_cost() on the constant. If rtx_cost() is called on a ++ constant, then it is usually because the constant must be ++ moved into a register by one or more instructions. + -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif -+} ++ The exception is constant 0, which can be expressed ++ as XZR/WZR and is therefore free. The exception to this is ++ if we have (set (reg) (const0_rtx)) in which case we must cost ++ the move. However, we can catch that when we cost the SET, so ++ we don't need to consider that here. */ ++ if (x == const0_rtx) ++ *cost = 0; ++ else ++ { ++ /* To an approximation, building any other constant is ++ proportionally expensive to the number of instructions ++ required to build that constant. This is true whether we ++ are compiling for SPEED or otherwise. */ ++ *cost = COSTS_N_INSNS (aarch64_build_constant (0, ++ INTVAL (x), ++ false)); ++ } ++ return true; + -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} ++ case CONST_DOUBLE: ++ if (speed) ++ { ++ /* mov[df,sf]_aarch64. */ ++ if (aarch64_float_const_representable_p (x)) ++ /* FMOV (scalar immediate). */ ++ *cost += extra_cost->fp[mode == DFmode].fpconst; ++ else if (!aarch64_float_const_zero_rtx_p (x)) ++ { ++ /* This will be a load from memory. */ ++ if (mode == DFmode) ++ *cost += extra_cost->ldst.loadd; ++ else ++ *cost += extra_cost->ldst.loadf; ++ } ++ else ++ /* Otherwise this is +0.0. We get this using MOVI d0, #0 ++ or MOV v0.s[0], wzr - neither of which are modeled by the ++ cost tables. Just use the default cost. */ ++ { ++ } ++ } + -+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+vzip2_f32 (float32x2_t __a, float32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} ++ return true; + -+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+vzip2_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} + case MEM: + if (speed) +- *cost += extra_cost->ldst.load; ++ { ++ /* For loads we want the base cost of a load, plus an ++ approximation for the additional cost of the addressing ++ mode. 
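The CONST_INT case above prices a constant by the number of instructions aarch64_build_constant would emit for it. A rough stand-in for that count, one MOVZ plus one MOVK per additional non-zero 16-bit chunk (the real routine is smarter, also trying MOVN and ORR with bitmask immediates, so this only approximates the cost):

static int
approx_build_constant_insns (unsigned long long val)
{
  int insns = 0;
  for (int shift = 0; shift < 64; shift += 16)
    if ((val >> shift) & 0xffffULL)
      insns++;                 /* one MOVZ, then one MOVK per chunk */
  return insns ? insns : 1;    /* zero still needs a single MOV     */
}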
*/ ++ rtx address = XEXP (x, 0); ++ if (GET_MODE_CLASS (mode) == MODE_INT) ++ *cost += extra_cost->ldst.load; ++ else if (mode == SFmode) ++ *cost += extra_cost->ldst.loadf; ++ else if (mode == DFmode) ++ *cost += extra_cost->ldst.loadd; + ++ *cost += ++ COSTS_N_INSNS (aarch64_address_cost (address, mode, ++ 0, speed)); ++ } + -+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+vzip2_p16 (poly16x4_t __a, poly16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} + return true; + + case NEG: +- op0 = CONST0_RTX (GET_MODE (x)); +- op1 = XEXP (x, 0); +- goto cost_minus; ++ op0 = XEXP (x, 0); + ++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) ++ { ++ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE ++ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) ++ { ++ /* CSETM. */ ++ *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed); ++ return true; ++ } + -+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+vzip2_s8 (int8x8_t __a, int8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} ++ /* Cost this as SUB wzr, X. */ ++ op0 = CONST0_RTX (GET_MODE (x)); ++ op1 = XEXP (x, 0); ++ goto cost_minus; ++ } ++ ++ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) ++ { ++ /* Support (neg(fma...)) as a single instruction only if ++ sign of zeros is unimportant. This matches the decision ++ making in aarch64.md. */ ++ if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0))) ++ { ++ /* FNMADD. */ ++ *cost = rtx_cost (op0, NEG, 0, speed); ++ return true; ++ } ++ if (speed) ++ /* FNEG. */ ++ *cost += extra_cost->fp[mode == DFmode].neg; ++ return false; ++ } + -+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+vzip2_s16 (int16x4_t __a, int16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} ++ return false; + -+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+vzip2_s32 (int32x2_t __a, int32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} + case COMPARE: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); +@@ -4577,96 +5154,228 @@ + goto cost_logic; + } + +- /* Comparisons can work if the order is swapped. +- Canonicalization puts the more complex operation first, but +- we want it in op1. */ +- if (! (REG_P (op0) +- || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) +- { +- op0 = XEXP (x, 1); +- op1 = XEXP (x, 0); +- } +- goto cost_minus; ++ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT) ++ { ++ /* TODO: A write to the CC flags possibly costs extra, this ++ needs encoding in the cost tables. */ + ++ /* CC_ZESWPmode supports zero extend for free. 
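The NEG handling above only treats (neg (fma ...)) as a single instruction when the sign of zero does not matter, matching the decision in aarch64.md. In source terms, assuming C99 fma and typical -O2 code generation:

#include <math.h>

/* With -ffast-math (or -fno-signed-zeros) the negation can fold into
   the fused operation, typically one FNMADD; under default FP rules
   a separate FNEG remains, which is what the cost model charges.  */
double
neg_fma (double a, double b, double c)
{
  return -fma (a, b, c);
}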
*/ ++ if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND) ++ op0 = XEXP (op0, 0); + -+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+vzip2_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} ++ /* ANDS. */ ++ if (GET_CODE (op0) == AND) ++ { ++ x = op0; ++ goto cost_logic; ++ } + -+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+vzip2_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); -+#endif -+} ++ if (GET_CODE (op0) == PLUS) ++ { ++ /* ADDS (and CMN alias). */ ++ x = op0; ++ goto cost_plus; ++ } + -+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+vzip2_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif -+} ++ if (GET_CODE (op0) == MINUS) ++ { ++ /* SUBS. */ ++ x = op0; ++ goto cost_minus; ++ } + -+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+vzip2q_f32 (float32x4_t __a, float32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} ++ if (GET_CODE (op1) == NEG) ++ { ++ /* CMN. */ ++ if (speed) ++ *cost += extra_cost->alu.arith; + -+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+vzip2q_f64 (float64x2_t __a, float64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} ++ *cost += rtx_cost (op0, COMPARE, 0, speed); ++ *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed); ++ return true; ++ } + -+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} ++ /* CMP. + -+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} ++ Compare can freely swap the order of operands, and ++ canonicalization puts the more complex operation first. ++ But the integer MINUS logic expects the shift/extend ++ operation in op1. */ ++ if (! 
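The COMPARE cases above fold an arithmetic or logical operation into the flag-setting instruction forms. Typical -O2 mappings (indicative only):

extern void hit (void);

void
f (long a, long b)
{
  if (a + b == 0)    /* CMN a, b: alias of ADDS xzr, a, b */
    hit ();
}

void
g (long a, long b)
{
  if ((a & b) != 0)  /* TST a, b: alias of ANDS xzr, a, b */
    hit ();
}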
(REG_P (op0) ++ || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) ++ { ++ op0 = XEXP (x, 1); ++ op1 = XEXP (x, 0); ++ } ++ goto cost_minus; ++ } + -+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+vzip2q_s8 (int8x16_t __a, int8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} ++ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) ++ { ++ /* FCMP. */ ++ if (speed) ++ *cost += extra_cost->fp[mode == DFmode].compare; + -+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+vzip2q_s16 (int16x8_t __a, int16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} ++ if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) ++ { ++ /* FCMP supports constant 0.0 for no extra cost. */ ++ return true; ++ } ++ return false; ++ } + -+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+vzip2q_s32 (int32x4_t __a, int32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} ++ return false; + -+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+vzip2q_s64 (int64x2_t __a, int64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} + case MINUS: +- op0 = XEXP (x, 0); +- op1 = XEXP (x, 1); ++ { ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); + +- cost_minus: +- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT +- || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC +- && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) +- { +- if (op0 != const0_rtx) ++cost_minus: ++ /* Detect valid immediates. */ ++ if ((GET_MODE_CLASS (mode) == MODE_INT ++ || (GET_MODE_CLASS (mode) == MODE_CC ++ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) ++ && CONST_INT_P (op1) ++ && aarch64_uimm12_shift (INTVAL (op1))) ++ { + *cost += rtx_cost (op0, MINUS, 0, speed); + +- if (CONST_INT_P (op1)) +- { +- if (!aarch64_uimm12_shift (INTVAL (op1))) +- *cost += rtx_cost (op1, MINUS, 1, speed); +- } +- else +- { +- op1 = aarch64_strip_shift_or_extend (op1); +- *cost += rtx_cost (op1, MINUS, 1, speed); +- } +- return true; +- } ++ if (speed) ++ /* SUB(S) (immediate). */ ++ *cost += extra_cost->alu.arith; ++ return true; + +- return false; ++ } + ++ /* Look for SUB (extended register). 
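aarch64_uimm12_shift, used above to spot immediates that ADD(S)/SUB(S) can encode directly, accepts a 12-bit value optionally shifted left by 12. A self-contained sketch of that predicate:

#include <stdbool.h>

static bool
uimm12_shift_p (unsigned long long v)
{
  return (v & ~0xfffULL) == 0             /* #imm12          */
         || (v & ~(0xfffULL << 12)) == 0; /* #imm12, LSL #12 */
}
/* uimm12_shift_p (0xfff)  -> true
   uimm12_shift_p (0x1000) -> true  (1 << 12)
   uimm12_shift_p (0x1001) -> false (needs a constant move first) */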
*/ ++ if (aarch64_rtx_arith_op_extract_p (op1, mode)) ++ { ++ if (speed) ++ *cost += extra_cost->alu.arith_shift; + -+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); -+#endif -+} ++ *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), ++ (enum rtx_code) GET_CODE (op1), ++ 0, speed); ++ return true; ++ } + -+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); -+#endif -+} ++ rtx new_op1 = aarch64_strip_extend (op1); + -+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); -+#endif -+} ++ /* Cost this as an FMA-alike operation. */ ++ if ((GET_CODE (new_op1) == MULT ++ || GET_CODE (new_op1) == ASHIFT) ++ && code != COMPARE) ++ { ++ *cost += aarch64_rtx_mult_cost (new_op1, MULT, ++ (enum rtx_code) code, ++ speed); ++ *cost += rtx_cost (op0, MINUS, 0, speed); ++ return true; ++ } + -+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif -+} ++ *cost += rtx_cost (new_op1, MINUS, 1, speed); + - __INTERLEAVE_LIST (zip) ++ if (speed) ++ { ++ if (GET_MODE_CLASS (mode) == MODE_INT) ++ /* SUB(S). */ ++ *cost += extra_cost->alu.arith; ++ else if (GET_MODE_CLASS (mode) == MODE_FLOAT) ++ /* FSUB. 
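The "FMA-alike" costing above covers integer multiply-accumulate under PLUS/MINUS: on AArch64 these shapes normally become single MADD/MSUB instructions rather than a MUL followed by an ADD/SUB, which is why the multiply is costed through aarch64_rtx_mult_cost instead of separately:

long
mul_add (long a, long b, long c)
{
  return a + b * c;   /* typically MADD */
}

long
mul_sub (long a, long b, long c)
{
  return a - b * c;   /* typically MSUB */
}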
*/ ++ *cost += extra_cost->fp[mode == DFmode].addsub; ++ } ++ return true; ++ } ++ + case PLUS: +- op0 = XEXP (x, 0); +- op1 = XEXP (x, 1); ++ { ++ rtx new_op0; - #undef __INTERLEAVE_LIST ---- a/src/gcc/config/aarch64/aarch64.md -+++ b/src/gcc/config/aarch64/aarch64.md -@@ -98,11 +98,16 @@ - UNSPEC_ST2 - UNSPEC_ST3 - UNSPEC_ST4 -+ UNSPEC_ST2_LANE -+ UNSPEC_ST3_LANE -+ UNSPEC_ST4_LANE - UNSPEC_TLS - UNSPEC_TLSDESC - UNSPEC_USHL_2S - UNSPEC_USHR64 - UNSPEC_VSTRUCTDUMMY -+ UNSPEC_SP_SET -+ UNSPEC_SP_TEST - ]) +- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) +- { +- if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) +- { +- *cost += rtx_cost (op0, PLUS, 0, speed); +- } +- else +- { +- rtx new_op0 = aarch64_strip_shift_or_extend (op0); ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); - (define_c_enum "unspecv" [ -@@ -1106,7 +1111,26 @@ - (set_attr "simd" "*,*,*,yes")] - ) +- if (new_op0 == op0 +- && GET_CODE (op0) == MULT) +- { +- if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND +- && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND) +- || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND +- && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND)) +- { +- *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, +- speed) +- + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, +- speed) +- + rtx_cost (op1, PLUS, 1, speed)); +- if (speed) +- *cost += +- extra_cost->mult[GET_MODE (x) == DImode].extend_add; +- return true; +- } ++cost_plus: ++ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE ++ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) ++ { ++ /* CSINC. */ ++ *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed); ++ *cost += rtx_cost (op1, PLUS, 1, speed); ++ return true; ++ } --(define_insn "*add3_compare0" -+(define_expand "addti3" -+ [(set (match_operand:TI 0 "register_operand" "") -+ (plus:TI (match_operand:TI 1 "register_operand" "") -+ (match_operand:TI 2 "register_operand" "")))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]), -+ gen_lowpart (DImode, operands[2]))); +- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) +- + rtx_cost (XEXP (op0, 1), MULT, 1, speed) +- + rtx_cost (op1, PLUS, 1, speed)); ++ if (GET_MODE_CLASS (mode) == MODE_INT ++ && CONST_INT_P (op1) ++ && aarch64_uimm12_shift (INTVAL (op1))) ++ { ++ *cost += rtx_cost (op0, PLUS, 0, speed); + +- if (speed) +- *cost += extra_cost->mult[GET_MODE (x) == DImode].add; ++ if (speed) ++ /* ADD (immediate). */ ++ *cost += extra_cost->alu.arith; ++ return true; ++ } + +- return true; +- } ++ /* Look for ADD (extended register). */ ++ if (aarch64_rtx_arith_op_extract_p (op0, mode)) ++ { ++ if (speed) ++ *cost += extra_cost->alu.arith_shift; + +- *cost += (rtx_cost (new_op0, PLUS, 0, speed) +- + rtx_cost (op1, PLUS, 1, speed)); +- } +- return true; +- } ++ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), ++ (enum rtx_code) GET_CODE (op0), ++ 0, speed); ++ return true; ++ } + ++ /* Strip any extend, leave shifts behind as we will ++ cost them through mult_cost. 
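The "ADD (extended register)" form recognised above lets the addition widen one operand within the same instruction, so only the inner register is costed. Typical source shapes (instruction comments indicative):

long
add_sxtw (long a, int b)
{
  return a + b;        /* add x0, x0, w1, sxtw */
}

unsigned long
add_uxtw (unsigned long a, unsigned int b)
{
  return a + b;        /* add x0, x0, w1, uxtw */
}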
*/ ++ new_op0 = aarch64_strip_extend (op0); + -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), -+ gen_highpart (DImode, operands[2]))); ++ if (GET_CODE (new_op0) == MULT ++ || GET_CODE (new_op0) == ASHIFT) ++ { ++ *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS, ++ speed); ++ *cost += rtx_cost (op1, PLUS, 1, speed); ++ return true; ++ } + -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) ++ *cost += (rtx_cost (new_op0, PLUS, 0, speed) ++ + rtx_cost (op1, PLUS, 1, speed)); + -+(define_insn "add3_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ - (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") -@@ -1390,7 +1414,7 @@ - [(set_attr "type" "alu_ext")] - ) - --(define_insn "*add3_carryin" -+(define_insn "add3_carryin" - [(set - (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) -@@ -1558,8 +1582,26 @@ - (set_attr "simd" "*,yes")] - ) ++ if (speed) ++ { ++ if (GET_MODE_CLASS (mode) == MODE_INT) ++ /* ADD. */ ++ *cost += extra_cost->alu.arith; ++ else if (GET_MODE_CLASS (mode) == MODE_FLOAT) ++ /* FADD. */ ++ *cost += extra_cost->fp[mode == DFmode].addsub; ++ } ++ return true; ++ } ++ ++ case BSWAP: ++ *cost = COSTS_N_INSNS (1); ++ ++ if (speed) ++ *cost += extra_cost->alu.rev; ++ + return false; -+(define_expand "subti3" -+ [(set (match_operand:TI 0 "register_operand" "") -+ (minus:TI (match_operand:TI 1 "register_operand" "") -+ (match_operand:TI 2 "register_operand" "")))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]), -+ gen_lowpart (DImode, operands[2]))); + case IOR: ++ if (aarch_rev16_p (x)) ++ { ++ *cost = COSTS_N_INSNS (1); ++ ++ if (speed) ++ *cost += extra_cost->alu.rev; ++ ++ return true; ++ } ++ /* Fall through. */ + case XOR: + case AND: + cost_logic: +@@ -4673,117 +5382,252 @@ + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); --(define_insn "*sub3_compare0" -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_subdi3_carryin (high, gen_highpart (DImode, operands[1]), -+ gen_highpart (DImode, operands[2]))); ++ if (code == AND ++ && GET_CODE (op0) == MULT ++ && CONST_INT_P (XEXP (op0, 1)) ++ && CONST_INT_P (op1) ++ && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))), ++ INTVAL (op1)) != 0) ++ { ++ /* This is a UBFM/SBFM. */ ++ *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed); ++ if (speed) ++ *cost += extra_cost->alu.bfx; ++ return true; ++ } + -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { ++ /* We possibly get the immediate for free, this is not ++ modelled. 
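aarch_rev16_p, consulted in the IOR case above, recognises the classic swap-bytes-within-halfwords idiom so it can be costed (and matched by the new rev16 patterns) as a single REV16 instead of two shifts, two ANDs and an OR:

#include <stdint.h>

uint32_t
rev16 (uint32_t x)
{
  return ((x & 0xFF00FF00u) >> 8) | ((x & 0x00FF00FFu) << 8);
}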
*/ + if (CONST_INT_P (op1) + && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x))) + { +- *cost += rtx_cost (op0, AND, 0, speed); ++ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); + -+(define_insn "sub3_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 2 "register_operand" "r")) -@@ -1706,7 +1748,7 @@ - [(set_attr "type" "alu_ext")] - ) - --(define_insn "*sub3_carryin" -+(define_insn "sub3_carryin" - [(set - (match_operand:GPI 0 "register_operand" "=r") - (minus:GPI (minus:GPI -@@ -1935,7 +1977,7 @@ - [(set_attr "type" "mul")] - ) - --(define_insn "*madd" -+(define_insn "madd" - [(set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 2 "register_operand" "r")) -@@ -2045,6 +2087,48 @@ - [(set_attr "type" "mull")] - ) - -+(define_expand "mulditi3" -+ [(set (match_operand:TI 0 "register_operand") -+ (mult:TI (ANY_EXTEND:TI (match_operand:DI 1 "register_operand")) -+ (ANY_EXTEND:TI (match_operand:DI 2 "register_operand"))))] -+ "" -+{ -+ rtx low = gen_reg_rtx (DImode); -+ emit_insn (gen_muldi3 (low, operands[1], operands[2])); ++ if (speed) ++ *cost += extra_cost->alu.logical; + -+ rtx high = gen_reg_rtx (DImode); -+ emit_insn (gen_muldi3_highpart (high, operands[1], operands[2])); ++ return true; + } + else + { ++ rtx new_op0 = op0; + -+ emit_move_insn (gen_lowpart (DImode, operands[0]), low); -+ emit_move_insn (gen_highpart (DImode, operands[0]), high); -+ DONE; -+}) ++ /* Handle ORN, EON, or BIC. */ + if (GET_CODE (op0) == NOT) + op0 = XEXP (op0, 0); +- op0 = aarch64_strip_shift (op0); +- *cost += (rtx_cost (op0, AND, 0, speed) +- + rtx_cost (op1, AND, 1, speed)); + -+;; The default expansion of multi3 using umuldi3_highpart will perform -+;; the additions in an order that fails to combine into two madd insns. -+(define_expand "multi3" -+ [(set (match_operand:TI 0 "register_operand") -+ (mult:TI (match_operand:TI 1 "register_operand") -+ (match_operand:TI 2 "register_operand")))] -+ "" -+{ -+ rtx l0 = gen_reg_rtx (DImode); -+ rtx l1 = gen_lowpart (DImode, operands[1]); -+ rtx l2 = gen_lowpart (DImode, operands[2]); -+ rtx h0 = gen_reg_rtx (DImode); -+ rtx h1 = gen_highpart (DImode, operands[1]); -+ rtx h2 = gen_highpart (DImode, operands[2]); ++ new_op0 = aarch64_strip_shift (op0); + -+ emit_insn (gen_muldi3 (l0, l1, l2)); -+ emit_insn (gen_umuldi3_highpart (h0, l1, l2)); -+ emit_insn (gen_madddi (h0, h1, l2, h0)); -+ emit_insn (gen_madddi (h0, l1, h2, h0)); ++ /* If we had a shift on op0 then this is a logical-shift- ++ by-register/immediate operation. Otherwise, this is just ++ a logical operation. */ ++ if (speed) ++ { ++ if (new_op0 != op0) ++ { ++ /* Shift by immediate. */ ++ if (CONST_INT_P (XEXP (op0, 1))) ++ *cost += extra_cost->alu.log_shift; ++ else ++ *cost += extra_cost->alu.log_shift_reg; ++ } ++ else ++ *cost += extra_cost->alu.logical; ++ } + -+ emit_move_insn (gen_lowpart (DImode, operands[0]), l0); -+ emit_move_insn (gen_highpart (DImode, operands[0]), h0); -+ DONE; -+}) ++ /* In both cases we want to cost both operands. 
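The NOT stripping above corresponds to AArch64's combined logical-with-complement instructions, which make the inner complement free:

unsigned long orn (unsigned long a, unsigned long b) { return a | ~b; }
unsigned long bic (unsigned long a, unsigned long b) { return a & ~b; }
unsigned long eon (unsigned long a, unsigned long b) { return a ^ ~b; }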
*/ ++ *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed) ++ + rtx_cost (op1, (enum rtx_code) code, 1, speed); + - (define_insn "muldi3_highpart" - [(set (match_operand:DI 0 "register_operand" "=r") - (truncate:DI -@@ -2345,6 +2429,25 @@ - } - ) ++ return true; + } +- return true; + } + return false; -+(define_expand "movcc" -+ [(set (match_operand:GPF 0 "register_operand" "") -+ (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "") -+ (match_operand:GPF 2 "register_operand" "") -+ (match_operand:GPF 3 "register_operand" "")))] -+ "" -+ { -+ rtx ccreg; -+ enum rtx_code code = GET_CODE (operands[1]); ++ case NOT: ++ /* MVN. */ ++ if (speed) ++ *cost += extra_cost->alu.logical; + -+ if (code == UNEQ || code == LTGT) -+ FAIL; ++ /* The logical instruction could have the shifted register form, ++ but the cost is the same if the shift is processed as a separate ++ instruction, so we don't bother with it here. */ ++ return false; + -+ ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), -+ XEXP (operands[1], 1)); -+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); -+ } -+) + case ZERO_EXTEND: +- if ((GET_MODE (x) == DImode +- && GET_MODE (XEXP (x, 0)) == SImode) +- || GET_CODE (XEXP (x, 0)) == MEM) + - (define_insn "*csinc2_insn" - [(set (match_operand:GPI 0 "register_operand" "=r") - (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator" -@@ -2486,7 +2589,18 @@ - [(set_attr "type" "logic_shift_imm")] - ) - --;; zero_extend version of above -+(define_insn "*_rol3" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (LOGICAL:GPI (rotate:GPI -+ (match_operand:GPI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_" "n")) -+ (match_operand:GPI 3 "register_operand" "r")))] -+ "" -+ "\\t%0, %3, %1, ror ( - %2)" -+ [(set_attr "type" "logic_shift_imm")] -+) ++ op0 = XEXP (x, 0); ++ /* If a value is written in SI mode, then zero extended to DI ++ mode, the operation will in general be free as a write to ++ a 'w' register implicitly zeroes the upper bits of an 'x' ++ register. However, if this is + -+;; zero_extend versions of above - (define_insn "*_si3_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI -@@ -2499,6 +2613,18 @@ - [(set_attr "type" "logic_shift_imm")] - ) - -+(define_insn "*_rolsi3_uxtw" -+ [(set (match_operand:DI 0 "register_operand" "=r") -+ (zero_extend:DI -+ (LOGICAL:SI (rotate:SI -+ (match_operand:SI 1 "register_operand" "r") -+ (match_operand:QI 2 "aarch64_shift_imm_si" "n")) -+ (match_operand:SI 3 "register_operand" "r"))))] -+ "" -+ "\\t%w0, %w3, %w1, ror (32 - %2)" -+ [(set_attr "type" "logic_shift_imm")] -+) ++ (set (reg) (zero_extend (reg))) + - (define_insn "one_cmpl2" - [(set (match_operand:GPI 0 "register_operand" "=r") - (not:GPI (match_operand:GPI 1 "register_operand" "r")))] -@@ -3174,6 +3300,38 @@ - [(set_attr "type" "rev")] - ) - -+;; There are no canonicalisation rules for the position of the lshiftrt, ashift -+;; operations within an IOR/AND RTX, therefore we have two patterns matching -+;; each valid permutation. ++ we must cost the explicit register move. 
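The ZERO_EXTEND reasoning above relies on the architectural rule that writing a 'w' register zeroes the upper 32 bits of the corresponding 'x' register, so most SI-to-DI zero extensions cost nothing:

unsigned long
widen_add (unsigned int a, unsigned int b)
{
  /* add w0, w0, w1 already leaves the result zero-extended in x0;
     no separate UXTW should be needed.  */
  return (unsigned long) (a + b);
}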
*/ ++ if (mode == DImode ++ && GET_MODE (op0) == SImode ++ && outer == SET) + { +- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); ++ int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); + -+(define_insn "rev162" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") -+ (const_int 8)) -+ (match_operand:GPI 3 "const_int_operand" "n")) -+ (and:GPI (lshiftrt:GPI (match_dup 1) -+ (const_int 8)) -+ (match_operand:GPI 2 "const_int_operand" "n"))))] -+ "aarch_rev16_shleft_mask_imm_p (operands[3], mode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], mode)" -+ "rev16\\t%0, %1" -+ [(set_attr "type" "rev")] -+) ++ if (!op_cost && speed) ++ /* MOV. */ ++ *cost += extra_cost->alu.extend; ++ else ++ /* Free, the cost is that of the SI mode operation. */ ++ *cost = op_cost; + -+(define_insn "rev162_alt" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r") -+ (const_int 8)) -+ (match_operand:GPI 2 "const_int_operand" "n")) -+ (and:GPI (ashift:GPI (match_dup 1) -+ (const_int 8)) -+ (match_operand:GPI 3 "const_int_operand" "n"))))] -+ "aarch_rev16_shleft_mask_imm_p (operands[3], mode) -+ && aarch_rev16_shright_mask_imm_p (operands[2], mode)" -+ "rev16\\t%0, %1" -+ [(set_attr "type" "rev")] -+) + return true; + } ++ else if (MEM_P (XEXP (x, 0))) ++ { ++ /* All loads can zero extend to any size for free. */ ++ *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed); ++ return true; ++ } + - ;; zero_extend version of above - (define_insn "*bswapsi2_uxtw" - [(set (match_operand:DI 0 "register_operand" "=r") -@@ -3188,7 +3346,7 @@ - ;; ------------------------------------------------------------------- - - ;; frint floating-point round to integral standard patterns. --;; Expands to btrunc, ceil, floor, nearbyint, rint, round. -+;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. - - (define_insn "2" - [(set (match_operand:GPF 0 "register_operand" "=w") -@@ -3582,36 +3740,63 @@ - [(set_attr "type" "call") - (set_attr "length" "16")]) - --(define_insn "tlsie_small" -- [(set (match_operand:DI 0 "register_operand" "=r") -- (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")] -+(define_insn "tlsie_small_" -+ [(set (match_operand:PTR 0 "register_operand" "=r") -+ (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")] - UNSPEC_GOTSMALLTLS))] - "" -- "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" -+ "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" - [(set_attr "type" "load1") - (set_attr "length" "8")] - ) ++ /* UXTB/UXTH. */ ++ if (speed) ++ *cost += extra_cost->alu.extend; ++ + return false; --(define_insn "tlsle_small" -+(define_insn "tlsie_small_sidi" - [(set (match_operand:DI 0 "register_operand" "=r") -- (unspec:DI [(match_operand:DI 1 "register_operand" "r") -- (match_operand:DI 2 "aarch64_tls_le_symref" "S")] -+ (zero_extend:DI -+ (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")] -+ UNSPEC_GOTSMALLTLS)))] -+ "" -+ "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]" -+ [(set_attr "type" "load1") -+ (set_attr "length" "8")] -+) + case SIGN_EXTEND: +- if (GET_CODE (XEXP (x, 0)) == MEM) ++ if (MEM_P (XEXP (x, 0))) + { +- *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed); ++ /* LDRSH. 
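Loads, by contrast, extend for free in either direction because the load instruction itself does the widening (LDRB/LDRH for zero extension, LDRSB/LDRSH/LDRSW for sign extension), which is what the MEM_P branches above encode:

#include <stdint.h>

uint64_t
load_zext (const uint8_t *p)
{
  return *p;     /* ldrb w0, [x0]: already zero-extended */
}

int64_t
load_sext (const int16_t *p)
{
  return *p;     /* ldrsh x0, [x0] */
}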
*/ ++ if (speed) ++ { ++ rtx address = XEXP (XEXP (x, 0), 0); ++ *cost += extra_cost->ldst.load_sign_extend; + -+(define_expand "tlsle_small" -+ [(set (match_operand 0 "register_operand" "=r") -+ (unspec [(match_operand 1 "register_operand" "r") -+ (match_operand 2 "aarch64_tls_le_symref" "S")] -+ UNSPEC_GOTSMALLTLS))] -+ "" -+{ -+ enum machine_mode mode = GET_MODE (operands[0]); -+ emit_insn ((mode == DImode -+ ? gen_tlsle_small_di -+ : gen_tlsle_small_si) (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) ++ *cost += ++ COSTS_N_INSNS (aarch64_address_cost (address, mode, ++ 0, speed)); ++ } + return true; + } + -+(define_insn "tlsle_small_" -+ [(set (match_operand:P 0 "register_operand" "=r") -+ (unspec:P [(match_operand:P 1 "register_operand" "r") -+ (match_operand 2 "aarch64_tls_le_symref" "S")] - UNSPEC_GOTSMALLTLS))] - "" -- "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" -+ "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" - [(set_attr "type" "alu_reg") - (set_attr "length" "8")] - ) - --(define_insn "tlsdesc_small" -- [(set (reg:DI R0_REGNUM) -- (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] -+(define_insn "tlsdesc_small_" -+ [(set (reg:PTR R0_REGNUM) -+ (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] - UNSPEC_TLSDESC)) - (clobber (reg:DI LR_REGNUM)) - (clobber (reg:CC CC_REGNUM)) - (clobber (match_scratch:DI 1 "=r"))] - "TARGET_TLS_DESC" -- "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" -+ "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" - [(set_attr "type" "call") - (set_attr "length" "16")]) - -@@ -3636,6 +3821,67 @@ - DONE; - }) ++ if (speed) ++ *cost += extra_cost->alu.extend; + return false; -+;; Named patterns for stack smashing protection. -+(define_expand "stack_protect_set" -+ [(match_operand 0 "memory_operand") -+ (match_operand 1 "memory_operand")] -+ "" -+{ -+ enum machine_mode mode = GET_MODE (operands[0]); ++ case ASHIFT: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); + -+ emit_insn ((mode == DImode -+ ? gen_stack_protect_set_di -+ : gen_stack_protect_set_si) (operands[0], operands[1])); -+ DONE; -+}) ++ if (CONST_INT_P (op1)) ++ { ++ /* LSL (immediate), UBMF, UBFIZ and friends. These are all ++ aliases. */ ++ if (speed) ++ *cost += extra_cost->alu.shift; + -+(define_insn "stack_protect_set_" -+ [(set (match_operand:PTR 0 "memory_operand" "=m") -+ (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")] -+ UNSPEC_SP_SET)) -+ (set (match_scratch:PTR 2 "=&r") (const_int 0))] -+ "" -+ "ldr\\t%x2, %1\;str\\t%x2, %0\;mov\t%x2,0" -+ [(set_attr "length" "12") -+ (set_attr "type" "multiple")]) ++ /* We can incorporate zero/sign extend for free. */ ++ if (GET_CODE (op0) == ZERO_EXTEND ++ || GET_CODE (op0) == SIGN_EXTEND) ++ op0 = XEXP (op0, 0); + -+(define_expand "stack_protect_test" -+ [(match_operand 0 "memory_operand") -+ (match_operand 1 "memory_operand") -+ (match_operand 2)] -+ "" -+{ ++ *cost += rtx_cost (op0, ASHIFT, 0, speed); ++ return true; ++ } ++ else ++ { ++ /* LSLV. */ ++ if (speed) ++ *cost += extra_cost->alu.shift_reg; + -+ rtx result = gen_reg_rtx (Pmode); ++ return false; /* All arguments need to be in registers. */ ++ } + -+ enum machine_mode mode = GET_MODE (operands[0]); + case ROTATE: +- if (!CONST_INT_P (XEXP (x, 1))) +- *cost += COSTS_N_INSNS (2); +- /* Fall through. */ + case ROTATERT: + case LSHIFTRT: +- case ASHIFT: + case ASHIFTRT: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); + +- /* Shifting by a register often takes an extra cycle. 
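The shift cases above separate immediate shifts, which are cheap and often fold into neighbouring instructions, from register-count shifts (the LSLV/ASRV family), which the tables may charge more for:

unsigned long
shift_imm (unsigned long x)
{
  return x << 3;       /* lsl x0, x0, #3 */
}

unsigned long
shift_reg (unsigned long x, unsigned int n)
{
  return x << n;       /* lslv, usually printed as lsl x0, x0, x1 */
}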
*/ +- if (speed && !CONST_INT_P (XEXP (x, 1))) +- *cost += extra_cost->alu.arith_shift_reg; ++ if (CONST_INT_P (op1)) ++ { ++ /* ASR (immediate) and friends. */ ++ if (speed) ++ *cost += extra_cost->alu.shift; + +- *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed); ++ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); ++ return true; ++ } ++ else ++ { + -+ emit_insn ((mode == DImode -+ ? gen_stack_protect_test_di -+ : gen_stack_protect_test_si) (result, -+ operands[0], -+ operands[1])); ++ /* ASR (register) and friends. */ ++ if (speed) ++ *cost += extra_cost->alu.shift_reg; + -+ if (mode == DImode) -+ emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), -+ result, const0_rtx, operands[2])); -+ else -+ emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), -+ result, const0_rtx, operands[2])); -+ DONE; -+}) ++ return false; /* All arguments need to be in registers. */ ++ } + -+(define_insn "stack_protect_test_" -+ [(set (match_operand:PTR 0 "register_operand") -+ (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m") -+ (match_operand:PTR 2 "memory_operand" "m")] -+ UNSPEC_SP_TEST)) -+ (clobber (match_scratch:PTR 3 "=&r"))] -+ "" -+ "ldr\t%x3, %x1\;ldr\t%x0, %x2\;eor\t%x0, %x3, %x0" -+ [(set_attr "length" "12") -+ (set_attr "type" "multiple")]) ++ case SYMBOL_REF: + - ;; AdvSIMD Stuff - (include "aarch64-simd.md") ++ if (aarch64_cmodel == AARCH64_CMODEL_LARGE) ++ { ++ /* LDR. */ ++ if (speed) ++ *cost += extra_cost->ldst.load; ++ } ++ else if (aarch64_cmodel == AARCH64_CMODEL_SMALL ++ || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC) ++ { ++ /* ADRP, followed by ADD. */ ++ *cost += COSTS_N_INSNS (1); ++ if (speed) ++ *cost += 2 * extra_cost->alu.arith; ++ } ++ else if (aarch64_cmodel == AARCH64_CMODEL_TINY ++ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) ++ { ++ /* ADR. */ ++ if (speed) ++ *cost += extra_cost->alu.arith; ++ } ++ ++ if (flag_pic) ++ { ++ /* One extra load instruction, after accessing the GOT. */ ++ *cost += COSTS_N_INSNS (1); ++ if (speed) ++ *cost += extra_cost->ldst.load; ++ } + return true; + + case HIGH: +- if (!CONSTANT_P (XEXP (x, 0))) +- *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed); +- return true; +- + case LO_SUM: +- if (!CONSTANT_P (XEXP (x, 1))) +- *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed); +- *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed); ++ /* ADRP/ADD (immediate). 
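The SYMBOL_REF costs above track how a symbol's address is materialised under each code model; comparing -mcmodel=tiny, small and large output on a snippet like this shows the ADR, ADRP+ADD and literal-load sequences the comments name (PIC adds a GOT load on top):

extern int counter;

int *
addr_of_counter (void)
{
  return &counter;
  /* tiny:  adr  x0, counter
     small: adrp x0, counter
            add  x0, x0, :lo12:counter
     large: load of the full 64-bit address  */
}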
*/ ++ if (speed) ++ *cost += extra_cost->alu.arith; + return true; ---- a/src/gcc/config/aarch64/aarch64-builtins.c -+++ b/src/gcc/config/aarch64/aarch64-builtins.c -@@ -147,8 +147,24 @@ - = { qualifier_unsigned, qualifier_unsigned }; - #define TYPES_UNOPU (aarch64_types_unopu_qualifiers) - #define TYPES_CREATE (aarch64_types_unop_qualifiers) --#define TYPES_REINTERP (aarch64_types_unop_qualifiers) -+#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers) - static enum aarch64_type_qualifiers -+aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_unsigned }; -+#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_none, qualifier_poly }; -+#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_unsigned, qualifier_none }; -+#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_unop_ps_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_poly, qualifier_none }; -+#define TYPES_REINTERP_PS (aarch64_types_unop_ps_qualifiers) -+static enum aarch64_type_qualifiers - aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; - #define TYPES_BINOP (aarch64_types_binop_qualifiers) -@@ -230,6 +246,11 @@ - = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; - #define TYPES_STORE1 (aarch64_types_store1_qualifiers) - #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) -+static enum aarch64_type_qualifiers -+aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] -+ = { qualifier_void, qualifier_pointer_map_mode, -+ qualifier_none, qualifier_none }; -+#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers) + case ZERO_EXTRACT: + case SIGN_EXTRACT: +- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed); ++ /* UBFX/SBFX. */ ++ if (speed) ++ *cost += extra_cost->alu.bfx; ++ ++ /* We can trust that the immediates used will be correct (there ++ are no by-register forms), so we need only cost op0. */ ++ *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed); + return true; - #define CF0(N, X) CODE_FOR_aarch64_##N##X - #define CF1(N, X) CODE_FOR_##N##X##1 -@@ -311,6 +332,8 @@ - VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) - #define BUILTIN_VDQF(T, N, MAP) \ - VAR3 (T, N, MAP, v2sf, v4sf, v2df) -+#define BUILTIN_VDQF_DF(T, N, MAP) \ -+ VAR4 (T, N, MAP, v2sf, v4sf, v2df, df) - #define BUILTIN_VDQH(T, N, MAP) \ - VAR2 (T, N, MAP, v4hi, v8hi) - #define BUILTIN_VDQHS(T, N, MAP) \ -@@ -1086,7 +1109,29 @@ + case MULT: +- op0 = XEXP (x, 0); +- op1 = XEXP (x, 1); ++ *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed); ++ /* aarch64_rtx_mult_cost always handles recursion to its ++ operands. 
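The ZERO_EXTRACT/SIGN_EXTRACT case above charges a single bit-field extract. The usual source idiom, a shift followed by a mask, is recognised as one UBFX (or SBFX for the signed form):

#include <stdint.h>

uint64_t
field (uint64_t x)
{
  return (x >> 5) & 0x3f;   /* typically ubfx x0, x0, #5, #6 */
}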
*/ ++ return true; - return aarch64_builtin_decls[builtin]; - } +- *cost = COSTS_N_INSNS (1); +- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) +- { +- if (CONST_INT_P (op1) +- && exact_log2 (INTVAL (op1)) > 0) +- { +- *cost += rtx_cost (op0, ASHIFT, 0, speed); +- return true; +- } - -+ case BUILT_IN_BSWAP16: -+#undef AARCH64_CHECK_BUILTIN_MODE -+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ -+ (out_mode == N##Imode && out_n == C \ -+ && in_mode == N##Imode && in_n == C) -+ if (AARCH64_CHECK_BUILTIN_MODE (4, H)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; -+ else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP32: -+ if (AARCH64_CHECK_BUILTIN_MODE (2, S)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; -+ else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; -+ else -+ return NULL_TREE; -+ case BUILT_IN_BSWAP64: -+ if (AARCH64_CHECK_BUILTIN_MODE (2, D)) -+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; +- if ((GET_CODE (op0) == ZERO_EXTEND +- && GET_CODE (op1) == ZERO_EXTEND) +- || (GET_CODE (op0) == SIGN_EXTEND +- && GET_CODE (op1) == SIGN_EXTEND)) +- { +- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) +- + rtx_cost (XEXP (op1, 0), MULT, 1, speed)); +- if (speed) +- *cost += extra_cost->mult[GET_MODE (x) == DImode].extend; +- return true; +- } +- +- if (speed) +- *cost += extra_cost->mult[GET_MODE (x) == DImode].simple; +- } +- else if (speed) +- { +- if (GET_MODE (x) == DFmode) +- *cost += extra_cost->fp[1].mult; +- else if (GET_MODE (x) == SFmode) +- *cost += extra_cost->fp[0].mult; +- } +- +- return false; /* All arguments need to be in registers. */ +- + case MOD: + case UMOD: +- *cost = COSTS_N_INSNS (2); + if (speed) + { + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) +@@ -4800,53 +5644,179 @@ + + case DIV: + case UDIV: +- *cost = COSTS_N_INSNS (1); ++ case SQRT: + if (speed) + { +- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) +- *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv; +- else if (GET_MODE (x) == DFmode) +- *cost += extra_cost->fp[1].div; +- else if (GET_MODE (x) == SFmode) +- *cost += extra_cost->fp[0].div; ++ if (GET_MODE_CLASS (mode) == MODE_INT) ++ /* There is no integer SQRT, so only DIV and UDIV can get ++ here. 
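The MOD/UMOD costing above reflects that AArch64 has no integer remainder instruction: a remainder is a divide followed by a multiply-subtract, hence the combined divide-plus-multiply charge:

long
rem (long a, long b)
{
  return a % b;
  /* sdiv x2, x0, x1
     msub x0, x2, x1, x0   (typical -O2 output) */
}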
*/ ++ *cost += extra_cost->mult[mode == DImode].idiv; + else -+ return NULL_TREE; - default: - return NULL_TREE; - } -@@ -1127,6 +1172,25 @@ - return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); - break; ++ *cost += extra_cost->fp[mode == DFmode].div; } -+ VAR1 (REINTERP_SS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) -+ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) -+ VAR1 (REINTERP_US, reinterpretdi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) -+ VAR1 (REINTERP_PS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) -+ return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]); - VAR1 (UNOP, floatv2si, 2, v2sf) - VAR1 (UNOP, floatv4si, 2, v4sf) - VAR1 (UNOP, floatv2di, 2, v2df) ---- a/src/gcc/config/aarch64/aarch64-protos.h -+++ b/src/gcc/config/aarch64/aarch64-protos.h -@@ -108,9 +108,22 @@ - cost models and vectors for address cost calculations, register - move costs and memory move costs. */ + return false; /* All arguments need to be in registers. */ -+/* Scaled addressing modes can vary cost depending on the mode of the -+ value to be loaded/stored. QImode values cannot use scaled -+ addressing modes. */ ++ case IF_THEN_ELSE: ++ return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1), ++ XEXP (x, 2), cost, speed); + -+struct scale_addr_mode_cost -+{ -+ const int hi; -+ const int si; -+ const int di; -+ const int ti; -+}; ++ case EQ: ++ case NE: ++ case GT: ++ case GTU: ++ case LT: ++ case LTU: ++ case GE: ++ case GEU: ++ case LE: ++ case LEU: + - /* Additional cost for addresses. 
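The FMA case strips negations from its operands because the FMSUB/FNMADD/FNMSUB encodings absorb them, so a negated input should not be billed as a separate FNEG. In source terms, assuming the C99 fma function (instruction comments indicative):

#include <math.h>

double fmadd  (double a, double b, double c) { return fma (a, b, c);  }  /* FMADD  */
double fmsub  (double a, double b, double c) { return fma (-a, b, c); }  /* FMSUB  */
double fnmsub (double a, double b, double c) { return fma (a, b, -c); }  /* FNMSUB */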
*/ - struct cpu_addrcost_table - { -+ const struct scale_addr_mode_cost addr_scale_costs; - const int pre_modify; - const int post_modify; - const int register_offset; -@@ -175,6 +188,8 @@ - bool aarch64_is_long_call_p (rtx); - bool aarch64_label_mentioned_p (rtx); - bool aarch64_legitimate_pic_operand_p (rtx); -+bool aarch64_modes_tieable_p (enum machine_mode mode1, -+ enum machine_mode mode2); - bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); - bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, - enum machine_mode); ---- a/src/gcc/config/aarch64/aarch64-simd-builtins.def -+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def -@@ -51,18 +51,29 @@ - VAR1 (GETLANE, get_lane, 0, di) - BUILTIN_VALL (GETLANE, be_checked_get_lane, 0) - -- BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) -- BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) -- BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) -- BUILTIN_VDC (REINTERP, reinterpretv2si, 0) -- BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) -- BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) -- BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) -- BUILTIN_VQ (REINTERP, reinterpretv4si, 0) -- BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) -- BUILTIN_VQ (REINTERP, reinterpretv2di, 0) -- BUILTIN_VQ (REINTERP, reinterpretv2df, 0) -+ VAR1 (REINTERP_SS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) -+ BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) - -+ BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) ++ return false; /* All arguments must be in registers. */ + -+ VAR1 (REINTERP_US, reinterpretdi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_US, reinterpretv2sf, 0, df) ++ case FMA: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ op2 = XEXP (x, 2); ++ ++ if (speed) ++ *cost += extra_cost->fp[mode == DFmode].fma; + -+ BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) ++ /* FMSUB, FNMADD, and FNMSUB are free. */ ++ if (GET_CODE (op0) == NEG) ++ op0 = XEXP (op0, 0); + -+ VAR1 (REINTERP_PS, reinterpretdi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2si, 0, df) -+ VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) ++ if (GET_CODE (op2) == NEG) ++ op2 = XEXP (op2, 0); + - BUILTIN_VDQ_I (BINOP, dup_lane, 0) - /* Implemented by aarch64_qshl. */ - BUILTIN_VSDQ_I (BINOP, sqshl, 0) -@@ -107,6 +118,10 @@ - BUILTIN_VQ (STORESTRUCT, st3, 0) - BUILTIN_VQ (STORESTRUCT, st4, 0) - -+ BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0) -+ BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0) -+ BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0) ++ /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1, ++ and the by-element operand as operand 0. */ ++ if (GET_CODE (op1) == NEG) ++ op1 = XEXP (op1, 0); + - BUILTIN_VQW (BINOP, saddl2, 0) - BUILTIN_VQW (BINOP, uaddl2, 0) - BUILTIN_VQW (BINOP, ssubl2, 0) -@@ -142,8 +157,8 @@ - BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) - BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) - /* Implemented by aarch64_s. 
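The new addr_scale_costs field above lets a CPU charge differently for scaled (register-offset) addressing depending on the access width. A hypothetical table in the same shape, mirroring the declarations just added (the structs are redefined here so the sketch stands alone):

struct scale_addr_mode_cost { int hi, si, di, ti; };

struct cpu_addrcost_table_sketch
{
  struct scale_addr_mode_cost addr_scale_costs;
  int pre_modify, post_modify;
  int register_offset, register_extend, imm_offset;
};

/* Hypothetical CPU where scaled HImode and TImode accesses carry a
   one-unit penalty, as in the Cortex-A57 table elsewhere in this
   patch.  */
static const struct cpu_addrcost_table_sketch example_addrcosts =
{
  { 1, 0, 0, 1 },   /* hi, si, di, ti */
  0, 0,
  0, 0, 0,
};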
*/ -- BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) -- BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) -+ BUILTIN_VSDQ_I (UNOP, sqabs, 0) -+ BUILTIN_VSDQ_I (UNOP, sqneg, 0) - - BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) - BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) -@@ -265,7 +280,7 @@ - BUILTIN_VDQF (UNOP, nearbyint, 2) - BUILTIN_VDQF (UNOP, rint, 2) - BUILTIN_VDQF (UNOP, round, 2) -- BUILTIN_VDQF (UNOP, frintn, 2) -+ BUILTIN_VDQF_DF (UNOP, frintn, 2) - - /* Implemented by l2. */ - VAR1 (UNOP, lbtruncv2sf, 2, v2si) -@@ -330,6 +345,8 @@ - VAR1 (UNOP, floatunsv4si, 2, v4sf) - VAR1 (UNOP, floatunsv2di, 2, v2df) - -+ VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) ++ /* Catch vector-by-element operations. The by-element operand can ++ either be (vec_duplicate (vec_select (x))) or just ++ (vec_select (x)), depending on whether we are multiplying by ++ a vector or a scalar. + - /* Implemented by - aarch64_. */ - BUILTIN_VALL (BINOP, zip1, 0) ---- a/src/gcc/config/aarch64/aarch64.c -+++ b/src/gcc/config/aarch64/aarch64.c -@@ -63,6 +63,7 @@ - #include "cfgloop.h" - #include "tree-vectorizer.h" - #include "config/arm/aarch-cost-tables.h" -+#include "dumpfile.h" - - /* Defined for convenience. */ - #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) -@@ -141,6 +142,7 @@ - - static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, - const unsigned char *sel); -+static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool); - - /* The processor for which instructions should be scheduled. */ - enum aarch64_processor aarch64_tune = cortexa53; -@@ -171,6 +173,15 @@ - #endif - static const struct cpu_addrcost_table generic_addrcost_table = - { -+#if HAVE_DESIGNATED_INITIALIZERS -+ .addr_scale_costs = -+#endif -+ { -+ NAMED_PARAM (qi, 0), -+ NAMED_PARAM (hi, 0), -+ NAMED_PARAM (si, 0), -+ NAMED_PARAM (ti, 0), -+ }, - NAMED_PARAM (pre_modify, 0), - NAMED_PARAM (post_modify, 0), - NAMED_PARAM (register_offset, 0), -@@ -181,6 +192,27 @@ - #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 - __extension__ - #endif -+static const struct cpu_addrcost_table cortexa57_addrcost_table = -+{ -+#if HAVE_DESIGNATED_INITIALIZERS -+ .addr_scale_costs = -+#endif -+ { -+ NAMED_PARAM (qi, 0), -+ NAMED_PARAM (hi, 1), -+ NAMED_PARAM (si, 0), -+ NAMED_PARAM (ti, 1), -+ }, -+ NAMED_PARAM (pre_modify, 0), -+ NAMED_PARAM (post_modify, 0), -+ NAMED_PARAM (register_offset, 0), -+ NAMED_PARAM (register_extend, 0), -+ NAMED_PARAM (imm_offset, 0), -+}; ++ Canonicalization is not very good in these cases, FMA4 will put the ++ by-element operand as operand 0, FNMA4 will have it as operand 1. */ ++ if (GET_CODE (op0) == VEC_DUPLICATE) ++ op0 = XEXP (op0, 0); ++ else if (GET_CODE (op1) == VEC_DUPLICATE) ++ op1 = XEXP (op1, 0); + -+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 -+__extension__ -+#endif - static const struct cpu_regmove_cost generic_regmove_cost = - { - NAMED_PARAM (GP2GP, 1), -@@ -212,9 +244,29 @@ - NAMED_PARAM (cond_not_taken_branch_cost, 1) - }; - -+/* Generic costs for vector insn classes. 
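With the vector bswap builtins registered above (and hooked up to __builtin_bswap16/32/64 through the target's builtin_vectorized_function hook), a byte-swapping loop becomes a candidate for vectorisation with the REV16/REV32/REV64 instructions. Whether it actually vectorises still depends on the usual profitability checks:

#include <stdint.h>

void
bswap_array (uint32_t *dst, const uint32_t *src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = __builtin_bswap32 (src[i]);
}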
*/ - #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 - __extension__ - #endif -+static const struct cpu_vector_cost cortexa57_vector_cost = -+{ -+ NAMED_PARAM (scalar_stmt_cost, 1), -+ NAMED_PARAM (scalar_load_cost, 4), -+ NAMED_PARAM (scalar_store_cost, 1), -+ NAMED_PARAM (vec_stmt_cost, 3), -+ NAMED_PARAM (vec_to_scalar_cost, 8), -+ NAMED_PARAM (scalar_to_vec_cost, 8), -+ NAMED_PARAM (vec_align_load_cost, 5), -+ NAMED_PARAM (vec_unalign_load_cost, 5), -+ NAMED_PARAM (vec_unalign_store_cost, 1), -+ NAMED_PARAM (vec_store_cost, 1), -+ NAMED_PARAM (cond_taken_branch_cost, 1), -+ NAMED_PARAM (cond_not_taken_branch_cost, 1) -+}; ++ if (GET_CODE (op0) == VEC_SELECT) ++ op0 = XEXP (op0, 0); ++ else if (GET_CODE (op1) == VEC_SELECT) ++ op1 = XEXP (op1, 0); + -+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 -+__extension__ -+#endif - static const struct tune_params generic_tunings = - { - &cortexa57_extra_costs, -@@ -238,9 +290,9 @@ - static const struct tune_params cortexa57_tunings = - { - &cortexa57_extra_costs, -- &generic_addrcost_table, -+ &cortexa57_addrcost_table, - &generic_regmove_cost, -- &generic_vector_cost, -+ &cortexa57_vector_cost, - NAMED_PARAM (memmov_cost, 4), - NAMED_PARAM (issue_rate, 3) - }; -@@ -444,7 +496,7 @@ - represent an expression that matches an extend operation. The - operands represent the paramters from - -- (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */ -+ (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */ - bool - aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm, - rtx extract_imm) -@@ -636,12 +688,24 @@ - - case SYMBOL_SMALL_TLSDESC: - { -- rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM); -+ enum machine_mode mode = GET_MODE (dest); -+ rtx x0 = gen_rtx_REG (mode, R0_REGNUM); - rtx tp; - -- emit_insn (gen_tlsdesc_small (imm)); -+ gcc_assert (mode == Pmode || mode == ptr_mode); ++ /* If the remaining parameters are not registers, ++ get the cost to put them into registers. */ ++ *cost += rtx_cost (op0, FMA, 0, speed); ++ *cost += rtx_cost (op1, FMA, 1, speed); ++ *cost += rtx_cost (op2, FMA, 2, speed); ++ return true; + -+ /* In ILP32, the got entry is always of SImode size. Unlike -+ small GOT, the dest is fixed at reg 0. */ -+ if (TARGET_ILP32) -+ emit_insn (gen_tlsdesc_small_si (imm)); -+ else -+ emit_insn (gen_tlsdesc_small_di (imm)); - tp = aarch64_load_tp (NULL); -- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0))); ++ case FLOAT_EXTEND: ++ if (speed) ++ *cost += extra_cost->fp[mode == DFmode].widen; ++ return false; + -+ if (mode != Pmode) -+ tp = gen_lowpart (mode, tp); ++ case FLOAT_TRUNCATE: ++ if (speed) ++ *cost += extra_cost->fp[mode == DFmode].narrow; ++ return false; + -+ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0))); - set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); - return; - } -@@ -648,10 +712,34 @@ - - case SYMBOL_SMALL_GOTTPREL: - { -- rtx tmp_reg = gen_reg_rtx (Pmode); -+ /* In ILP32, the mode of dest can be either SImode or DImode, -+ while the got entry is always of SImode size. The mode of -+ dest depends on how dest is used: if dest is assigned to a -+ pointer (e.g. in the memory), it has SImode; it may have -+ DImode if dest is dereferenced to access the memeory. -+ This is why we have to handle three different tlsie_small -+ patterns here (two patterns for ILP32). 
*/ -+ enum machine_mode mode = GET_MODE (dest); -+ rtx tmp_reg = gen_reg_rtx (mode); - rtx tp = aarch64_load_tp (NULL); -- emit_insn (gen_tlsie_small (tmp_reg, imm)); -- emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg))); ++ case ABS: ++ if (GET_MODE_CLASS (mode) == MODE_FLOAT) ++ { ++ /* FABS and FNEG are analogous. */ ++ if (speed) ++ *cost += extra_cost->fp[mode == DFmode].neg; ++ } ++ else ++ { ++ /* Integer ABS will either be split to ++ two arithmetic instructions, or will be an ABS ++ (scalar), which we don't model. */ ++ *cost = COSTS_N_INSNS (2); ++ if (speed) ++ *cost += 2 * extra_cost->alu.arith; ++ } ++ return false; ++ ++ case SMAX: ++ case SMIN: ++ if (speed) ++ { ++ /* FMAXNM/FMINNM/FMAX/FMIN. ++ TODO: This may not be accurate for all implementations, but ++ we do not model this in the cost tables. */ ++ *cost += extra_cost->fp[mode == DFmode].addsub; ++ } ++ return false; + -+ if (mode == ptr_mode) -+ { -+ if (mode == DImode) -+ emit_insn (gen_tlsie_small_di (tmp_reg, imm)); -+ else -+ { -+ emit_insn (gen_tlsie_small_si (tmp_reg, imm)); -+ tp = gen_lowpart (mode, tp); -+ } -+ } -+ else -+ { -+ gcc_assert (mode == Pmode); -+ emit_insn (gen_tlsie_small_sidi (tmp_reg, imm)); -+ } ++ case TRUNCATE: + -+ emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg))); - set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); - return; - } -@@ -1716,8 +1804,6 @@ - if (reload_completed && cfun->machine->frame.laid_out) - return; - -- cfun->machine->frame.fp_lr_offset = 0; -- - /* First mark all the registers that really need to be saved... */ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) - cfun->machine->frame.reg_offset[regno] = -1; -@@ -1767,7 +1853,6 @@ - { - cfun->machine->frame.reg_offset[R29_REGNUM] = offset; - offset += UNITS_PER_WORD; -- cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD; - } - - if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1) -@@ -1774,7 +1859,6 @@ - { - cfun->machine->frame.reg_offset[R30_REGNUM] = offset; - offset += UNITS_PER_WORD; -- cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD; - } - - cfun->machine->frame.padding0 = -@@ -2449,12 +2533,22 @@ - - 2 * UNITS_PER_WORD)); - } - --/* Output code to build up a constant in a register. */ --static void --aarch64_build_constant (int regnum, HOST_WIDE_INT val) -+/* Possibly output code to build up a constant in a register. For -+ the benefit of the costs infrastructure, returns the number of -+ instructions which would be emitted. GENERATE inhibits or -+ enables code generation. */ ++ /* Decompose muldi3_highpart. */ ++ if (/* (truncate:DI */ ++ mode == DImode ++ /* (lshiftrt:TI */ ++ && GET_MODE (XEXP (x, 0)) == TImode ++ && GET_CODE (XEXP (x, 0)) == LSHIFTRT ++ /* (mult:TI */ ++ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT ++ /* (ANY_EXTEND:TI (reg:DI)) ++ (ANY_EXTEND:TI (reg:DI))) */ ++ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND ++ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND) ++ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND ++ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)) ++ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode ++ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode ++ /* (const_int 64) */ ++ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) ++ && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64) ++ { ++ /* UMULH/SMULH. 
*/ ++ if (speed) ++ *cost += extra_cost->mult[mode == DImode].extend; ++ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0), ++ MULT, 0, speed); ++ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0), ++ MULT, 1, speed); ++ return true; ++ } + -+static int -+aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate) - { -+ int insns = 0; ++ /* Fall through. */ + default: +- break; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, ++ "\nFailed to cost RTX. Assuming default cost.\n"); + - if (aarch64_bitmask_imm (val, DImode)) -- emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); -+ { -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); -+ insns = 1; -+ } - else - { - int i; -@@ -2485,15 +2579,19 @@ - the same. */ - if (ncount < zcount) - { -- emit_move_insn (gen_rtx_REG (Pmode, regnum), -- GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); - tval = 0xffff; -+ insns++; - } - else - { -- emit_move_insn (gen_rtx_REG (Pmode, regnum), -- GEN_INT (val & 0xffff)); -+ if (generate) -+ emit_move_insn (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (val & 0xffff)); - tval = 0; -+ insns++; - } - - val >>= 16; -@@ -2501,11 +2599,17 @@ - for (i = 16; i < 64; i += 16) - { - if ((val & 0xffff) != tval) -- emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), -- GEN_INT (i), GEN_INT (val & 0xffff))); -+ { -+ if (generate) -+ emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), -+ GEN_INT (i), -+ GEN_INT (val & 0xffff))); -+ insns++; -+ } - val >>= 16; - } - } -+ return insns; - } - - static void -@@ -2520,7 +2624,7 @@ - - if (mdelta >= 4096 * 4096) - { -- aarch64_build_constant (scratchreg, delta); -+ (void) aarch64_build_constant (scratchreg, delta, true); - emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); ++ return true; } - else if (mdelta > 0) -@@ -2594,7 +2698,7 @@ - addr = plus_constant (Pmode, temp0, vcall_offset); - else - { -- aarch64_build_constant (IP1_REGNUM, vcall_offset); -+ (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true); - addr = gen_rtx_PLUS (Pmode, temp0, temp1); - } - -@@ -3046,11 +3150,11 @@ - enum rtx_code code = GET_CODE (x); - rtx op0, op1; - bool allow_reg_index_p = -- outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16; -- -+ outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16 -+ || aarch64_vector_mode_supported_p (mode)); - /* Don't support anything other than POST_INC or REG addressing for - AdvSIMD. 
*/ -- if (aarch64_vector_mode_p (mode) -+ if (aarch64_vect_struct_mode_p (mode) - && (code != POST_INC && code != REG)) - return false; - -@@ -3839,34 +3943,34 @@ - if (addr.offset == const0_rtx) - asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); - else -- asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - - case ADDRESS_REG_REG: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)], - reg_names [REGNO (addr.offset)]); - else -- asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)], - reg_names [REGNO (addr.offset)], addr.shift); - return; - - case ADDRESS_REG_UXTW: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM); - else -- asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM, addr.shift); - return; - - case ADDRESS_REG_SXTW: - if (addr.shift == 0) -- asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM); - else -- asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)], - REGNO (addr.offset) - R0_REGNUM, addr.shift); - return; - -@@ -3874,27 +3978,27 @@ - switch (GET_CODE (x)) - { - case PRE_INC: -- asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case POST_INC: -- asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case PRE_DEC: -- asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case POST_DEC: -- asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], - GET_MODE_SIZE (aarch64_memory_reference_mode)); - return; - case PRE_MODIFY: -- asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - case POST_MODIFY: -- asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)], -+ asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)], - INTVAL (addr.offset)); - return; - default: -@@ -3903,7 +4007,7 @@ - break; + return false; + } - case ADDRESS_LO_SUM: -- asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]); -+ asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]); - output_addr_const (f, addr.offset); - asm_fprintf (f, "]"); - return; -@@ -3980,8 +4084,8 @@ +-static int +-aarch64_address_cost (rtx x ATTRIBUTE_UNUSED, +- enum machine_mode mode ATTRIBUTE_UNUSED, +- addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) ++/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost ++ calculated for X. This cost is stored in *COST. 
Returns true ++ if the total cost of X was calculated. */ ++static bool ++aarch64_rtx_costs_wrapper (rtx x, int code, int outer, ++ int param, int *cost, bool speed) { - rtx x = *x_p; +- enum rtx_code c = GET_CODE (x); +- const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; ++ bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed); -- /* Do not allow mem (plus (reg, const)) if vector mode. */ -- if (aarch64_vector_mode_p (mode) -+ /* Do not allow mem (plus (reg, const)) if vector struct mode. */ -+ if (aarch64_vect_struct_mode_p (mode) - && GET_CODE (x) == PLUS - && REG_P (XEXP (x, 0)) - && CONST_INT_P (XEXP (x, 1))) -@@ -4150,32 +4254,31 @@ - + crtl->outgoing_args_size - + cfun->machine->saved_varargs_size); +- if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) +- return addr_cost->pre_modify; +- +- if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) +- return addr_cost->post_modify; +- +- if (c == PLUS) ++ if (dump_file && (dump_flags & TDF_DETAILS)) + { +- if (GET_CODE (XEXP (x, 1)) == CONST_INT) +- return addr_cost->imm_offset; +- else if (GET_CODE (XEXP (x, 0)) == MULT +- || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND +- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) +- return addr_cost->register_extend; +- +- return addr_cost->register_offset; ++ print_rtl_single (dump_file, x); ++ fprintf (dump_file, "\n%s cost: %d (%s)\n", ++ speed ? "Hot" : "Cold", ++ *cost, result ? "final" : "partial"); + } +- else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) +- return addr_cost->imm_offset; -- frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); -- offset = frame_size; -+ frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); -+ offset = frame_size; +- return 0; ++ return result; + } -- if (to == HARD_FRAME_POINTER_REGNUM) -- { -- if (from == ARG_POINTER_REGNUM) -- return offset - crtl->outgoing_args_size; -+ if (to == HARD_FRAME_POINTER_REGNUM) -+ { -+ if (from == ARG_POINTER_REGNUM) -+ return offset - crtl->outgoing_args_size; + static int +@@ -8038,7 +9008,145 @@ + return true; + } -- if (from == FRAME_POINTER_REGNUM) -- return cfun->machine->frame.saved_regs_size + get_frame_size (); -- } -+ if (from == FRAME_POINTER_REGNUM) -+ return cfun->machine->frame.saved_regs_size + get_frame_size (); ++/* Recognize patterns for the EXT insn. */ ++ + static bool ++aarch64_evpc_ext (struct expand_vec_perm_d *d) ++{ ++ unsigned int i, nelt = d->nelt; ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ rtx offset; ++ ++ unsigned int location = d->perm[0]; /* Always < nelt. */ ++ ++ /* Check if the extracted indices are increasing by one. */ ++ for (i = 1; i < nelt; i++) ++ { ++ unsigned int required = location + i; ++ if (d->one_vector_p) ++ { ++ /* We'll pass the same vector in twice, so allow indices to wrap. 
*/ ++ required &= (nelt - 1); ++ } ++ if (d->perm[i] != required) ++ return false; + } - -- if (to == STACK_POINTER_REGNUM) -- { -- if (from == FRAME_POINTER_REGNUM) -- { -- HOST_WIDE_INT elim = crtl->outgoing_args_size -- + cfun->machine->frame.saved_regs_size -- + get_frame_size () -- - cfun->machine->frame.fp_lr_offset; -- elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); -- return elim; -- } -- } -+ if (to == STACK_POINTER_REGNUM) ++ ++ switch (d->vmode) + { -+ if (from == FRAME_POINTER_REGNUM) ++ case V16QImode: gen = gen_aarch64_extv16qi; break; ++ case V8QImode: gen = gen_aarch64_extv8qi; break; ++ case V4HImode: gen = gen_aarch64_extv4hi; break; ++ case V8HImode: gen = gen_aarch64_extv8hi; break; ++ case V2SImode: gen = gen_aarch64_extv2si; break; ++ case V4SImode: gen = gen_aarch64_extv4si; break; ++ case V2SFmode: gen = gen_aarch64_extv2sf; break; ++ case V4SFmode: gen = gen_aarch64_extv4sf; break; ++ case V2DImode: gen = gen_aarch64_extv2di; break; ++ case V2DFmode: gen = gen_aarch64_extv2df; break; ++ default: ++ return false; ++ } ++ ++ /* Success! */ ++ if (d->testing_p) ++ return true; ++ ++ /* The case where (location == 0) is a no-op for both big- and little-endian, ++ and is removed by the mid-end at optimization levels -O1 and higher. */ ++ ++ if (BYTES_BIG_ENDIAN && (location != 0)) ++ { ++ /* After setup, we want the high elements of the first vector (stored ++ at the LSB end of the register), and the low elements of the second ++ vector (stored at the MSB end of the register). So swap. */ ++ rtx temp = d->op0; ++ d->op0 = d->op1; ++ d->op1 = temp; ++ /* location != 0 (above), so safe to assume (nelt - location) < nelt. */ ++ location = nelt - location; ++ } ++ ++ offset = GEN_INT (location); ++ emit_insn (gen (d->target, d->op0, d->op1, offset)); ++ return true; ++} ++ ++/* Recognize patterns for the REV insns. 
*/ ++ ++static bool ++aarch64_evpc_rev (struct expand_vec_perm_d *d) ++{ ++ unsigned int i, j, diff, nelt = d->nelt; ++ rtx (*gen) (rtx, rtx); ++ ++ if (!d->one_vector_p) ++ return false; ++ ++ diff = d->perm[0]; ++ switch (diff) ++ { ++ case 7: ++ switch (d->vmode) + { -+ HOST_WIDE_INT elim = crtl->outgoing_args_size -+ + cfun->machine->frame.saved_regs_size -+ + get_frame_size (); -+ elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); -+ return elim; ++ case V16QImode: gen = gen_aarch64_rev64v16qi; break; ++ case V8QImode: gen = gen_aarch64_rev64v8qi; break; ++ default: ++ return false; ++ } ++ break; ++ case 3: ++ switch (d->vmode) ++ { ++ case V16QImode: gen = gen_aarch64_rev32v16qi; break; ++ case V8QImode: gen = gen_aarch64_rev32v8qi; break; ++ case V8HImode: gen = gen_aarch64_rev64v8hi; break; ++ case V4HImode: gen = gen_aarch64_rev64v4hi; break; ++ default: ++ return false; ++ } ++ break; ++ case 1: ++ switch (d->vmode) ++ { ++ case V16QImode: gen = gen_aarch64_rev16v16qi; break; ++ case V8QImode: gen = gen_aarch64_rev16v8qi; break; ++ case V8HImode: gen = gen_aarch64_rev32v8hi; break; ++ case V4HImode: gen = gen_aarch64_rev32v4hi; break; ++ case V4SImode: gen = gen_aarch64_rev64v4si; break; ++ case V2SImode: gen = gen_aarch64_rev64v2si; break; ++ case V4SFmode: gen = gen_aarch64_rev64v4sf; break; ++ case V2SFmode: gen = gen_aarch64_rev64v2sf; break; ++ default: ++ return false; + } ++ break; ++ default: ++ return false; + } - -- return offset; -+ return offset; - } - - -@@ -4443,9 +4546,13 @@ ++ ++ for (i = 0; i < nelt ; i += diff + 1) ++ for (j = 0; j <= diff; j += 1) ++ { ++ /* This is guaranteed to be true as the value of diff ++ is 7, 3, 1 and we should have enough elements in the ++ queue to generate this. Getting a vector mask with a ++ value of diff other than these values implies that ++ something is wrong by the time we get here. */ ++ gcc_assert (i + j < nelt); ++ if (d->perm[i + j] != i + diff - j) ++ return false; ++ } ++ ++ /* Success! */ ++ if (d->testing_p) ++ return true; ++ ++ emit_insn (gen (d->target, d->op0)); ++ return true; ++} ++ ++static bool + aarch64_evpc_dup (struct expand_vec_perm_d *d) { - rtx op = x; + rtx (*gen) (rtx, rtx, rtx); +@@ -8094,11 +9202,6 @@ + enum machine_mode vmode = d->vmode; + unsigned int i, nelt = d->nelt; -+ /* We accept both ROTATERT and ROTATE: since the RHS must be a constant -+ we can convert both to ROR during final output. */ - if ((GET_CODE (op) == ASHIFT - || GET_CODE (op) == ASHIFTRT -- || GET_CODE (op) == LSHIFTRT) -+ || GET_CODE (op) == LSHIFTRT -+ || GET_CODE (op) == ROTATERT -+ || GET_CODE (op) == ROTATE) - && CONST_INT_P (XEXP (op, 1))) - return XEXP (op, 0); +- /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's +- numbering of elements for big-endian, we must reverse the order. */ +- if (BYTES_BIG_ENDIAN) +- return false; +- + if (d->testing_p) + return true; -@@ -4457,12 +4564,12 @@ - return x; - } +@@ -8109,7 +9212,15 @@ + return false; --/* Helper function for rtx cost calculation. Strip a shift or extend -+/* Helper function for rtx cost calculation. Strip an extend - expression from X. Returns the inner operand if successful, or the - original expression on failure. We deal with a number of possible - canonicalization variations here. 
*/ - static rtx --aarch64_strip_shift_or_extend (rtx x) -+aarch64_strip_extend (rtx x) - { - rtx op = x; + for (i = 0; i < nelt; ++i) +- rperm[i] = GEN_INT (d->perm[i]); ++ { ++ int nunits = GET_MODE_NUNITS (vmode); ++ ++ /* If big-endian and two vectors we end up with a weird mixed-endian ++ mode on NEON. Reverse the index within each word but not the word ++ itself. */ ++ rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1) ++ : d->perm[i]); ++ } + sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); + sel = force_reg (vmode, sel); -@@ -4469,6 +4576,7 @@ - /* Zero and sign extraction of a widened value. */ - if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) - && XEXP (op, 2) == const0_rtx -+ && GET_CODE (XEXP (op, 0)) == MULT - && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1), - XEXP (op, 1))) - return XEXP (XEXP (op, 0), 0); -@@ -4497,9 +4605,316 @@ - if (op != x) - return op; +@@ -8138,8 +9249,12 @@ -- return aarch64_strip_shift (x); -+ return x; + if (TARGET_SIMD) + { +- if (aarch64_evpc_zip (d)) ++ if (aarch64_evpc_rev (d)) + return true; ++ else if (aarch64_evpc_ext (d)) ++ return true; ++ else if (aarch64_evpc_zip (d)) ++ return true; + else if (aarch64_evpc_uzp (d)) + return true; + else if (aarch64_evpc_trn (d)) +@@ -8264,7 +9379,8 @@ + /* Limited combinations of subregs are safe on FPREGs. Particularly, + 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed. + 2. Scalar to Scalar for integer modes or same size float modes. +- 3. Vector to Vector modes. */ ++ 3. Vector to Vector modes. ++ 4. On little-endian only, Vector-Structure to Vector modes. */ + if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to)) + { + if (aarch64_vector_mode_supported_p (from) +@@ -8280,11 +9396,199 @@ + if (aarch64_vector_mode_supported_p (from) + && aarch64_vector_mode_supported_p (to)) + return false; ++ ++ /* Within an vector structure straddling multiple vector registers ++ we are in a mixed-endian representation. As such, we can't ++ easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can ++ switch between vectors and vector structures cheaply. */ ++ if (!BYTES_BIG_ENDIAN) ++ if ((aarch64_vector_mode_supported_p (from) ++ && aarch64_vect_struct_mode_p (to)) ++ || (aarch64_vector_mode_supported_p (to) ++ && aarch64_vect_struct_mode_p (from))) ++ return false; + } + + return true; } -+/* Helper function for rtx cost calculation. Calculate the cost of -+ a MULT, which may be part of a multiply-accumulate rtx. Return -+ the calculated cost of the expression, recursing manually in to -+ operands where needed. */ ++/* Implement MODES_TIEABLE_P. */ ++ ++bool ++aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) ++{ ++ if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)) ++ return true; ++ ++ /* We specifically want to allow elements of "structure" modes to ++ be tieable to the structure. This more general condition allows ++ other rarer situations too. */ ++ if (TARGET_SIMD ++ && aarch64_vector_mode_p (mode1) ++ && aarch64_vector_mode_p (mode2)) ++ return true; ++ ++ return false; ++} + -+static int -+aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) ++/* Return a new RTX holding the result of moving POINTER forward by ++ AMOUNT bytes. 
*/ ++ ++static rtx ++aarch64_move_pointer (rtx pointer, int amount) +{ -+ rtx op0, op1; -+ const struct cpu_cost_table *extra_cost -+ = aarch64_tune_params->insn_extra_cost; -+ int cost = 0; -+ bool maybe_fma = (outer == PLUS || outer == MINUS); -+ enum machine_mode mode = GET_MODE (x); ++ rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount); + -+ gcc_checking_assert (code == MULT); ++ return adjust_automodify_address (pointer, GET_MODE (pointer), ++ next, amount); ++} + -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); ++/* Return a new RTX holding the result of moving POINTER forward by the ++ size of the mode it points to. */ + -+ if (VECTOR_MODE_P (mode)) -+ mode = GET_MODE_INNER (mode); ++static rtx ++aarch64_progress_pointer (rtx pointer) ++{ ++ HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer)); + -+ /* Integer multiply/fma. */ -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ { -+ /* The multiply will be canonicalized as a shift, cost it as such. */ -+ if (CONST_INT_P (op1) -+ && exact_log2 (INTVAL (op1)) > 0) -+ { -+ if (speed) -+ { -+ if (maybe_fma) -+ /* ADD (shifted register). */ -+ cost += extra_cost->alu.arith_shift; -+ else -+ /* LSL (immediate). */ -+ cost += extra_cost->alu.shift; -+ } ++ return aarch64_move_pointer (pointer, amount); ++} + -+ cost += rtx_cost (op0, GET_CODE (op0), 0, speed); ++/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by ++ MODE bytes. */ + -+ return cost; -+ } ++static void ++aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, ++ enum machine_mode mode) ++{ ++ rtx reg = gen_reg_rtx (mode); + -+ /* Integer multiplies or FMAs have zero/sign extending variants. */ -+ if ((GET_CODE (op0) == ZERO_EXTEND -+ && GET_CODE (op1) == ZERO_EXTEND) -+ || (GET_CODE (op0) == SIGN_EXTEND -+ && GET_CODE (op1) == SIGN_EXTEND)) -+ { -+ cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed) -+ + rtx_cost (XEXP (op1, 0), MULT, 1, speed); ++ /* "Cast" the pointers to the correct mode. */ ++ *src = adjust_address (*src, mode, 0); ++ *dst = adjust_address (*dst, mode, 0); ++ /* Emit the memcpy. */ ++ emit_move_insn (reg, *src); ++ emit_move_insn (*dst, reg); ++ /* Move the pointers forward. */ ++ *src = aarch64_progress_pointer (*src); ++ *dst = aarch64_progress_pointer (*dst); ++} + -+ if (speed) -+ { -+ if (maybe_fma) -+ /* MADD/SMADDL/UMADDL. */ -+ cost += extra_cost->mult[0].extend_add; -+ else -+ /* MUL/SMULL/UMULL. */ -+ cost += extra_cost->mult[0].extend; -+ } ++/* Expand movmem, as if from a __builtin_memcpy. Return true if ++ we succeed, otherwise return false. */ + -+ return cost; -+ } ++bool ++aarch64_expand_movmem (rtx *operands) ++{ ++ unsigned int n; ++ rtx dst = operands[0]; ++ rtx src = operands[1]; ++ rtx base; ++ bool speed_p = !optimize_function_for_size_p (cfun); + -+ /* This is either an integer multiply or an FMA. In both cases -+ we want to recurse and cost the operands. */ -+ cost += rtx_cost (op0, MULT, 0, speed) -+ + rtx_cost (op1, MULT, 1, speed); ++ /* When optimizing for size, give a better estimate of the length of a ++ memcpy call, but use the default otherwise. */ ++ unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2; + -+ if (speed) ++ /* We can't do anything smart if the amount to copy is not constant. */ ++ if (!CONST_INT_P (operands[2])) ++ return false; ++ ++ n = UINTVAL (operands[2]); ++ ++ /* Try to keep the number of instructions low. For cases below 16 bytes we ++ need to make at most two moves. 
For cases above 16 bytes it will be one ++ move for each 16 byte chunk, then at most two additional moves. */ ++ if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions) ++ return false; ++ ++ base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); ++ dst = adjust_automodify_address (dst, VOIDmode, base, 0); ++ ++ base = copy_to_mode_reg (Pmode, XEXP (src, 0)); ++ src = adjust_automodify_address (src, VOIDmode, base, 0); ++ ++ /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a ++ 1-byte chunk. */ ++ if (n < 4) ++ { ++ if (n >= 2) + { -+ if (maybe_fma) -+ /* MADD. */ -+ cost += extra_cost->mult[mode == DImode].add; -+ else -+ /* MUL. */ -+ cost += extra_cost->mult[mode == DImode].simple; ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); ++ n -= 2; + } + -+ return cost; ++ if (n == 1) ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); ++ ++ return true; + } -+ else ++ ++ /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second ++ 4-byte chunk, partially overlapping with the previously copied chunk. */ ++ if (n < 8) + { -+ if (speed) ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); ++ n -= 4; ++ if (n > 0) + { -+ /* Floating-point FMA/FMUL can also support negations of the -+ operands. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ if (GET_CODE (op1) == NEG) -+ op1 = XEXP (op1, 0); ++ int move = n - 4; + -+ if (maybe_fma) -+ /* FMADD/FNMADD/FNMSUB/FMSUB. */ -+ cost += extra_cost->fp[mode == DFmode].fma; -+ else -+ /* FMUL/FNMUL. */ -+ cost += extra_cost->fp[mode == DFmode].mult; ++ src = aarch64_move_pointer (src, move); ++ dst = aarch64_move_pointer (dst, move); ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); + } -+ -+ cost += rtx_cost (op0, MULT, 0, speed) -+ + rtx_cost (op1, MULT, 1, speed); -+ return cost; ++ return true; + } -+} -+ -+static int -+aarch64_address_cost (rtx x, -+ enum machine_mode mode, -+ addr_space_t as ATTRIBUTE_UNUSED, -+ bool speed) -+{ -+ enum rtx_code c = GET_CODE (x); -+ const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; -+ struct aarch64_address_info info; -+ int cost = 0; -+ info.shift = 0; + -+ if (!aarch64_classify_address (&info, x, mode, c, false)) ++ /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of ++ them, then (if applicable) an 8-byte chunk. */ ++ while (n >= 8) + { -+ if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF) ++ if (n / 16) + { -+ /* This is a CONST or SYMBOL ref which will be split -+ in a different way depending on the code model in use. -+ Cost it through the generic infrastructure. */ -+ int cost_symbol_ref = rtx_cost (x, MEM, 1, speed); -+ /* Divide through by the cost of one instruction to -+ bring it to the same units as the address costs. */ -+ cost_symbol_ref /= COSTS_N_INSNS (1); -+ /* The cost is then the cost of preparing the address, -+ followed by an immediate (possibly 0) offset. */ -+ return cost_symbol_ref + addr_cost->imm_offset; ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode); ++ n -= 16; + } + else + { -+ /* This is most likely a jump table from a case -+ statement. */ -+ return addr_cost->register_offset; ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); ++ n -= 8; + } + } + -+ switch (info.type) ++ /* Finish the final bytes of the copy. We can always do this in one ++ instruction. We either copy the exact amount we need, or partially ++ overlap with the previous chunk we copied and copy 8-bytes. 
*/ ++ if (n == 0) ++ return true; ++ else if (n == 1) ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); ++ else if (n == 2) ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); ++ else if (n == 4) ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); ++ else + { -+ case ADDRESS_LO_SUM: -+ case ADDRESS_SYMBOLIC: -+ case ADDRESS_REG_IMM: -+ cost += addr_cost->imm_offset; -+ break; ++ if (n == 3) ++ { ++ src = aarch64_move_pointer (src, -1); ++ dst = aarch64_move_pointer (dst, -1); ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); ++ } ++ else ++ { ++ int move = n - 8; + -+ case ADDRESS_REG_WB: -+ if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) -+ cost += addr_cost->pre_modify; -+ else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) -+ cost += addr_cost->post_modify; -+ else -+ gcc_unreachable (); ++ src = aarch64_move_pointer (src, move); ++ dst = aarch64_move_pointer (dst, move); ++ aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); ++ } ++ } + -+ break; ++ return true; ++} + -+ case ADDRESS_REG_REG: -+ cost += addr_cost->register_offset; -+ break; + #undef TARGET_ADDRESS_COST + #define TARGET_ADDRESS_COST aarch64_address_cost + +@@ -8455,7 +9759,7 @@ + #define TARGET_RETURN_IN_MSB aarch64_return_in_msb + + #undef TARGET_RTX_COSTS +-#define TARGET_RTX_COSTS aarch64_rtx_costs ++#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper + + #undef TARGET_SCHED_ISSUE_RATE + #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate +@@ -8493,6 +9797,10 @@ + #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ + aarch64_autovectorize_vector_sizes + ++#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV ++#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ ++ aarch64_atomic_assign_expand_fenv + -+ case ADDRESS_REG_UXTW: -+ case ADDRESS_REG_SXTW: -+ cost += addr_cost->register_extend; -+ break; + /* Section anchor support. */ + + #undef TARGET_MIN_ANCHOR_OFFSET +--- a/src/gcc/config/aarch64/aarch64-linux.h ++++ b/src/gcc/config/aarch64/aarch64-linux.h +@@ -21,7 +21,7 @@ + #ifndef GCC_AARCH64_LINUX_H + #define GCC_AARCH64_LINUX_H + +-#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" ++#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1" + + #define CPP_SPEC "%{pthread:-D_REENTRANT}" + +@@ -33,7 +33,7 @@ + -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ + -X \ + %{mbig-endian:-EB} %{mlittle-endian:-EL} \ +- -maarch64linux%{mbig-endian:b}" ++ -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}" + + #define LINK_SPEC LINUX_TARGET_LINK_SPEC + +--- a/src/gcc/config/aarch64/iterators.md ++++ b/src/gcc/config/aarch64/iterators.md +@@ -150,6 +150,9 @@ + ;; Vector modes for H and S types. + (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) + ++;; Vector modes for H, S and D types. ++(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) + -+ default: -+ gcc_unreachable (); -+ } + ;; Vector modes for Q, H and S types. + (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) + +@@ -267,6 +270,10 @@ + UNSPEC_UZP2 ; Used in vector permute patterns. + UNSPEC_TRN1 ; Used in vector permute patterns. + UNSPEC_TRN2 ; Used in vector permute patterns. ++ UNSPEC_EXT ; Used in aarch64-simd.md. ++ UNSPEC_REV64 ; Used in vector reverse patterns (permute). ++ UNSPEC_REV32 ; Used in vector reverse patterns (permute). ++ UNSPEC_REV16 ; Used in vector reverse patterns (permute). + UNSPEC_AESE ; Used in aarch64-simd.md. + UNSPEC_AESD ; Used in aarch64-simd.md. 
+ UNSPEC_AESMC ; Used in aarch64-simd.md. +@@ -352,6 +359,9 @@ + (V2DI "2d") (V2SF "2s") + (V4SF "4s") (V2DF "2d")]) + ++(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32") ++ (V4SI "32") (V2DI "64")]) + + (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") + (V4HI ".4h") (V8HI ".8h") + (V2SI ".2s") (V4SI ".4s") +@@ -546,6 +556,32 @@ + + (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) + ++;; Mode of pair of elements for each vector mode, to define transfer ++;; size for structure lane/dup loads and stores. ++(define_mode_attr V_TWO_ELEM [(V8QI "HI") (V16QI "HI") ++ (V4HI "SI") (V8HI "SI") ++ (V2SI "V2SI") (V4SI "V2SI") ++ (DI "V2DI") (V2DI "V2DI") ++ (V2SF "V2SF") (V4SF "V2SF") ++ (DF "V2DI") (V2DF "V2DI")]) + -+ if (info.shift > 0) -+ { -+ /* For the sake of calculating the cost of the shifted register -+ component, we can treat same sized modes in the same way. */ -+ switch (GET_MODE_BITSIZE (mode)) -+ { -+ case 16: -+ cost += addr_cost->addr_scale_costs.hi; -+ break; ++;; Similar, for three elements. ++(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK") ++ (V4HI "BLK") (V8HI "BLK") ++ (V2SI "BLK") (V4SI "BLK") ++ (DI "EI") (V2DI "EI") ++ (V2SF "BLK") (V4SF "BLK") ++ (DF "EI") (V2DF "EI")]) + -+ case 32: -+ cost += addr_cost->addr_scale_costs.si; -+ break; ++;; Similar, for four elements. ++(define_mode_attr V_FOUR_ELEM [(V8QI "SI") (V16QI "SI") ++ (V4HI "V4HI") (V8HI "V4HI") ++ (V2SI "V4SI") (V4SI "V4SI") ++ (DI "OI") (V2DI "OI") ++ (V2SF "V4SF") (V4SF "V4SF") ++ (DF "OI") (V2DF "OI")]) + -+ case 64: -+ cost += addr_cost->addr_scale_costs.di; -+ break; + -+ /* We can't tell, or this is a 128-bit vector. */ -+ default: -+ cost += addr_cost->addr_scale_costs.ti; -+ break; -+ } -+ } + ;; Mode for atomic operation suffixes + (define_mode_attr atomic_sfx + [(QI "b") (HI "h") (SI "") (DI "")]) +@@ -847,6 +883,8 @@ + UNSPEC_TRN1 UNSPEC_TRN2 + UNSPEC_UZP1 UNSPEC_UZP2]) + ++(define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16]) + -+ return cost; -+} + (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM + UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX + UNSPEC_FRINTA]) +@@ -856,6 +894,10 @@ + + (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) + ++(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W ++ UNSPEC_CRC32X UNSPEC_CRC32CB UNSPEC_CRC32CH ++ UNSPEC_CRC32CW UNSPEC_CRC32CX]) ++ + (define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD]) + (define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC]) + +@@ -974,6 +1016,10 @@ + (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") + (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) + ++; op code for REV instructions (size within which elements are reversed). ++(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") ++ (UNSPEC_REV16 "16")]) + -+/* Return true if the RTX X in mode MODE is a zero or sign extract -+ usable in an ADD or SUB (extended register) instruction. */ -+static bool -+aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode) -+{ -+ /* Catch add with a sign extract. -+ This is add__multp2. 
*/ -+ if (GET_CODE (x) == SIGN_EXTRACT -+ || GET_CODE (x) == ZERO_EXTRACT) -+ { -+ rtx op0 = XEXP (x, 0); -+ rtx op1 = XEXP (x, 1); -+ rtx op2 = XEXP (x, 2); + (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") + (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") + (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) +@@ -980,6 +1026,16 @@ + + (define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) + ++(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h") ++ (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32X "crc32x") ++ (UNSPEC_CRC32CB "crc32cb") (UNSPEC_CRC32CH "crc32ch") ++ (UNSPEC_CRC32CW "crc32cw") (UNSPEC_CRC32CX "crc32cx")]) + -+ if (GET_CODE (op0) == MULT -+ && CONST_INT_P (op1) -+ && op2 == const0_rtx -+ && CONST_INT_P (XEXP (op0, 1)) -+ && aarch64_is_extend_from_extract (mode, -+ XEXP (op0, 1), -+ op1)) -+ { -+ return true; -+ } -+ } ++(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI") ++ (UNSPEC_CRC32W "SI") (UNSPEC_CRC32X "DI") ++ (UNSPEC_CRC32CB "QI") (UNSPEC_CRC32CH "HI") ++ (UNSPEC_CRC32CW "SI") (UNSPEC_CRC32CX "DI")]) + -+ return false; -+} + (define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")]) + (define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")]) + +--- a/src/gcc/config/aarch64/aarch64.h ++++ b/src/gcc/config/aarch64/aarch64.h +@@ -35,6 +35,9 @@ + if (TARGET_SIMD) \ + builtin_define ("__ARM_NEON"); \ + \ ++ if (TARGET_CRC32) \ ++ builtin_define ("__ARM_FEATURE_CRC32"); \ ++ \ + switch (aarch64_cmodel) \ + { \ + case AARCH64_CMODEL_TINY: \ +@@ -188,6 +191,9 @@ + /* Crypto is an optional extension to AdvSIMD. */ + #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) + ++/* CRC instructions that can be enabled through +crc arch extension. */ ++#define TARGET_CRC32 (AARCH64_ISA_CRC) ++ + /* Standard register usage. */ + + /* 31 64-bit general purpose registers R0-R30: +@@ -365,8 +371,7 @@ + + #define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE) + +-#define MODES_TIEABLE_P(MODE1, MODE2) \ +- (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) ++#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2) + + #define DWARF2_UNWIND_INFO 1 + +@@ -409,6 +414,7 @@ + enum reg_class + { + NO_REGS, ++ CALLER_SAVE_REGS, + CORE_REGS, + GENERAL_REGS, + STACK_REG, +@@ -424,6 +430,7 @@ + #define REG_CLASS_NAMES \ + { \ + "NO_REGS", \ ++ "CALLER_SAVE_REGS", \ + "CORE_REGS", \ + "GENERAL_REGS", \ + "STACK_REG", \ +@@ -436,6 +443,7 @@ + #define REG_CLASS_CONTENTS \ + { \ + { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ ++ { 0x0007ffff, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \ + { 0x7fffffff, 0x00000000, 0x00000003 }, /* CORE_REGS */ \ + { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ + { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ +@@ -520,7 +528,6 @@ + been saved. */ + HOST_WIDE_INT padding0; + HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */ +- HOST_WIDE_INT fp_lr_offset; /* Space needed for saving fp and/or lr */ + + bool laid_out; + }; +@@ -661,12 +668,14 @@ + /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */ + #define AARCH64_CALL_RATIO 8 + +-/* When optimizing for size, give a better estimate of the length of a memcpy +- call, but use the default otherwise. But move_by_pieces_ninsns() counts +- memory-to-memory moves, and we'll have to generate a load & store for each, +- so halve the value to take that into account. */ ++/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure. 
++ move_by_pieces will continually copy the largest safe chunks. So a ++ 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient ++ for both size and speed of copy, so we will instead use the "movmem" ++ standard name to implement the copy. This logic does not apply when ++ targeting -mstrict-align, so keep a sensible default in that case. */ + #define MOVE_RATIO(speed) \ +- (((speed) ? 15 : AARCH64_CALL_RATIO) / 2) ++ (!STRICT_ALIGNMENT ? 2 : (((speed) ? 15 : AARCH64_CALL_RATIO) / 2)) + + /* For CLEAR_RATIO, when optimizing for size, give a better estimate + of the length of a memset call, but use the default otherwise. */ +@@ -826,6 +835,11 @@ + + #define SHIFT_COUNT_TRUNCATED !TARGET_SIMD + ++/* Choose appropriate mode for caller saves, so we do the minimum ++ required size of load/store. */ ++#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ ++ aarch64_hard_regno_caller_save_mode ((REGNO), (NREGS), (MODE)) ++ + /* Callee only saves lower 64-bits of a 128-bit register. Tell the + compiler the callee clobbers the top 64-bits when restoring the + bottom 64-bits. */ +--- a/src/gcc/config/arm/aarch-cost-tables.h ++++ b/src/gcc/config/arm/aarch-cost-tables.h +@@ -39,6 +39,7 @@ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ ++ 0, /* rev. */ + COSTS_N_INSNS (1), /* non_exec. */ + false /* non_exec_costs_exec. */ + }, +@@ -139,6 +140,7 @@ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + 0, /* clz. */ ++ 0, /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, +@@ -239,6 +241,7 @@ + COSTS_N_INSNS (1), /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ ++ 0, /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, +--- a/src/gcc/config/arm/thumb2.md ++++ b/src/gcc/config/arm/thumb2.md +@@ -329,7 +329,7 @@ + movw%?\\t%0, %L1\\t%@ movhi + str%(h%)\\t%1, %0\\t%@ movhi + ldr%(h%)\\t%0, %1\\t%@ movhi" +- [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_reg,store1,load1") ++ [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_imm,store1,load1") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no") + (set_attr "length" "2,4,2,4,4,4") +@@ -1370,6 +1370,103 @@ + (set_attr "type" "alu_reg")] + ) + ++; Constants for op 2 will never be given to these patterns. ++(define_insn_and_split "*iordi_notdi_di" ++ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") ++ (ior:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r")) ++ (match_operand:DI 2 "s_register_operand" "r,0")))] ++ "TARGET_THUMB2" ++ "#" ++ "TARGET_THUMB2 && reload_completed" ++ [(set (match_dup 0) (ior:SI (not:SI (match_dup 1)) (match_dup 2))) ++ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] ++ " ++ { ++ operands[3] = gen_highpart (SImode, operands[0]); ++ operands[0] = gen_lowpart (SImode, operands[0]); ++ operands[4] = gen_highpart (SImode, operands[1]); ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ operands[5] = gen_highpart (SImode, operands[2]); ++ operands[2] = gen_lowpart (SImode, operands[2]); ++ }" ++ [(set_attr "length" "8") ++ (set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "type" "multiple")] ++) + -+/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)), -+ storing it in *COST. Result is true if the total cost of the operation -+ has now been calculated. 
*/ -+static bool -+aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) -+{ -+ rtx inner; -+ rtx comparator; -+ enum rtx_code cmpcode; ++(define_insn_and_split "*iordi_notzesidi_di" ++ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") ++ (ior:DI (not:DI (zero_extend:DI ++ (match_operand:SI 2 "s_register_operand" "r,r"))) ++ (match_operand:DI 1 "s_register_operand" "0,?r")))] ++ "TARGET_THUMB2" ++ "#" ++ ; (not (zero_extend...)) means operand0 will always be 0xffffffff ++ "TARGET_THUMB2 && reload_completed" ++ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) ++ (set (match_dup 3) (const_int -1))] ++ " ++ { ++ operands[3] = gen_highpart (SImode, operands[0]); ++ operands[0] = gen_lowpart (SImode, operands[0]); ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ }" ++ [(set_attr "length" "4,8") ++ (set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "type" "multiple")] ++) + -+ if (COMPARISON_P (op0)) -+ { -+ inner = XEXP (op0, 0); -+ comparator = XEXP (op0, 1); -+ cmpcode = GET_CODE (op0); -+ } -+ else -+ { -+ inner = op0; -+ comparator = const0_rtx; -+ cmpcode = NE; -+ } ++(define_insn_and_split "*iordi_notdi_zesidi" ++ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") ++ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r")) ++ (zero_extend:DI ++ (match_operand:SI 1 "s_register_operand" "r,r"))))] ++ "TARGET_THUMB2" ++ "#" ++ "TARGET_THUMB2 && reload_completed" ++ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) ++ (set (match_dup 3) (not:SI (match_dup 4)))] ++ " ++ { ++ operands[3] = gen_highpart (SImode, operands[0]); ++ operands[0] = gen_lowpart (SImode, operands[0]); ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ operands[4] = gen_highpart (SImode, operands[2]); ++ operands[2] = gen_lowpart (SImode, operands[2]); ++ }" ++ [(set_attr "length" "8") ++ (set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "type" "multiple")] ++) + -+ if (GET_CODE (op1) == PC || GET_CODE (op2) == PC) -+ { -+ /* Conditional branch. */ -+ if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) -+ return true; -+ else -+ { -+ if (cmpcode == NE || cmpcode == EQ) -+ { -+ if (comparator == const0_rtx) -+ { -+ /* TBZ/TBNZ/CBZ/CBNZ. */ -+ if (GET_CODE (inner) == ZERO_EXTRACT) -+ /* TBZ/TBNZ. */ -+ *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT, -+ 0, speed); -+ else -+ /* CBZ/CBNZ. */ -+ *cost += rtx_cost (inner, cmpcode, 0, speed); ++(define_insn_and_split "*iordi_notsesidi_di" ++ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") ++ (ior:DI (not:DI (sign_extend:DI ++ (match_operand:SI 2 "s_register_operand" "r,r"))) ++ (match_operand:DI 1 "s_register_operand" "0,r")))] ++ "TARGET_THUMB2" ++ "#" ++ "TARGET_THUMB2 && reload_completed" ++ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) ++ (set (match_dup 3) (ior:SI (not:SI ++ (ashiftrt:SI (match_dup 2) (const_int 31))) ++ (match_dup 4)))] ++ " ++ { ++ operands[3] = gen_highpart (SImode, operands[0]); ++ operands[0] = gen_lowpart (SImode, operands[0]); ++ operands[4] = gen_highpart (SImode, operands[1]); ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ }" ++ [(set_attr "length" "8") ++ (set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "type" "multiple")] ++) + -+ return true; -+ } -+ } -+ else if (cmpcode == LT || cmpcode == GE) -+ { -+ /* TBZ/TBNZ. 
*/ -+ if (comparator == const0_rtx) -+ return true; -+ } -+ } + (define_insn "*orsi_notsi_si" + [(set (match_operand:SI 0 "s_register_operand" "=r") + (ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) +--- a/src/gcc/config/arm/arm.c ++++ b/src/gcc/config/arm/arm.c +@@ -50,6 +50,7 @@ + #include "except.h" + #include "tm_p.h" + #include "target.h" ++#include "sched-int.h" + #include "target-def.h" + #include "debug.h" + #include "langhooks.h" +@@ -59,6 +60,7 @@ + #include "params.h" + #include "opts.h" + #include "dumpfile.h" ++#include "gimple-expr.h" + + /* Forward definitions of types. */ + typedef struct minipool_node Mnode; +@@ -94,6 +96,7 @@ + static bool thumb_force_lr_save (void); + static unsigned arm_size_return_regs (void); + static bool arm_assemble_integer (rtx, unsigned int, int); ++static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update); + static void arm_print_operand (FILE *, rtx, int); + static void arm_print_operand_address (FILE *, rtx); + static bool arm_print_operand_punct_valid_p (unsigned char code); +@@ -585,6 +588,9 @@ + #undef TARGET_MANGLE_TYPE + #define TARGET_MANGLE_TYPE arm_mangle_type + ++#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV ++#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv ++ + #undef TARGET_BUILD_BUILTIN_VA_LIST + #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list + #undef TARGET_EXPAND_BUILTIN_VA_START +@@ -986,6 +992,7 @@ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + 0, /* clz. */ ++ 0, /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, +@@ -1069,7 +1076,109 @@ + } + }; + ++const struct cpu_cost_table cortexa8_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ COSTS_N_INSNS (1), /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ 0, /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ 0, /* log_shift_reg. */ ++ 0, /* extend. */ ++ 0, /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (1), /* simple. */ ++ COSTS_N_INSNS (1), /* flag_setting. */ ++ COSTS_N_INSNS (1), /* extend. */ ++ COSTS_N_INSNS (1), /* add. */ ++ COSTS_N_INSNS (1), /* extend_add. */ ++ COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */ ++ }, ++ /* MULT DImode */ ++ { ++ 0, /* simple (N/A). */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (2), /* extend. */ ++ 0, /* add (N/A). */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ 0 /* idiv (N/A). */ + } -+ else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (1), /* load. */ ++ COSTS_N_INSNS (1), /* load_sign_extend. */ ++ COSTS_N_INSNS (1), /* ldrd. */ ++ COSTS_N_INSNS (1), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (1), /* loadf. */ ++ COSTS_N_INSNS (1), /* loadd. */ ++ COSTS_N_INSNS (1), /* load_unaligned. */ ++ COSTS_N_INSNS (1), /* store. */ ++ COSTS_N_INSNS (1), /* strd. */ ++ COSTS_N_INSNS (1), /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (1), /* storef. */ ++ COSTS_N_INSNS (1), /* stored. */ ++ COSTS_N_INSNS (1) /* store_unaligned. */ ++ }, ++ { ++ /* FP SFmode */ + { -+ /* It's a conditional operation based on the status flags, -+ so it must be some flavor of CSEL. 
*/ -+ -+ /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */ -+ if (GET_CODE (op1) == NEG -+ || GET_CODE (op1) == NOT -+ || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx)) -+ op1 = XEXP (op1, 0); -+ -+ *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed); -+ *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed); -+ return true; ++ COSTS_N_INSNS (36), /* div. */ ++ COSTS_N_INSNS (11), /* mult. */ ++ COSTS_N_INSNS (20), /* mult_addsub. */ ++ COSTS_N_INSNS (30), /* fma. */ ++ COSTS_N_INSNS (9), /* addsub. */ ++ COSTS_N_INSNS (3), /* fpconst. */ ++ COSTS_N_INSNS (3), /* neg. */ ++ COSTS_N_INSNS (6), /* compare. */ ++ COSTS_N_INSNS (4), /* widen. */ ++ COSTS_N_INSNS (4), /* narrow. */ ++ COSTS_N_INSNS (8), /* toint. */ ++ COSTS_N_INSNS (8), /* fromint. */ ++ COSTS_N_INSNS (8) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (64), /* div. */ ++ COSTS_N_INSNS (16), /* mult. */ ++ COSTS_N_INSNS (25), /* mult_addsub. */ ++ COSTS_N_INSNS (30), /* fma. */ ++ COSTS_N_INSNS (9), /* addsub. */ ++ COSTS_N_INSNS (3), /* fpconst. */ ++ COSTS_N_INSNS (3), /* neg. */ ++ COSTS_N_INSNS (6), /* compare. */ ++ COSTS_N_INSNS (6), /* widen. */ ++ COSTS_N_INSNS (6), /* narrow. */ ++ COSTS_N_INSNS (8), /* toint. */ ++ COSTS_N_INSNS (8), /* fromint. */ ++ COSTS_N_INSNS (8) /* roundint. */ + } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; + + -+ /* We don't know what this is, cost all operands. */ -+ return false; -+} + - /* Calculate the cost of calculating X, storing it in *COST. Result - is true if the total cost of the operation has now been calculated. */ - static bool -@@ -4506,13 +4921,31 @@ - aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, - int param ATTRIBUTE_UNUSED, int *cost, bool speed) + const struct cpu_cost_table cortexa7_extra_costs = { -- rtx op0, op1; -+ rtx op0, op1, op2; - const struct cpu_cost_table *extra_cost - = aarch64_tune_params->insn_extra_cost; -+ enum machine_mode mode = GET_MODE (x); + /* ALU */ +@@ -1087,6 +1196,7 @@ + COSTS_N_INSNS (1), /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + COSTS_N_INSNS (1), /* clz. */ ++ COSTS_N_INSNS (1), /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, +@@ -1188,6 +1298,7 @@ + 0, /* bfi. */ + COSTS_N_INSNS (1), /* bfx. */ + COSTS_N_INSNS (1), /* clz. */ ++ COSTS_N_INSNS (1), /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, +@@ -1288,6 +1399,7 @@ + COSTS_N_INSNS (1), /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ ++ 0, /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, +@@ -1388,6 +1500,7 @@ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ ++ 0, /* rev. */ + COSTS_N_INSNS (1), /* non_exec. */ + false /* non_exec_costs_exec. */ + }, +@@ -1484,7 +1597,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; -+ /* By default, assume that everything has equivalent cost to the -+ cheapest instruction. Any additional costs are applied as a delta -+ above this default. */ -+ *cost = COSTS_N_INSNS (1); -+ -+ /* TODO: The cost infrastructure currently does not handle -+ vector operations. Assume that all vector operations -+ are equally expensive. 
*/ -+ if (VECTOR_MODE_P (mode)) -+ { -+ if (speed) -+ *cost += extra_cost->vect.alu; -+ return true; -+ } -+ - switch (code) - { - case SET: -+ /* The cost depends entirely on the operands to SET. */ -+ *cost = 0; - op0 = SET_DEST (x); - op1 = SET_SRC (x); + const struct tune_params arm_fastmul_tune = +@@ -1500,7 +1614,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; -@@ -4520,25 +4953,47 @@ - { - case MEM: - if (speed) -- *cost += extra_cost->ldst.store; -+ { -+ rtx address = XEXP (op0, 0); -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ *cost += extra_cost->ldst.store; -+ else if (mode == SFmode) -+ *cost += extra_cost->ldst.storef; -+ else if (mode == DFmode) -+ *cost += extra_cost->ldst.stored; + /* StrongARM has early execution of branches, so a sequence that is worth +@@ -1519,7 +1634,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; + + const struct tune_params arm_xscale_tune = +@@ -1535,7 +1651,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; + + const struct tune_params arm_9e_tune = +@@ -1551,7 +1668,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; + + const struct tune_params arm_v6t2_tune = +@@ -1567,7 +1685,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; + + /* Generic Cortex tuning. Use more specific tunings if appropriate. */ +@@ -1584,9 +1703,27 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; + ++const struct tune_params arm_cortex_a8_tune = ++{ ++ arm_9e_rtx_costs, ++ &cortexa8_extra_costs, ++ NULL, /* Sched adj cost. */ ++ 1, /* Constant limit. */ ++ 5, /* Max cond insns. */ ++ ARM_PREFETCH_NOT_BENEFICIAL, ++ false, /* Prefer constant pool. */ ++ arm_default_branch_cost, ++ false, /* Prefer LDRD/STRD. */ ++ {true, true}, /* Prefer non short circuit. */ ++ &arm_default_vec_cost, /* Vectorizer costs. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ ++}; ++ + const struct tune_params arm_cortex_a7_tune = + { + arm_9e_rtx_costs, +@@ -1600,7 +1737,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. 
*/ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; -- if (op1 != const0_rtx) -- *cost += rtx_cost (op1, SET, 1, speed); -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } -+ -+ *cost += rtx_cost (op1, SET, 1, speed); - return true; + const struct tune_params arm_cortex_a15_tune = +@@ -1616,7 +1754,8 @@ + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ true, true /* Prefer 32-bit encodings. */ + }; - case SUBREG: - if (! REG_P (SUBREG_REG (op0))) - *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed); -+ - /* Fall through. */ - case REG: -- /* Cost is just the cost of the RHS of the set. */ -- *cost += rtx_cost (op1, SET, 1, true); -+ /* const0_rtx is in general free, but we will use an -+ instruction to set a register to 0. */ -+ if (REG_P (op1) || op1 == const0_rtx) -+ { -+ /* The cost is 1 per register copied. */ -+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) -+ / UNITS_PER_WORD; -+ *cost = COSTS_N_INSNS (n_minus_1 + 1); -+ } -+ else -+ /* Cost is just the cost of the RHS of the set. */ -+ *cost += rtx_cost (op1, SET, 1, speed); - return true; + const struct tune_params arm_cortex_a53_tune = +@@ -1632,7 +1771,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; -- case ZERO_EXTRACT: /* Bit-field insertion. */ -+ case ZERO_EXTRACT: - case SIGN_EXTRACT: -- /* Strip any redundant widening of the RHS to meet the width of -- the target. */ -+ /* Bit-field insertion. Strip any redundant widening of -+ the RHS to meet the width of the target. */ - if (GET_CODE (op1) == SUBREG) - op1 = SUBREG_REG (op1); - if ((GET_CODE (op1) == ZERO_EXTEND -@@ -4547,25 +5002,139 @@ - && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) - >= INTVAL (XEXP (op0, 1)))) - op1 = XEXP (op1, 0); -- *cost += rtx_cost (op1, SET, 1, speed); -+ -+ if (CONST_INT_P (op1)) -+ { -+ /* MOV immediate is assumed to always be cheap. */ -+ *cost = COSTS_N_INSNS (1); -+ } -+ else -+ { -+ /* BFM. */ -+ if (speed) -+ *cost += extra_cost->alu.bfi; -+ *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed); -+ } -+ - return true; + const struct tune_params arm_cortex_a57_tune = +@@ -1648,7 +1788,8 @@ + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ true, true /* Prefer 32-bit encodings. */ + }; - default: -+ /* We can't make sense of this, assume default cost. */ -+ *cost = COSTS_N_INSNS (1); - break; - } - return false; + /* Branches can be dual-issued on Cortex-A5, so conditional execution is +@@ -1667,7 +1808,8 @@ + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; -+ case CONST_INT: -+ /* If an instruction can incorporate a constant within the -+ instruction, the instruction's expression avoids calling -+ rtx_cost() on the constant. 
If rtx_cost() is called on a -+ constant, then it is usually because the constant must be -+ moved into a register by one or more instructions. -+ -+ The exception is constant 0, which can be expressed -+ as XZR/WZR and is therefore free. The exception to this is -+ if we have (set (reg) (const0_rtx)) in which case we must cost -+ the move. However, we can catch that when we cost the SET, so -+ we don't need to consider that here. */ -+ if (x == const0_rtx) -+ *cost = 0; -+ else -+ { -+ /* To an approximation, building any other constant is -+ proportionally expensive to the number of instructions -+ required to build that constant. This is true whether we -+ are compiling for SPEED or otherwise. */ -+ *cost = COSTS_N_INSNS (aarch64_build_constant (0, -+ INTVAL (x), -+ false)); -+ } -+ return true; -+ -+ case CONST_DOUBLE: -+ if (speed) -+ { -+ /* mov[df,sf]_aarch64. */ -+ if (aarch64_float_const_representable_p (x)) -+ /* FMOV (scalar immediate). */ -+ *cost += extra_cost->fp[mode == DFmode].fpconst; -+ else if (!aarch64_float_const_zero_rtx_p (x)) -+ { -+ /* This will be a load from memory. */ -+ if (mode == DFmode) -+ *cost += extra_cost->ldst.loadd; -+ else -+ *cost += extra_cost->ldst.loadf; -+ } -+ else -+ /* Otherwise this is +0.0. We get this using MOVI d0, #0 -+ or MOV v0.s[0], wzr - neither of which are modeled by the -+ cost tables. Just use the default cost. */ -+ { -+ } -+ } -+ -+ return true; -+ - case MEM: - if (speed) -- *cost += extra_cost->ldst.load; -+ { -+ /* For loads we want the base cost of a load, plus an -+ approximation for the additional cost of the addressing -+ mode. */ -+ rtx address = XEXP (x, 0); -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ *cost += extra_cost->ldst.load; -+ else if (mode == SFmode) -+ *cost += extra_cost->ldst.loadf; -+ else if (mode == DFmode) -+ *cost += extra_cost->ldst.loadd; + const struct tune_params arm_cortex_a9_tune = +@@ -1683,7 +1825,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } -+ - return true; + const struct tune_params arm_cortex_a12_tune = +@@ -1699,7 +1842,8 @@ + true, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; - case NEG: -- op0 = CONST0_RTX (GET_MODE (x)); -- op1 = XEXP (x, 0); -- goto cost_minus; -+ op0 = XEXP (x, 0); + /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single +@@ -1722,7 +1866,8 @@ + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -+ { -+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE -+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) -+ { -+ /* CSETM. */ -+ *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed); -+ return true; -+ } -+ -+ /* Cost this as SUB wzr, X. 
*/ -+ op0 = CONST0_RTX (GET_MODE (x)); -+ op1 = XEXP (x, 0); -+ goto cost_minus; -+ } -+ -+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) -+ { -+ /* Support (neg(fma...)) as a single instruction only if -+ sign of zeros is unimportant. This matches the decision -+ making in aarch64.md. */ -+ if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0))) -+ { -+ /* FNMADD. */ -+ *cost = rtx_cost (op0, NEG, 0, speed); -+ return true; -+ } -+ if (speed) -+ /* FNEG. */ -+ *cost += extra_cost->fp[mode == DFmode].neg; -+ return false; -+ } -+ -+ return false; -+ - case COMPARE: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); -@@ -4577,96 +5146,228 @@ - goto cost_logic; - } + /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than +@@ -1740,7 +1885,8 @@ + false, /* Prefer LDRD/STRD. */ + {false, false}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; + + const struct tune_params arm_fa726te_tune = +@@ -1756,7 +1902,8 @@ + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ +- false /* Prefer Neon for 64-bits bitops. */ ++ false, /* Prefer Neon for 64-bits bitops. */ ++ false, false /* Prefer 32-bit encodings. */ + }; -- /* Comparisons can work if the order is swapped. -- Canonicalization puts the more complex operation first, but -- we want it in op1. */ -- if (! (REG_P (op0) -- || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) -- { -- op0 = XEXP (x, 1); -- op1 = XEXP (x, 0); -- } -- goto cost_minus; -+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT) -+ { -+ /* TODO: A write to the CC flags possibly costs extra, this -+ needs encoding in the cost tables. */ -+ /* CC_ZESWPmode supports zero extend for free. */ -+ if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND) -+ op0 = XEXP (op0, 0); -+ -+ /* ANDS. */ -+ if (GET_CODE (op0) == AND) -+ { -+ x = op0; -+ goto cost_logic; -+ } -+ -+ if (GET_CODE (op0) == PLUS) -+ { -+ /* ADDS (and CMN alias). */ -+ x = op0; -+ goto cost_plus; -+ } -+ -+ if (GET_CODE (op0) == MINUS) -+ { -+ /* SUBS. */ -+ x = op0; -+ goto cost_minus; -+ } -+ -+ if (GET_CODE (op1) == NEG) +@@ -2807,7 +2954,7 @@ + prefer_neon_for_64bits = true; + + /* Use the alternative scheduling-pressure algorithm by default. */ +- maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, ++ maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, + global_options.x_param_values, + global_options_set.x_param_values); + +@@ -6080,11 +6227,6 @@ + if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) + return false; + +- /* Cannot tail-call to long calls, since these are out of range of +- a branch instruction. */ +- if (decl && arm_is_long_call_p (decl)) +- return false; +- + /* If we are interworking and the function is not declared static + then we can't tail-call it unless we know that it exists in this + compilation unit (since it might be a Thumb routine). */ +@@ -9338,6 +9480,47 @@ + *cost = LIBCALL_COST (2); + return false; + ++ case BSWAP: ++ if (arm_arch6) ++ { ++ if (mode == SImode) + { -+ /* CMN. 
*/ -+ if (speed) -+ *cost += extra_cost->alu.arith; ++ *cost = COSTS_N_INSNS (1); ++ if (speed_p) ++ *cost += extra_cost->alu.rev; + -+ *cost += rtx_cost (op0, COMPARE, 0, speed); -+ *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed); -+ return true; ++ return false; + } -+ -+ /* CMP. -+ -+ Compare can freely swap the order of operands, and -+ canonicalization puts the more complex operation first. -+ But the integer MINUS logic expects the shift/extend -+ operation in op1. */ -+ if (! (REG_P (op0) -+ || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) -+ { -+ op0 = XEXP (x, 1); -+ op1 = XEXP (x, 0); -+ } -+ goto cost_minus; + } -+ -+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) ++ else + { -+ /* FCMP. */ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].compare; ++ /* No rev instruction available. Look at arm_legacy_rev ++ and thumb_legacy_rev for the form of RTL used then. */ ++ if (TARGET_THUMB) ++ { ++ *cost = COSTS_N_INSNS (10); + -+ if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) ++ if (speed_p) ++ { ++ *cost += 6 * extra_cost->alu.shift; ++ *cost += 3 * extra_cost->alu.logical; ++ } ++ } ++ else + { -+ /* FCMP supports constant 0.0 for no extra cost. */ -+ return true; ++ *cost = COSTS_N_INSNS (5); ++ ++ if (speed_p) ++ { ++ *cost += 2 * extra_cost->alu.shift; ++ *cost += extra_cost->alu.arith_shift; ++ *cost += 2 * extra_cost->alu.logical; ++ } + } -+ return false; ++ return true; + } -+ + return false; + case MINUS: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ { -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); - -- cost_minus: -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT -- || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC -- && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) -- { -- if (op0 != const0_rtx) -+cost_minus: -+ /* Detect valid immediates. */ -+ if ((GET_MODE_CLASS (mode) == MODE_INT -+ || (GET_MODE_CLASS (mode) == MODE_CC -+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) -+ && CONST_INT_P (op1) -+ && aarch64_uimm12_shift (INTVAL (op1))) -+ { - *cost += rtx_cost (op0, MINUS, 0, speed); - -- if (CONST_INT_P (op1)) -- { -- if (!aarch64_uimm12_shift (INTVAL (op1))) -- *cost += rtx_cost (op1, MINUS, 1, speed); -- } -- else -- { -- op1 = aarch64_strip_shift_or_extend (op1); -- *cost += rtx_cost (op1, MINUS, 1, speed); -- } -- return true; -- } -+ if (speed) -+ /* SUB(S) (immediate). */ -+ *cost += extra_cost->alu.arith; -+ return true; + if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT + && (mode == SFmode || !TARGET_VFP_SINGLE)) +@@ -9720,8 +9903,17 @@ + /* Vector mode? */ + *cost = LIBCALL_COST (2); + return false; ++ case IOR: ++ if (mode == SImode && arm_arch6 && aarch_rev16_p (x)) ++ { ++ *cost = COSTS_N_INSNS (1); ++ if (speed_p) ++ *cost += extra_cost->alu.rev; -- return false; -+ } +- case AND: case XOR: case IOR: ++ return true; ++ } ++ /* Fall through. */ ++ case AND: case XOR: + if (mode == SImode) + { + enum rtx_code subcode = GET_CODE (XEXP (x, 0)); +@@ -10620,6 +10812,36 @@ + *cost = LIBCALL_COST (1); + return false; -+ /* Look for SUB (extended register). 
*/ -+ if (aarch64_rtx_arith_op_extract_p (op1, mode)) -+ { -+ if (speed) -+ *cost += extra_cost->alu.arith_shift; ++ case FMA: ++ if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA) ++ { ++ rtx op0 = XEXP (x, 0); ++ rtx op1 = XEXP (x, 1); ++ rtx op2 = XEXP (x, 2); + -+ *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), -+ (enum rtx_code) GET_CODE (op1), -+ 0, speed); -+ return true; -+ } ++ *cost = COSTS_N_INSNS (1); + -+ rtx new_op1 = aarch64_strip_extend (op1); ++ /* vfms or vfnma. */ ++ if (GET_CODE (op0) == NEG) ++ op0 = XEXP (op0, 0); + -+ /* Cost this as an FMA-alike operation. */ -+ if ((GET_CODE (new_op1) == MULT -+ || GET_CODE (new_op1) == ASHIFT) -+ && code != COMPARE) -+ { -+ *cost += aarch64_rtx_mult_cost (new_op1, MULT, -+ (enum rtx_code) code, -+ speed); -+ *cost += rtx_cost (op0, MINUS, 0, speed); -+ return true; -+ } ++ /* vfnms or vfnma. */ ++ if (GET_CODE (op2) == NEG) ++ op2 = XEXP (op2, 0); + -+ *cost += rtx_cost (new_op1, MINUS, 1, speed); ++ *cost += rtx_cost (op0, FMA, 0, speed_p); ++ *cost += rtx_cost (op1, FMA, 1, speed_p); ++ *cost += rtx_cost (op2, FMA, 2, speed_p); + -+ if (speed) -+ { -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* SUB(S). */ -+ *cost += extra_cost->alu.arith; -+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ /* FSUB. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return true; ++ if (speed_p) ++ *cost += extra_cost->fp[mode ==DFmode].fma; ++ ++ return true; ++ } ++ ++ *cost = LIBCALL_COST (3); ++ return false; ++ + case FIX: + case UNSIGNED_FIX: + if (TARGET_HARD_FLOAT) +@@ -10670,10 +10892,16 @@ + return true; + + case ASM_OPERANDS: +- /* Just a guess. Cost one insn per input. */ +- *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); +- return true; ++ { ++ /* Just a guess. Guess number of instructions in the asm ++ plus one insn per input. Always a minimum of COSTS_N_INSNS (1) ++ though (see PR60663). */ ++ int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); ++ int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); + ++ *cost = COSTS_N_INSNS (asm_length + num_operands); ++ return true; + } + default: + if (mode != VOIDmode) + *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); +@@ -16788,9 +17016,20 @@ + compute_bb_for_insn (); + df_analyze (); + ++ enum Convert_Action {SKIP, CONV, SWAP_CONV}; + - case PLUS: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ { -+ rtx new_op0; + FOR_EACH_BB_FN (bb, cfun) + { ++ if (current_tune->disparage_flag_setting_t16_encodings ++ && optimize_bb_for_speed_p (bb)) ++ continue; ++ + rtx insn; ++ Convert_Action action = SKIP; ++ Convert_Action action_for_partial_flag_setting ++ = (current_tune->disparage_partial_flag_setting_t16_encodings ++ && optimize_bb_for_speed_p (bb)) ++ ? 
SKIP : CONV; -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- { -- if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) -- { -- *cost += rtx_cost (op0, PLUS, 0, speed); -- } -- else -- { -- rtx new_op0 = aarch64_strip_shift_or_extend (op0); -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); + COPY_REG_SET (&live, DF_LR_OUT (bb)); + df_simulate_initialize_backwards (bb, &live); +@@ -16800,7 +17039,7 @@ + && !REGNO_REG_SET_P (&live, CC_REGNUM) + && GET_CODE (PATTERN (insn)) == SET) + { +- enum {SKIP, CONV, SWAP_CONV} action = SKIP; ++ action = SKIP; + rtx pat = PATTERN (insn); + rtx dst = XEXP (pat, 0); + rtx src = XEXP (pat, 1); +@@ -16881,10 +17120,11 @@ + /* ANDS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) +- action = CONV; ++ action = action_for_partial_flag_setting; + else if (rtx_equal_p (dst, op1) + && low_register_operand (op0, SImode)) +- action = SWAP_CONV; ++ action = action_for_partial_flag_setting == SKIP ++ ? SKIP : SWAP_CONV; + break; -- if (new_op0 == op0 -- && GET_CODE (op0) == MULT) -- { -- if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND -- && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND) -- || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND -- && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND)) -- { -- *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, -- speed) -- + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, -- speed) -- + rtx_cost (op1, PLUS, 1, speed)); -- if (speed) -- *cost += -- extra_cost->mult[GET_MODE (x) == DImode].extend_add; -- return true; -- } -+cost_plus: -+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE -+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) -+ { -+ /* CSINC. */ -+ *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed); -+ *cost += rtx_cost (op1, PLUS, 1, speed); -+ return true; -+ } + case ASHIFTRT: +@@ -16895,7 +17135,7 @@ + /* LSLS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) +- action = CONV; ++ action = action_for_partial_flag_setting; + /* ASRS ,,# */ + /* LSRS ,,# */ + /* LSLS ,,# */ +@@ -16902,7 +17142,7 @@ + else if (low_register_operand (op0, SImode) + && CONST_INT_P (op1) + && IN_RANGE (INTVAL (op1), 0, 31)) +- action = CONV; ++ action = action_for_partial_flag_setting; + break; -- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) -- + rtx_cost (XEXP (op0, 1), MULT, 1, speed) -- + rtx_cost (op1, PLUS, 1, speed)); -+ if (GET_MODE_CLASS (mode) == MODE_INT -+ && CONST_INT_P (op1) -+ && aarch64_uimm12_shift (INTVAL (op1))) -+ { -+ *cost += rtx_cost (op0, PLUS, 0, speed); + case ROTATERT: +@@ -16909,12 +17149,16 @@ + /* RORS , */ + if (rtx_equal_p (dst, op0) + && low_register_operand (op1, SImode)) +- action = CONV; ++ action = action_for_partial_flag_setting; + break; -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].add; -+ if (speed) -+ /* ADD (immediate). */ -+ *cost += extra_cost->alu.arith; -+ return true; -+ } + case NOT: ++ /* MVNS , */ ++ if (low_register_operand (op0, SImode)) ++ action = action_for_partial_flag_setting; ++ break; ++ + case NEG: +- /* MVNS , */ + /* NEGS , (a.k.a RSBS) */ + if (low_register_operand (op0, SImode)) + action = CONV; +@@ -16924,7 +17168,7 @@ + /* MOVS ,# */ + if (CONST_INT_P (src) + && IN_RANGE (INTVAL (src), 0, 255)) +- action = CONV; ++ action = action_for_partial_flag_setting; + break; -- return true; -- } -+ /* Look for ADD (extended register). 
*/ -+ if (aarch64_rtx_arith_op_extract_p (op0, mode)) -+ { -+ if (speed) -+ *cost += extra_cost->alu.arith_shift; + case REG: +@@ -21040,7 +21284,15 @@ + } -- *cost += (rtx_cost (new_op0, PLUS, 0, speed) -- + rtx_cost (op1, PLUS, 1, speed)); -- } -- return true; -- } -+ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), -+ (enum rtx_code) GET_CODE (op0), -+ 0, speed); -+ return true; -+ } -+ /* Strip any extend, leave shifts behind as we will -+ cost them through mult_cost. */ -+ new_op0 = aarch64_strip_extend (op0); +-/* If CODE is 'd', then the X is a condition operand and the instruction ++/* Globally reserved letters: acln ++ Puncutation letters currently used: @_|?().!# ++ Lower case letters currently used: bcdefhimpqtvwxyz ++ Upper case letters currently used: ABCDFGHJKLMNOPQRSTU ++ Letters previously used, but now deprecated/obsolete: sVWXYZ. + -+ if (GET_CODE (new_op0) == MULT -+ || GET_CODE (new_op0) == ASHIFT) -+ { -+ *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS, -+ speed); -+ *cost += rtx_cost (op1, PLUS, 1, speed); -+ return true; -+ } ++ Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P. + -+ *cost += (rtx_cost (new_op0, PLUS, 0, speed) -+ + rtx_cost (op1, PLUS, 1, speed)); ++ If CODE is 'd', then the X is a condition operand and the instruction + should only be executed if the condition is true. + if CODE is 'D', then the X is a condition operand and the instruction + should only be executed if the condition is false: however, if the mode +@@ -21180,6 +21432,19 @@ + } + return; + ++ case 'b': ++ /* Print the log2 of a CONST_INT. */ ++ { ++ HOST_WIDE_INT val; + -+ if (speed) -+ { -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* ADD. */ -+ *cost += extra_cost->alu.arith; -+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ /* FADD. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return true; ++ if (!CONST_INT_P (x) ++ || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0) ++ output_operand_lossage ("Unsupported operand for code '%c'", code); ++ else ++ fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); + } ++ return; + -+ case BSWAP: -+ *cost = COSTS_N_INSNS (1); -+ -+ if (speed) -+ *cost += extra_cost->alu.rev; -+ - return false; + case 'L': + /* The low 16 bits of an immediate constant. */ + fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); +@@ -21422,7 +21687,7 @@ + register. */ + case 'p': + { +- int mode = GET_MODE (x); ++ enum machine_mode mode = GET_MODE (x); + int regno; - case IOR: -+ if (aarch_rev16_p (x)) -+ { -+ *cost = COSTS_N_INSNS (1); -+ -+ if (speed) -+ *cost += extra_cost->alu.rev; -+ -+ return true; -+ } -+ /* Fall through. */ - case XOR: - case AND: - cost_logic: -@@ -4673,117 +5374,252 @@ - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); + if (GET_MODE_SIZE (mode) != 8 || !REG_P (x)) +@@ -21446,7 +21711,7 @@ + case 'P': + case 'q': + { +- int mode = GET_MODE (x); ++ enum machine_mode mode = GET_MODE (x); + int is_quad = (code == 'q'); + int regno; + +@@ -21482,7 +21747,7 @@ + case 'e': + case 'f': + { +- int mode = GET_MODE (x); ++ enum machine_mode mode = GET_MODE (x); + int regno; + + if ((GET_MODE_SIZE (mode) != 16 +@@ -21615,7 +21880,7 @@ + /* Translate an S register number into a D register number and element index. */ + case 'y': + { +- int mode = GET_MODE (x); ++ enum machine_mode mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 4 || !REG_P (x)) +@@ -21649,7 +21914,7 @@ + number into a D register number and element index. 
*/ + case 'z': + { +- int mode = GET_MODE (x); ++ enum machine_mode mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 2 || !REG_P (x)) +@@ -22610,13 +22875,20 @@ + } + + /* We allow almost any value to be stored in the general registers. +- Restrict doubleword quantities to even register pairs so that we can +- use ldrd. Do not allow very large Neon structure opaque modes in +- general registers; they would use too many. */ ++ Restrict doubleword quantities to even register pairs in ARM state ++ so that we can use ldrd. Do not allow very large Neon structure ++ opaque modes in general registers; they would use too many. */ + if (regno <= LAST_ARM_REGNUM) +- return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) +- && ARM_NUM_REGS (mode) <= 4; ++ { ++ if (ARM_NUM_REGS (mode) > 4) ++ return FALSE; -+ if (code == AND -+ && GET_CODE (op0) == MULT -+ && CONST_INT_P (XEXP (op0, 1)) -+ && CONST_INT_P (op1) -+ && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))), -+ INTVAL (op1)) != 0) -+ { -+ /* This is a UBFM/SBFM. */ -+ *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed); -+ if (speed) -+ *cost += extra_cost->alu.bfx; -+ return true; -+ } ++ if (TARGET_THUMB2) ++ return TRUE; + - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) - { -+ /* We possibly get the immediate for free, this is not -+ modelled. */ - if (CONST_INT_P (op1) - && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x))) - { -- *cost += rtx_cost (op0, AND, 0, speed); -+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); ++ return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0); ++ } + -+ if (speed) -+ *cost += extra_cost->alu.logical; + if (regno == FRAME_POINTER_REGNUM + || regno == ARG_POINTER_REGNUM) + /* We only allow integers in the fake hard registers. 
*/ +@@ -22827,6 +23099,7 @@ + NEON_BINOP, + NEON_TERNOP, + NEON_UNOP, ++ NEON_BSWAP, + NEON_GETLANE, + NEON_SETLANE, + NEON_CREATE, +@@ -22848,7 +23121,6 @@ + NEON_FLOAT_NARROW, + NEON_FIXCONV, + NEON_SELECT, +- NEON_RESULTPAIR, + NEON_REINTERP, + NEON_VTBL, + NEON_VTBX, +@@ -23217,6 +23489,9 @@ + ARM_BUILTIN_CRC32CH, + ARM_BUILTIN_CRC32CW, + ++ ARM_BUILTIN_GET_FPSCR, ++ ARM_BUILTIN_SET_FPSCR, ++ + #undef CRYPTO1 + #undef CRYPTO2 + #undef CRYPTO3 +@@ -23294,14 +23569,19 @@ + + tree V8QI_type_node; + tree V4HI_type_node; ++ tree V4UHI_type_node; + tree V4HF_type_node; + tree V2SI_type_node; ++ tree V2USI_type_node; + tree V2SF_type_node; + tree V16QI_type_node; + tree V8HI_type_node; ++ tree V8UHI_type_node; + tree V4SI_type_node; ++ tree V4USI_type_node; + tree V4SF_type_node; + tree V2DI_type_node; ++ tree V2UDI_type_node; + + tree intUQI_type_node; + tree intUHI_type_node; +@@ -23313,27 +23593,6 @@ + tree intCI_type_node; + tree intXI_type_node; + +- tree V8QI_pointer_node; +- tree V4HI_pointer_node; +- tree V2SI_pointer_node; +- tree V2SF_pointer_node; +- tree V16QI_pointer_node; +- tree V8HI_pointer_node; +- tree V4SI_pointer_node; +- tree V4SF_pointer_node; +- tree V2DI_pointer_node; +- +- tree void_ftype_pv8qi_v8qi_v8qi; +- tree void_ftype_pv4hi_v4hi_v4hi; +- tree void_ftype_pv2si_v2si_v2si; +- tree void_ftype_pv2sf_v2sf_v2sf; +- tree void_ftype_pdi_di_di; +- tree void_ftype_pv16qi_v16qi_v16qi; +- tree void_ftype_pv8hi_v8hi_v8hi; +- tree void_ftype_pv4si_v4si_v4si; +- tree void_ftype_pv4sf_v4sf_v4sf; +- tree void_ftype_pv2di_v2di_v2di; +- + tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; + tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; + tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; +@@ -23397,6 +23656,12 @@ + const_intDI_pointer_node = build_pointer_type (const_intDI_node); + const_float_pointer_node = build_pointer_type (const_float_node); + ++ /* Unsigned integer types for various mode sizes. */ ++ intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); ++ intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); ++ intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); ++ intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); ++ neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); + /* Now create vector types based on our NEON element types. */ + /* 64-bit vectors. */ + V8QI_type_node = +@@ -23403,10 +23668,14 @@ + build_vector_type_for_mode (neon_intQI_type_node, V8QImode); + V4HI_type_node = + build_vector_type_for_mode (neon_intHI_type_node, V4HImode); ++ V4UHI_type_node = ++ build_vector_type_for_mode (intUHI_type_node, V4HImode); + V4HF_type_node = + build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); + V2SI_type_node = + build_vector_type_for_mode (neon_intSI_type_node, V2SImode); ++ V2USI_type_node = ++ build_vector_type_for_mode (intUSI_type_node, V2SImode); + V2SF_type_node = + build_vector_type_for_mode (neon_float_type_node, V2SFmode); + /* 128-bit vectors. 
*/ +@@ -23414,21 +23683,20 @@ + build_vector_type_for_mode (neon_intQI_type_node, V16QImode); + V8HI_type_node = + build_vector_type_for_mode (neon_intHI_type_node, V8HImode); ++ V8UHI_type_node = ++ build_vector_type_for_mode (intUHI_type_node, V8HImode); + V4SI_type_node = + build_vector_type_for_mode (neon_intSI_type_node, V4SImode); ++ V4USI_type_node = ++ build_vector_type_for_mode (intUSI_type_node, V4SImode); + V4SF_type_node = + build_vector_type_for_mode (neon_float_type_node, V4SFmode); + V2DI_type_node = + build_vector_type_for_mode (neon_intDI_type_node, V2DImode); ++ V2UDI_type_node = ++ build_vector_type_for_mode (intUDI_type_node, V2DImode); + +- /* Unsigned integer types for various mode sizes. */ +- intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); +- intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); +- intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); +- intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); +- neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); + +- + (*lang_hooks.types.register_builtin_type) (intUQI_type_node, + "__builtin_neon_uqi"); + (*lang_hooks.types.register_builtin_type) (intUHI_type_node, +@@ -23459,53 +23727,8 @@ + (*lang_hooks.types.register_builtin_type) (intXI_type_node, + "__builtin_neon_xi"); + +- /* Pointers to vector types. */ +- V8QI_pointer_node = build_pointer_type (V8QI_type_node); +- V4HI_pointer_node = build_pointer_type (V4HI_type_node); +- V2SI_pointer_node = build_pointer_type (V2SI_type_node); +- V2SF_pointer_node = build_pointer_type (V2SF_type_node); +- V16QI_pointer_node = build_pointer_type (V16QI_type_node); +- V8HI_pointer_node = build_pointer_type (V8HI_type_node); +- V4SI_pointer_node = build_pointer_type (V4SI_type_node); +- V4SF_pointer_node = build_pointer_type (V4SF_type_node); +- V2DI_pointer_node = build_pointer_type (V2DI_type_node); +- +- /* Operations which return results as pairs. 
*/ +- void_ftype_pv8qi_v8qi_v8qi = +- build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, +- V8QI_type_node, NULL); +- void_ftype_pv4hi_v4hi_v4hi = +- build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, +- V4HI_type_node, NULL); +- void_ftype_pv2si_v2si_v2si = +- build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, +- V2SI_type_node, NULL); +- void_ftype_pv2sf_v2sf_v2sf = +- build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, +- V2SF_type_node, NULL); +- void_ftype_pdi_di_di = +- build_function_type_list (void_type_node, intDI_pointer_node, +- neon_intDI_type_node, neon_intDI_type_node, NULL); +- void_ftype_pv16qi_v16qi_v16qi = +- build_function_type_list (void_type_node, V16QI_pointer_node, +- V16QI_type_node, V16QI_type_node, NULL); +- void_ftype_pv8hi_v8hi_v8hi = +- build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, +- V8HI_type_node, NULL); +- void_ftype_pv4si_v4si_v4si = +- build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, +- V4SI_type_node, NULL); +- void_ftype_pv4sf_v4sf_v4sf = +- build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, +- V4SF_type_node, NULL); +- void_ftype_pv2di_v2di_v2di = +- build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, +- V2DI_type_node, NULL); +- + if (TARGET_CRYPTO && TARGET_HARD_FLOAT) + { +- tree V4USI_type_node = +- build_vector_type_for_mode (intUSI_type_node, V4SImode); + + tree V16UQI_type_node = + build_vector_type_for_mode (intUQI_type_node, V16QImode); +@@ -23791,25 +24014,6 @@ + } + break; + +- case NEON_RESULTPAIR: +- { +- switch (insn_data[d->code].operand[1].mode) +- { +- case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; +- case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; +- case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; +- case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; +- case DImode: ftype = void_ftype_pdi_di_di; break; +- case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; +- case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; +- case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; +- case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; +- case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; +- default: gcc_unreachable (); +- } +- } +- break; +- + case NEON_REINTERP: + { + /* We iterate over NUM_DREG_TYPES doubleword types, +@@ -23869,6 +24073,31 @@ + ftype = build_function_type_list (return_type, eltype, NULL); + break; + } ++ case NEON_BSWAP: ++ { ++ tree eltype = NULL_TREE; ++ switch (insn_data[d->code].operand[1].mode) ++ { ++ case V4HImode: ++ eltype = V4UHI_type_node; ++ break; ++ case V8HImode: ++ eltype = V8UHI_type_node; ++ break; ++ case V2SImode: ++ eltype = V2USI_type_node; ++ break; ++ case V4SImode: ++ eltype = V4USI_type_node; ++ break; ++ case V2DImode: ++ eltype = V2UDI_type_node; ++ break; ++ default: gcc_unreachable (); ++ } ++ ftype = build_function_type_list (eltype, eltype, NULL); ++ break; ++ } + default: + gcc_unreachable (); + } +@@ -24015,6 +24244,15 @@ + IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) + IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) + + -+ return true; - } - else - { -+ rtx new_op0 = op0; ++#define FP_BUILTIN(L, U) \ ++ {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ ++ UNKNOWN, 0}, + -+ /* Handle ORN, EON, or BIC. 
*/ - if (GET_CODE (op0) == NOT) - op0 = XEXP (op0, 0); -- op0 = aarch64_strip_shift (op0); -- *cost += (rtx_cost (op0, AND, 0, speed) -- + rtx_cost (op1, AND, 1, speed)); ++ FP_BUILTIN (set_fpscr, GET_FPSCR) ++ FP_BUILTIN (get_fpscr, SET_FPSCR) ++#undef FP_BUILTIN + -+ new_op0 = aarch64_strip_shift (op0); + #define CRC32_BUILTIN(L, U) \ + {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ + UNKNOWN, 0}, +@@ -24529,6 +24767,21 @@ + + if (TARGET_CRC32) + arm_init_crc32_builtins (); + -+ /* If we had a shift on op0 then this is a logical-shift- -+ by-register/immediate operation. Otherwise, this is just -+ a logical operation. */ -+ if (speed) -+ { -+ if (new_op0 != op0) -+ { -+ /* Shift by immediate. */ -+ if (CONST_INT_P (XEXP (op0, 1))) -+ *cost += extra_cost->alu.log_shift; -+ else -+ *cost += extra_cost->alu.log_shift_reg; -+ } -+ else -+ *cost += extra_cost->alu.logical; -+ } ++ if (TARGET_VFP) ++ { ++ tree ftype_set_fpscr ++ = build_function_type_list (void_type_node, unsigned_type_node, NULL); ++ tree ftype_get_fpscr ++ = build_function_type_list (unsigned_type_node, NULL); ++ ++ arm_builtin_decls[ARM_BUILTIN_GET_FPSCR] ++ = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr, ++ ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); ++ arm_builtin_decls[ARM_BUILTIN_SET_FPSCR] ++ = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr, ++ ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); ++ } + } + + /* Return the ARM builtin for CODE. */ +@@ -25043,6 +25296,7 @@ + case NEON_SPLIT: + case NEON_FLOAT_WIDEN: + case NEON_FLOAT_NARROW: ++ case NEON_BSWAP: + case NEON_REINTERP: + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); +@@ -25052,11 +25306,6 @@ + return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, + NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); + +- case NEON_RESULTPAIR: +- return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, +- NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, +- NEON_ARG_STOP); +- + case NEON_LANEMUL: + case NEON_LANEMULL: + case NEON_LANEMULH: +@@ -25118,24 +25367,6 @@ + emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); + } + +-/* Emit code to place a Neon pair result in memory locations (with equal +- registers). */ +-void +-neon_emit_pair_result_insn (enum machine_mode mode, +- rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, +- rtx op1, rtx op2) +-{ +- rtx mem = gen_rtx_MEM (mode, destaddr); +- rtx tmp1 = gen_reg_rtx (mode); +- rtx tmp2 = gen_reg_rtx (mode); +- +- emit_insn (intfn (tmp1, op1, op2, tmp2)); +- +- emit_move_insn (mem, tmp1); +- mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); +- emit_move_insn (mem, tmp2); +-} +- + /* Set up OPERANDS for a register copy from SRC to DEST, taking care + not to early-clobber SRC registers in the process. + +@@ -25256,6 +25487,25 @@ + + switch (fcode) + { ++ case ARM_BUILTIN_GET_FPSCR: ++ case ARM_BUILTIN_SET_FPSCR: ++ if (fcode == ARM_BUILTIN_GET_FPSCR) ++ { ++ icode = CODE_FOR_get_fpscr; ++ target = gen_reg_rtx (SImode); ++ pat = GEN_FCN (icode) (target); ++ } ++ else ++ { ++ target = NULL_RTX; ++ icode = CODE_FOR_set_fpscr; ++ arg0 = CALL_EXPR_ARG (exp, 0); ++ op0 = expand_normal (arg0); ++ pat = GEN_FCN (icode) (op0); ++ } ++ emit_insn (pat); ++ return target; + -+ /* In both cases we want to cost both operands. 
*/ -+ *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed) -+ + rtx_cost (op1, (enum rtx_code) code, 1, speed); + case ARM_BUILTIN_TEXTRMSB: + case ARM_BUILTIN_TEXTRMUB: + case ARM_BUILTIN_TEXTRMSH: +@@ -25889,7 +26139,7 @@ + int pops_needed; + unsigned available; + unsigned required; +- int mode; ++ enum machine_mode mode; + int size; + int restore_a4 = FALSE; + +@@ -29550,8 +29800,7 @@ + int in_n, out_n; + + if (TREE_CODE (type_out) != VECTOR_TYPE +- || TREE_CODE (type_in) != VECTOR_TYPE +- || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations)) ++ || TREE_CODE (type_in) != VECTOR_TYPE) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); +@@ -29563,7 +29812,13 @@ + decl of the vectorized builtin for the appropriate vector mode. + NULL_TREE is returned if no such builtin is available. */ + #undef ARM_CHECK_BUILTIN_MODE +-#define ARM_CHECK_BUILTIN_MODE(C) \ ++#define ARM_CHECK_BUILTIN_MODE(C) \ ++ (TARGET_NEON && TARGET_FPU_ARMV8 \ ++ && flag_unsafe_math_optimizations \ ++ && ARM_CHECK_BUILTIN_MODE_1 (C)) ++ ++#undef ARM_CHECK_BUILTIN_MODE_1 ++#define ARM_CHECK_BUILTIN_MODE_1(C) \ + (out_mode == SFmode && out_n == C \ + && in_mode == SFmode && in_n == C) + +@@ -29588,6 +29843,30 @@ + return ARM_FIND_VRINT_VARIANT (vrintz); + case BUILT_IN_ROUNDF: + return ARM_FIND_VRINT_VARIANT (vrinta); ++#undef ARM_CHECK_BUILTIN_MODE ++#define ARM_CHECK_BUILTIN_MODE(C, N) \ ++ (out_mode == N##Imode && out_n == C \ ++ && in_mode == N##Imode && in_n == C) ++ case BUILT_IN_BSWAP16: ++ if (ARM_CHECK_BUILTIN_MODE (4, H)) ++ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false); ++ else if (ARM_CHECK_BUILTIN_MODE (8, H)) ++ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false); ++ else ++ return NULL_TREE; ++ case BUILT_IN_BSWAP32: ++ if (ARM_CHECK_BUILTIN_MODE (2, S)) ++ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false); ++ else if (ARM_CHECK_BUILTIN_MODE (4, S)) ++ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false); ++ else ++ return NULL_TREE; ++ case BUILT_IN_BSWAP64: ++ if (ARM_CHECK_BUILTIN_MODE (2, D)) ++ return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false); ++ else ++ return NULL_TREE; ++ + default: + return NULL_TREE; + } +@@ -31167,4 +31446,73 @@ + return false; + } + ++static void ++arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) ++{ ++ const unsigned ARM_FE_INVALID = 1; ++ const unsigned ARM_FE_DIVBYZERO = 2; ++ const unsigned ARM_FE_OVERFLOW = 4; ++ const unsigned ARM_FE_UNDERFLOW = 8; ++ const unsigned ARM_FE_INEXACT = 16; ++ const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID ++ | ARM_FE_DIVBYZERO ++ | ARM_FE_OVERFLOW ++ | ARM_FE_UNDERFLOW ++ | ARM_FE_INEXACT); ++ const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8; ++ tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; ++ tree new_fenv_var, reload_fenv, restore_fnenv; ++ tree update_call, atomic_feraiseexcept, hold_fnclex; ++ ++ if (!TARGET_VFP) ++ return; ++ ++ /* Generate the equivalent of : ++ unsigned int fenv_var; ++ fenv_var = __builtin_arm_get_fpscr (); ++ ++ unsigned int masked_fenv; ++ masked_fenv = fenv_var & mask; ++ ++ __builtin_arm_set_fpscr (masked_fenv); */ ++ ++ fenv_var = create_tmp_var (unsigned_type_node, NULL); ++ get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]; ++ set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]; ++ mask = build_int_cst (unsigned_type_node, ++ ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT) ++ | ARM_FE_ALL_EXCEPT)); ++ ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node, ++ 
fenv_var, build_call_expr (get_fpscr, 0)); ++ masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask); ++ hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); ++ *hold = build2 (COMPOUND_EXPR, void_type_node, ++ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), ++ hold_fnclex); ++ ++ /* Store the value of masked_fenv to clear the exceptions: ++ __builtin_arm_set_fpscr (masked_fenv); */ ++ ++ *clear = build_call_expr (set_fpscr, 1, masked_fenv); ++ ++ /* Generate the equivalent of : ++ unsigned int new_fenv_var; ++ new_fenv_var = __builtin_arm_get_fpscr (); ++ ++ __builtin_arm_set_fpscr (fenv_var); ++ ++ __atomic_feraiseexcept (new_fenv_var); */ ++ ++ new_fenv_var = create_tmp_var (unsigned_type_node, NULL); ++ reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var, ++ build_call_expr (get_fpscr, 0)); ++ restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); ++ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); ++ update_call = build_call_expr (atomic_feraiseexcept, 1, ++ fold_convert (integer_type_node, new_fenv_var)); ++ *update = build2 (COMPOUND_EXPR, void_type_node, ++ build2 (COMPOUND_EXPR, void_type_node, ++ reload_fenv, restore_fnenv), update_call); ++} ++ + #include "gt-arm.h" +--- a/src/gcc/config/arm/unspecs.md ++++ b/src/gcc/config/arm/unspecs.md +@@ -143,6 +143,8 @@ + VUNSPEC_SLX ; Represent a store-register-release-exclusive. + VUNSPEC_LDA ; Represent a store-register-acquire. + VUNSPEC_STL ; Represent a store-register-release. ++ VUNSPEC_GET_FPSCR ; Represent fetch of FPSCR content. ++ VUNSPEC_SET_FPSCR ; Represent assign of FPSCR content. + ]) + + ;; Enumerators for NEON unspecs. +--- a/src/gcc/config/arm/arm-modes.def ++++ b/src/gcc/config/arm/arm-modes.def +@@ -21,9 +21,6 @@ + along with GCC; see the file COPYING3. If not see + . */ + +-/* Extended precision floating point. +- FIXME What format is this? */ +-FLOAT_MODE (XF, 12, 0); + + /* Half-precision floating point */ + FLOAT_MODE (HF, 2, 0); +--- a/src/gcc/config/arm/arm-cores.def ++++ b/src/gcc/config/arm/arm-cores.def +@@ -141,7 +141,7 @@ + ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex) + ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5) + ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7) +-ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex) ++ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8) + ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9) + ARM_CORE("cortex-a12", cortexa12, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) + ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) +--- a/src/gcc/config/arm/arm-protos.h ++++ b/src/gcc/config/arm/arm-protos.h +@@ -272,6 +272,11 @@ + const struct cpu_vec_costs* vec_costs; + /* Prefer Neon for 64-bit bitops. */ + bool prefer_neon_for_64bits; ++ /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ ++ bool disparage_flag_setting_t16_encodings; ++ /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags ++ would be set. 
*/ ++ bool disparage_partial_flag_setting_t16_encodings; + }; + + extern const struct tune_params *current_tune; +--- a/src/gcc/config/arm/vfp.md ++++ b/src/gcc/config/arm/vfp.md +@@ -100,7 +100,7 @@ + " + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") +- (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") ++ (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") + (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") + (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") + (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] +@@ -1322,6 +1322,22 @@ + (set_attr "conds" "unconditional")] + ) + ++;; Write Floating-point Status and Control Register. ++(define_insn "set_fpscr" ++ [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)] ++ "TARGET_VFP" ++ "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR" ++ [(set_attr "type" "mrs")]) ++ ++;; Read Floating-point Status and Control Register. ++(define_insn "get_fpscr" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))] ++ "TARGET_VFP" ++ "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR" ++ [(set_attr "type" "mrs")]) ++ ++ + ;; Unimplemented insns: + ;; fldm* + ;; fstm* +--- a/src/gcc/config/arm/neon.md ++++ b/src/gcc/config/arm/neon.md +@@ -1842,9 +1842,9 @@ + ; good for plain vadd, vaddq. + + (define_expand "neon_vadd" +- [(match_operand:VDQX 0 "s_register_operand" "=w") +- (match_operand:VDQX 1 "s_register_operand" "w") +- (match_operand:VDQX 2 "s_register_operand" "w") ++ [(match_operand:VCVTF 0 "s_register_operand" "=w") ++ (match_operand:VCVTF 1 "s_register_operand" "w") ++ (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" + { +@@ -1869,9 +1869,9 @@ + ; Used for intrinsics when flag_unsafe_math_optimizations is false. + + (define_insn "neon_vadd_unspec" +- [(set (match_operand:VDQX 0 "s_register_operand" "=w") +- (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") +- (match_operand:VDQX 2 "s_register_operand" "w")] ++ [(set (match_operand:VCVTF 0 "s_register_operand" "=w") ++ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") ++ (match_operand:VCVTF 2 "s_register_operand" "w")] + UNSPEC_VADD))] + "TARGET_NEON" + "vadd.\t%0, %1, %2" +@@ -2132,9 +2132,9 @@ + ) + + (define_expand "neon_vsub" +- [(match_operand:VDQX 0 "s_register_operand" "=w") +- (match_operand:VDQX 1 "s_register_operand" "w") +- (match_operand:VDQX 2 "s_register_operand" "w") ++ [(match_operand:VCVTF 0 "s_register_operand" "=w") ++ (match_operand:VCVTF 1 "s_register_operand" "w") ++ (match_operand:VCVTF 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" + { +@@ -2149,9 +2149,9 @@ + ; Used for intrinsics when flag_unsafe_math_optimizations is false. 
+ + (define_insn "neon_vsub_unspec" +- [(set (match_operand:VDQX 0 "s_register_operand" "=w") +- (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") +- (match_operand:VDQX 2 "s_register_operand" "w")] ++ [(set (match_operand:VCVTF 0 "s_register_operand" "=w") ++ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") ++ (match_operand:VCVTF 2 "s_register_operand" "w")] + UNSPEC_VSUB))] + "TARGET_NEON" + "vsub.\t%0, %1, %2" +@@ -2547,6 +2547,14 @@ + [(set_attr "type" "neon_qabs")] + ) + ++(define_insn "neon_bswap" ++ [(set (match_operand:VDQHSD 0 "register_operand" "=w") ++ (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] ++ "TARGET_NEON" ++ "vrev.8\\t%0, %1" ++ [(set_attr "type" "neon_rev")] ++) + -+ return true; - } -- return true; - } - return false; + (define_expand "neon_vneg" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") +@@ -4140,17 +4148,6 @@ + [(set_attr "type" "neon_permute")] + ) + +-(define_expand "neon_vtrn" +- [(match_operand:SI 0 "s_register_operand" "r") +- (match_operand:VDQW 1 "s_register_operand" "w") +- (match_operand:VDQW 2 "s_register_operand" "w")] +- "TARGET_NEON" +-{ +- neon_emit_pair_result_insn (mode, gen_neon_vtrn_internal, +- operands[0], operands[1], operands[2]); +- DONE; +-}) +- + (define_expand "neon_vzip_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") +@@ -4177,17 +4174,6 @@ + [(set_attr "type" "neon_zip")] + ) + +-(define_expand "neon_vzip" +- [(match_operand:SI 0 "s_register_operand" "r") +- (match_operand:VDQW 1 "s_register_operand" "w") +- (match_operand:VDQW 2 "s_register_operand" "w")] +- "TARGET_NEON" +-{ +- neon_emit_pair_result_insn (mode, gen_neon_vzip_internal, +- operands[0], operands[1], operands[2]); +- DONE; +-}) +- + (define_expand "neon_vuzp_internal" + [(parallel + [(set (match_operand:VDQW 0 "s_register_operand" "") +@@ -4214,17 +4200,6 @@ + [(set_attr "type" "neon_zip")] + ) + +-(define_expand "neon_vuzp" +- [(match_operand:SI 0 "s_register_operand" "r") +- (match_operand:VDQW 1 "s_register_operand" "w") +- (match_operand:VDQW 2 "s_register_operand" "w")] +- "TARGET_NEON" +-{ +- neon_emit_pair_result_insn (mode, gen_neon_vuzp_internal, +- operands[0], operands[1], operands[2]); +- DONE; +-}) +- + (define_expand "neon_vreinterpretv8qi" + [(match_operand:V8QI 0 "s_register_operand" "") + (match_operand:VDX 1 "s_register_operand" "")] +@@ -5357,61 +5332,6 @@ + [(set_attr "type" "neon_store4_4reg")] + ) + +-(define_expand "neon_vand" +- [(match_operand:VDQX 0 "s_register_operand" "") +- (match_operand:VDQX 1 "s_register_operand" "") +- (match_operand:VDQX 2 "neon_inv_logic_op2" "") +- (match_operand:SI 3 "immediate_operand" "")] +- "TARGET_NEON" +-{ +- emit_insn (gen_and3 (operands[0], operands[1], operands[2])); +- DONE; +-}) +- +-(define_expand "neon_vorr" +- [(match_operand:VDQX 0 "s_register_operand" "") +- (match_operand:VDQX 1 "s_register_operand" "") +- (match_operand:VDQX 2 "neon_logic_op2" "") +- (match_operand:SI 3 "immediate_operand" "")] +- "TARGET_NEON" +-{ +- emit_insn (gen_ior3 (operands[0], operands[1], operands[2])); +- DONE; +-}) +- +-(define_expand "neon_veor" +- [(match_operand:VDQX 0 "s_register_operand" "") +- (match_operand:VDQX 1 "s_register_operand" "") +- (match_operand:VDQX 2 "s_register_operand" "") +- (match_operand:SI 3 "immediate_operand" "")] +- "TARGET_NEON" +-{ +- emit_insn (gen_xor3 (operands[0], operands[1], operands[2])); +- DONE; +-}) +- +-(define_expand "neon_vbic" +- [(match_operand:VDQX 
0 "s_register_operand" "") +- (match_operand:VDQX 1 "s_register_operand" "") +- (match_operand:VDQX 2 "neon_logic_op2" "") +- (match_operand:SI 3 "immediate_operand" "")] +- "TARGET_NEON" +-{ +- emit_insn (gen_bic3_neon (operands[0], operands[1], operands[2])); +- DONE; +-}) +- +-(define_expand "neon_vorn" +- [(match_operand:VDQX 0 "s_register_operand" "") +- (match_operand:VDQX 1 "s_register_operand" "") +- (match_operand:VDQX 2 "neon_inv_logic_op2" "") +- (match_operand:SI 3 "immediate_operand" "")] +- "TARGET_NEON" +-{ +- emit_insn (gen_orn3_neon (operands[0], operands[1], operands[2])); +- DONE; +-}) +- + (define_insn "neon_vec_unpack_lo_" + [(set (match_operand: 0 "register_operand" "=w") + (SE: (vec_select: +--- a/src/gcc/config/arm/arm_neon_builtins.def ++++ b/src/gcc/config/arm/arm_neon_builtins.def +@@ -18,8 +18,7 @@ + along with GCC; see the file COPYING3. If not see + . */ + +-VAR10 (BINOP, vadd, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++VAR2 (BINOP, vadd, v2sf, v4sf), + VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), + VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), + VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +@@ -54,7 +53,7 @@ + VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), + VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), + VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +-VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), ++VAR2 (BINOP, vsub, v2sf, v4sf), + VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), + VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), + VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), +@@ -89,6 +88,7 @@ + VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), + VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), + VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), ++VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di), + VAR2 (UNOP, vcnt, v8qi, v16qi), + VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), + VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), +@@ -149,9 +149,6 @@ + VAR1 (VTBX, vtbx2, v8qi), + VAR1 (VTBX, vtbx3, v8qi), + VAR1 (VTBX, vtbx4, v8qi), +-VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +-VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), +-VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), + VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), + VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), + VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), +@@ -199,14 +196,4 @@ + VAR9 (STORESTRUCT, vst4, + v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), + VAR7 (STORESTRUCTLANE, vst4_lane, +- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), +-VAR10 (LOGICBINOP, vand, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +-VAR10 (LOGICBINOP, vorr, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +-VAR10 (BINOP, veor, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +-VAR10 (LOGICBINOP, vbic, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +-VAR10 (LOGICBINOP, vorn, +- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) ++ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) +--- a/src/gcc/config/arm/aarch-common-protos.h ++++ b/src/gcc/config/arm/aarch-common-protos.h +@@ -24,6 +24,9 @@ + #define GCC_AARCH_COMMON_PROTOS_H + + extern int aarch_crypto_can_dual_issue (rtx, rtx); ++extern bool aarch_rev16_p (rtx); ++extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode); ++extern bool 
aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode); + extern int arm_early_load_addr_dep (rtx, rtx); + extern int arm_early_store_addr_dep (rtx, rtx); + extern int arm_mac_accumulator_is_mul_result (rtx, rtx); +@@ -54,6 +57,7 @@ + const int bfi; /* Bit-field insert. */ + const int bfx; /* Bit-field extraction. */ + const int clz; /* Count Leading Zeros. */ ++ const int rev; /* Reverse bits/bytes. */ + const int non_exec; /* Extra cost when not executing insn. */ + const bool non_exec_costs_exec; /* True if non-execution must add the exec + cost. */ +--- a/src/gcc/config/arm/predicates.md ++++ b/src/gcc/config/arm/predicates.md +@@ -291,6 +291,15 @@ + || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (match_test "mode == GET_MODE (op)"))) + ++(define_special_predicate "shift_nomul_operator" ++ (and (ior (and (match_code "rotate") ++ (match_test "CONST_INT_P (XEXP (op, 1)) ++ && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")) ++ (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") ++ (match_test "!CONST_INT_P (XEXP (op, 1)) ++ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) ++ (match_test "mode == GET_MODE (op)"))) ++ + ;; True for shift operators which can be used with saturation instructions. + (define_special_predicate "sat_shift_operator" + (and (ior (and (match_code "mult") +@@ -681,5 +690,6 @@ + (match_code "reg" "0"))) + + (define_predicate "call_insn_operand" +- (ior (match_code "symbol_ref") ++ (ior (and (match_code "symbol_ref") ++ (match_test "!arm_is_long_call_p (SYMBOL_REF_DECL (op))")) + (match_operand 0 "s_register_operand"))) +--- a/src/gcc/config/arm/arm_neon.h ++++ b/src/gcc/config/arm/arm_neon.h +@@ -452,114 +452,121 @@ + } poly64x2x4_t; + #endif + +- +- ++/* vadd */ + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vadd_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1); ++ return __a + __b; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vadd_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1); ++ return __a + __b; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vadd_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1); ++ return __a + __b; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vadd_f32 (float32x2_t __a, float32x2_t __b) + { +- return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3); ++#ifdef __FAST_MATH__ ++ return __a + __b; ++#else ++ return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b, 3); ++#endif + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vadd_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); ++ return __a + __b; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vadd_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); ++ return __a + __b; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vadd_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); ++ return __a + __b; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vadd_s64 (int64x1_t __a, int64x1_t __b) + { +- return 
(int64x1_t)__builtin_neon_vadddi (__a, __b, 1); ++ return __a + __b; + } -+ case NOT: -+ /* MVN. */ -+ if (speed) -+ *cost += extra_cost->alu.logical; -+ -+ /* The logical instruction could have the shifted register form, -+ but the cost is the same if the shift is processed as a separate -+ instruction, so we don't bother with it here. */ -+ return false; -+ - case ZERO_EXTEND: -- if ((GET_MODE (x) == DImode -- && GET_MODE (XEXP (x, 0)) == SImode) -- || GET_CODE (XEXP (x, 0)) == MEM) -+ -+ op0 = XEXP (x, 0); -+ /* If a value is written in SI mode, then zero extended to DI -+ mode, the operation will in general be free as a write to -+ a 'w' register implicitly zeroes the upper bits of an 'x' -+ register. However, if this is -+ -+ (set (reg) (zero_extend (reg))) -+ -+ we must cost the explicit register move. */ -+ if (mode == DImode -+ && GET_MODE (op0) == SImode -+ && outer == SET) - { -- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); -+ int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); -+ -+ if (!op_cost && speed) -+ /* MOV. */ -+ *cost += extra_cost->alu.extend; -+ else -+ /* Free, the cost is that of the SI mode operation. */ -+ *cost = op_cost; -+ - return true; - } -+ else if (MEM_P (XEXP (x, 0))) -+ { -+ /* All loads can zero extend to any size for free. */ -+ *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed); -+ return true; -+ } -+ -+ /* UXTB/UXTH. */ -+ if (speed) -+ *cost += extra_cost->alu.extend; -+ - return false; + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vadd_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0); ++ return __a + __b; + } - case SIGN_EXTEND: -- if (GET_CODE (XEXP (x, 0)) == MEM) -+ if (MEM_P (XEXP (x, 0))) - { -- *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed); -+ /* LDRSH. */ -+ if (speed) -+ { -+ rtx address = XEXP (XEXP (x, 0), 0); -+ *cost += extra_cost->ldst.load_sign_extend; -+ -+ *cost += -+ COSTS_N_INSNS (aarch64_address_cost (address, mode, -+ 0, speed)); -+ } - return true; - } -+ -+ if (speed) -+ *cost += extra_cost->alu.extend; - return false; + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vaddq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1); ++ return __a + __b; + } -+ case ASHIFT: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); -+ -+ if (CONST_INT_P (op1)) -+ { -+ /* LSL (immediate), UBMF, UBFIZ and friends. These are all -+ aliases. */ -+ if (speed) -+ *cost += extra_cost->alu.shift; -+ -+ /* We can incorporate zero/sign extend for free. */ -+ if (GET_CODE (op0) == ZERO_EXTEND -+ || GET_CODE (op0) == SIGN_EXTEND) -+ op0 = XEXP (op0, 0); -+ -+ *cost += rtx_cost (op0, ASHIFT, 0, speed); -+ return true; -+ } -+ else -+ { -+ /* LSLV. */ -+ if (speed) -+ *cost += extra_cost->alu.shift_reg; -+ -+ return false; /* All arguments need to be in registers. */ -+ } -+ - case ROTATE: -- if (!CONST_INT_P (XEXP (x, 1))) -- *cost += COSTS_N_INSNS (2); -- /* Fall through. */ - case ROTATERT: - case LSHIFTRT: -- case ASHIFT: - case ASHIFTRT: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vaddq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1); ++ return __a + __b; + } -- /* Shifting by a register often takes an extra cycle. 
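/* Caution: the float variants in this change (vaddq_f32, vmul_f32,
   vmulq_f32, vsub_f32, vsubq_f32) test "#ifdef __FAST_MATH", while
   vadd_f32 tests __FAST_MATH__.  GCC predefines only __FAST_MATH__
   (with trailing underscores) under -ffast-math, so the __FAST_MATH
   guards can never be true and those intrinsics always take the
   builtin path.  The guard was presumably meant to be spelled: */
#ifdef __FAST_MATH__
/* ... return the plain vector-operator form ... */
#endif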
*/ -- if (speed && !CONST_INT_P (XEXP (x, 1))) -- *cost += extra_cost->alu.arith_shift_reg; -+ if (CONST_INT_P (op1)) -+ { -+ /* ASR (immediate) and friends. */ -+ if (speed) -+ *cost += extra_cost->alu.shift; + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vaddq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1); ++ return __a + __b; + } -- *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed); -+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); -+ return true; -+ } -+ else -+ { -+ -+ /* ASR (register) and friends. */ -+ if (speed) -+ *cost += extra_cost->alu.shift_reg; -+ -+ return false; /* All arguments need to be in registers. */ -+ } -+ -+ case SYMBOL_REF: -+ -+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE) -+ { -+ /* LDR. */ -+ if (speed) -+ *cost += extra_cost->ldst.load; -+ } -+ else if (aarch64_cmodel == AARCH64_CMODEL_SMALL -+ || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC) -+ { -+ /* ADRP, followed by ADD. */ -+ *cost += COSTS_N_INSNS (1); -+ if (speed) -+ *cost += 2 * extra_cost->alu.arith; -+ } -+ else if (aarch64_cmodel == AARCH64_CMODEL_TINY -+ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) -+ { -+ /* ADR. */ -+ if (speed) -+ *cost += extra_cost->alu.arith; -+ } + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vaddq_s64 (int64x2_t __a, int64x2_t __b) + { +- return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1); ++ return __a + __b; + } + + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vaddq_f32 (float32x4_t __a, float32x4_t __b) + { +- return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3); ++#ifdef __FAST_MATH ++ return __a + __b; ++#else ++ return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b, 3); ++#endif + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vaddq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); ++ return __a + __b; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vaddq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); ++ return __a + __b; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vaddq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); ++ return __a + __b; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vaddq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); ++ return __a + __b; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +@@ -949,93 +956,102 @@ + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vmul_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1); ++ return __a * __b; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vmul_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1); ++ return __a * __b; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vmul_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1); ++ return __a * __b; + } + + __extension__ static __inline float32x2_t 
__attribute__ ((__always_inline__)) + vmul_f32 (float32x2_t __a, float32x2_t __b) + { +- return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3); ++#ifdef __FAST_MATH ++ return __a * __b; ++#else ++ return (float32x2_t) __builtin_neon_vmulv2sf (__a, __b, 3); ++#endif + -+ if (flag_pic) -+ { -+ /* One extra load instruction, after accessing the GOT. */ -+ *cost += COSTS_N_INSNS (1); -+ if (speed) -+ *cost += extra_cost->ldst.load; -+ } - return true; + } - case HIGH: -- if (!CONSTANT_P (XEXP (x, 0))) -- *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed); -- return true; + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vmul_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); ++ return __a * __b; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vmul_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); ++ return __a * __b; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vmul_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0); ++ return __a * __b; + } + +-__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +-vmul_p8 (poly8x8_t __a, poly8x8_t __b) +-{ +- return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +-} - - case LO_SUM: -- if (!CONSTANT_P (XEXP (x, 1))) -- *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed); -- *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed); -+ /* ADRP/ADD (immediate). */ -+ if (speed) -+ *cost += extra_cost->alu.arith; - return true; + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vmulq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1); ++ return __a * __b; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vmulq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1); ++ return __a * __b; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vmulq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1); ++ return __a * __b; + } - case ZERO_EXTRACT: - case SIGN_EXTRACT: -- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed); -+ /* UBFX/SBFX. */ -+ if (speed) -+ *cost += extra_cost->alu.bfx; -+ -+ /* We can trust that the immediates used will be correct (there -+ are no by-register forms), so we need only cost op0. */ -+ *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed); - return true; + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vmulq_f32 (float32x4_t __a, float32x4_t __b) + { +- return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3); ++#ifdef __FAST_MATH ++ return __a * __b; ++#else ++ return (float32x4_t) __builtin_neon_vmulv4sf (__a, __b, 3); ++#endif + } - case MULT: -- op0 = XEXP (x, 0); -- op1 = XEXP (x, 1); -+ *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed); -+ /* aarch64_rtx_mult_cost always handles recursion to its -+ operands. 
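/* The vand/vorr/veor/vbic/vorn intrinsic families in this patch are
   rewritten the same way, as plain vector operators, and the matching
   LOGICBINOP builtins are dropped from arm_neon_builtins.def.  Sketch
   of the resulting semantics (GCC vector extensions; names
   illustrative): */
#include <arm_neon.h>
uint32x2_t
logic_demo (uint32x2_t a, uint32x2_t b)
{
  uint32x2_t x = a & b;        /* vand_u32 */
  uint32x2_t y = a | b;        /* vorr_u32 */
  return x ^ y;                /* veor_u32 */
}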
*/ -+ return true; + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vmulq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); ++ return __a * __b; + } -- *cost = COSTS_N_INSNS (1); -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- { -- if (CONST_INT_P (op1) -- && exact_log2 (INTVAL (op1)) > 0) -- { -- *cost += rtx_cost (op0, ASHIFT, 0, speed); -- return true; -- } -- -- if ((GET_CODE (op0) == ZERO_EXTEND -- && GET_CODE (op1) == ZERO_EXTEND) -- || (GET_CODE (op0) == SIGN_EXTEND -- && GET_CODE (op1) == SIGN_EXTEND)) -- { -- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) -- + rtx_cost (XEXP (op1, 0), MULT, 1, speed)); -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].extend; -- return true; -- } -- -- if (speed) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].simple; -- } -- else if (speed) -- { -- if (GET_MODE (x) == DFmode) -- *cost += extra_cost->fp[1].mult; -- else if (GET_MODE (x) == SFmode) -- *cost += extra_cost->fp[0].mult; -- } -- -- return false; /* All arguments need to be in registers. */ -- - case MOD: - case UMOD: -- *cost = COSTS_N_INSNS (2); - if (speed) - { - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -@@ -4800,53 +5636,179 @@ + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vmulq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); ++ return __a * __b; + } - case DIV: - case UDIV: -- *cost = COSTS_N_INSNS (1); -+ case SQRT: - if (speed) - { -- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) -- *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv; -- else if (GET_MODE (x) == DFmode) -- *cost += extra_cost->fp[1].div; -- else if (GET_MODE (x) == SFmode) -- *cost += extra_cost->fp[0].div; -+ if (GET_MODE_CLASS (mode) == MODE_INT) -+ /* There is no integer SQRT, so only DIV and UDIV can get -+ here. */ -+ *cost += extra_cost->mult[mode == DImode].idiv; -+ else -+ *cost += extra_cost->fp[mode == DFmode].div; - } - return false; /* All arguments need to be in registers. */ + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vmulq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0); ++ return __a * __b; + } -+ case IF_THEN_ELSE: -+ return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1), -+ XEXP (x, 2), cost, speed); -+ -+ case EQ: -+ case NE: -+ case GT: -+ case GTU: -+ case LT: -+ case LTU: -+ case GE: -+ case GEU: -+ case LE: -+ case LEU: -+ -+ return false; /* All arguments must be in registers. */ -+ -+ case FMA: -+ op0 = XEXP (x, 0); -+ op1 = XEXP (x, 1); -+ op2 = XEXP (x, 2); -+ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].fma; -+ -+ /* FMSUB, FNMADD, and FNMSUB are free. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ -+ if (GET_CODE (op2) == NEG) -+ op2 = XEXP (op2, 0); -+ -+ /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1, -+ and the by-element operand as operand 0. */ -+ if (GET_CODE (op1) == NEG) -+ op1 = XEXP (op1, 0); -+ -+ /* Catch vector-by-element operations. The by-element operand can -+ either be (vec_duplicate (vec_select (x))) or just -+ (vec_select (x)), depending on whether we are multiplying by -+ a vector or a scalar. -+ -+ Canonicalization is not very good in these cases, FMA4 will put the -+ by-element operand as operand 0, FNMA4 will have it as operand 1. 
*/ -+ if (GET_CODE (op0) == VEC_DUPLICATE) -+ op0 = XEXP (op0, 0); -+ else if (GET_CODE (op1) == VEC_DUPLICATE) -+ op1 = XEXP (op1, 0); -+ -+ if (GET_CODE (op0) == VEC_SELECT) -+ op0 = XEXP (op0, 0); -+ else if (GET_CODE (op1) == VEC_SELECT) -+ op1 = XEXP (op1, 0); -+ -+ /* If the remaining parameters are not registers, -+ get the cost to put them into registers. */ -+ *cost += rtx_cost (op0, FMA, 0, speed); -+ *cost += rtx_cost (op1, FMA, 1, speed); -+ *cost += rtx_cost (op2, FMA, 2, speed); -+ return true; -+ -+ case FLOAT_EXTEND: -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].widen; -+ return false; -+ -+ case FLOAT_TRUNCATE: -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].narrow; -+ return false; -+ -+ case ABS: -+ if (GET_MODE_CLASS (mode) == MODE_FLOAT) -+ { -+ /* FABS and FNEG are analogous. */ -+ if (speed) -+ *cost += extra_cost->fp[mode == DFmode].neg; -+ } -+ else -+ { -+ /* Integer ABS will either be split to -+ two arithmetic instructions, or will be an ABS -+ (scalar), which we don't model. */ -+ *cost = COSTS_N_INSNS (2); -+ if (speed) -+ *cost += 2 * extra_cost->alu.arith; -+ } -+ return false; -+ -+ case SMAX: -+ case SMIN: -+ if (speed) -+ { -+ /* FMAXNM/FMINNM/FMAX/FMIN. -+ TODO: This may not be accurate for all implementations, but -+ we do not model this in the cost tables. */ -+ *cost += extra_cost->fp[mode == DFmode].addsub; -+ } -+ return false; -+ -+ case TRUNCATE: -+ -+ /* Decompose muldi3_highpart. */ -+ if (/* (truncate:DI */ -+ mode == DImode -+ /* (lshiftrt:TI */ -+ && GET_MODE (XEXP (x, 0)) == TImode -+ && GET_CODE (XEXP (x, 0)) == LSHIFTRT -+ /* (mult:TI */ -+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT -+ /* (ANY_EXTEND:TI (reg:DI)) -+ (ANY_EXTEND:TI (reg:DI))) */ -+ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND -+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND) -+ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND -+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)) -+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode -+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode -+ /* (const_int 64) */ -+ && CONST_INT_P (XEXP (XEXP (x, 0), 1)) -+ && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64) -+ { -+ /* UMULH/SMULH. */ -+ if (speed) -+ *cost += extra_cost->mult[mode == DImode].extend; -+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0), -+ MULT, 0, speed); -+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0), -+ MULT, 1, speed); -+ return true; -+ } ++__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) ++vmul_p8 (poly8x8_t __a, poly8x8_t __b) ++{ ++ return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); ++} + -+ /* Fall through. */ - default: -- break; -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, -+ "\nFailed to cost RTX. 
Assuming default cost.\n"); + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) + vmulq_p8 (poly8x16_t __a, poly8x16_t __b) + { +@@ -1520,112 +1536,121 @@ + } + + #endif + -+ return true; - } - return false; + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vsub_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1); ++ return __a - __b; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vsub_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1); ++ return __a - __b; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vsub_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1); ++ return __a - __b; + } + + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) + vsub_f32 (float32x2_t __a, float32x2_t __b) + { +- return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3); ++#ifdef __FAST_MATH ++ return __a - __b; ++#else ++ return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b, 3); ++#endif } --static int --aarch64_address_cost (rtx x ATTRIBUTE_UNUSED, -- enum machine_mode mode ATTRIBUTE_UNUSED, -- addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) -+/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost -+ calculated for X. This cost is stored in *COST. Returns true -+ if the total cost of X was calculated. */ -+static bool -+aarch64_rtx_costs_wrapper (rtx x, int code, int outer, -+ int param, int *cost, bool speed) + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vsub_u8 (uint8x8_t __a, uint8x8_t __b) { -- enum rtx_code c = GET_CODE (x); -- const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; -+ bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed); +- return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); ++ return __a - __b; + } -- if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) -- return addr_cost->pre_modify; -- -- if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) -- return addr_cost->post_modify; -- -- if (c == PLUS) -+ if (dump_file && (dump_flags & TDF_DETAILS)) - { -- if (GET_CODE (XEXP (x, 1)) == CONST_INT) -- return addr_cost->imm_offset; -- else if (GET_CODE (XEXP (x, 0)) == MULT -- || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND -- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) -- return addr_cost->register_extend; -- -- return addr_cost->register_offset; -+ print_rtl_single (dump_file, x); -+ fprintf (dump_file, "\n%s cost: %d (%s)\n", -+ speed ? "Hot" : "Cold", -+ *cost, result ? 
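/* The TRUNCATE case above matches the muldi3_highpart shape so that a
   64x64->128 high-part multiply is costed as a single UMULH/SMULH
   rather than as its component RTL.  The usual C source for that
   shape (sketch, assuming an LP64 target with __int128 support): */
#include <stdint.h>
uint64_t
mulhi_u64 (uint64_t a, uint64_t b)
{
  /* (truncate:DI (lshiftrt:TI (mult:TI (zero_extend:TI ...)
                                        (zero_extend:TI ...))
                               (const_int 64)))  */
  return (uint64_t) (((unsigned __int128) a * b) >> 64);
}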
"final" : "partial"); - } -- else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) -- return addr_cost->imm_offset; + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vsub_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); ++ return __a - __b; + } -- return 0; -+ return result; + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vsub_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); ++ return __a - __b; } - static int -@@ -8093,11 +9055,6 @@ - enum machine_mode vmode = d->vmode; - unsigned int i, nelt = d->nelt; + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vsub_s64 (int64x1_t __a, int64x1_t __b) + { +- return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); ++ return __a - __b; + } -- /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's -- numbering of elements for big-endian, we must reverse the order. */ -- if (BYTES_BIG_ENDIAN) -- return false; -- - if (d->testing_p) - return true; + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vsub_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); ++ return __a - __b; + } -@@ -8108,7 +9065,15 @@ - return false; + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vsubq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1); ++ return __a - __b; + } - for (i = 0; i < nelt; ++i) -- rperm[i] = GEN_INT (d->perm[i]); -+ { -+ int nunits = GET_MODE_NUNITS (vmode); -+ -+ /* If big-endian and two vectors we end up with a weird mixed-endian -+ mode on NEON. Reverse the index within each word but not the word -+ itself. */ -+ rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1) -+ : d->perm[i]); -+ } - sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); - sel = force_reg (vmode, sel); + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vsubq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1); ++ return __a - __b; + } -@@ -8263,7 +9228,8 @@ - /* Limited combinations of subregs are safe on FPREGs. Particularly, - 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed. - 2. Scalar to Scalar for integer modes or same size float modes. -- 3. Vector to Vector modes. */ -+ 3. Vector to Vector modes. -+ 4. On little-endian only, Vector-Structure to Vector modes. */ - if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to)) - { - if (aarch64_vector_mode_supported_p (from) -@@ -8279,11 +9245,41 @@ - if (aarch64_vector_mode_supported_p (from) - && aarch64_vector_mode_supported_p (to)) - return false; -+ -+ /* Within an vector structure straddling multiple vector registers -+ we are in a mixed-endian representation. As such, we can't -+ easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can -+ switch between vectors and vector structures cheaply. 
*/ -+ if (!BYTES_BIG_ENDIAN) -+ if ((aarch64_vector_mode_supported_p (from) -+ && aarch64_vect_struct_mode_p (to)) -+ || (aarch64_vector_mode_supported_p (to) -+ && aarch64_vect_struct_mode_p (from))) -+ return false; - } + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vsubq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1); ++ return __a - __b; + } - return true; + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vsubq_s64 (int64x2_t __a, int64x2_t __b) + { +- return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1); ++ return __a - __b; } -+/* Implement MODES_TIEABLE_P. */ -+ -+bool -+aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) -+{ -+ if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)) -+ return true; -+ -+ /* We specifically want to allow elements of "structure" modes to -+ be tieable to the structure. This more general condition allows -+ other rarer situations too. */ -+ if (TARGET_SIMD -+ && aarch64_vector_mode_p (mode1) -+ && aarch64_vector_mode_p (mode2)) -+ return true; -+ -+ return false; -+} -+ - #undef TARGET_ADDRESS_COST - #define TARGET_ADDRESS_COST aarch64_address_cost + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) + vsubq_f32 (float32x4_t __a, float32x4_t __b) + { +- return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3); ++#ifdef __FAST_MATH ++ return __a - __b; ++#else ++ return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b, 3); ++#endif + } -@@ -8454,7 +9450,7 @@ - #define TARGET_RETURN_IN_MSB aarch64_return_in_msb + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vsubq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); ++ return __a - __b; + } - #undef TARGET_RTX_COSTS --#define TARGET_RTX_COSTS aarch64_rtx_costs -+#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vsubq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); ++ return __a - __b; + } - #undef TARGET_SCHED_ISSUE_RATE - #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate ---- a/src/gcc/config/aarch64/iterators.md -+++ b/src/gcc/config/aarch64/iterators.md -@@ -150,6 +150,9 @@ - ;; Vector modes for H and S types. - (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vsubq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); ++ return __a - __b; + } -+;; Vector modes for H, S and D types. -+(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) -+ - ;; Vector modes for Q, H and S types. 
- (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vsubq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); ++ return __a - __b; + } -@@ -352,6 +355,9 @@ - (V2DI "2d") (V2SF "2s") - (V4SF "4s") (V2DF "2d")]) + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +@@ -11295,484 +11320,483 @@ + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vand_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1); ++ return __a & __b; + } -+(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32") -+ (V4SI "32") (V2DI "64")]) -+ - (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") - (V4HI ".4h") (V8HI ".8h") - (V2SI ".2s") (V4SI ".4s") -@@ -554,6 +560,32 @@ + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vand_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1); ++ return __a & __b; + } - (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vand_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1); ++ return __a & __b; + } -+;; Mode of pair of elements for each vector mode, to define transfer -+;; size for structure lane/dup loads and stores. -+(define_mode_attr V_TWO_ELEM [(V8QI "HI") (V16QI "HI") -+ (V4HI "SI") (V8HI "SI") -+ (V2SI "V2SI") (V4SI "V2SI") -+ (DI "V2DI") (V2DI "V2DI") -+ (V2SF "V2SF") (V4SF "V2SF") -+ (DF "V2DI") (V2DF "V2DI")]) -+ -+;; Similar, for three elements. -+(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK") -+ (V4HI "BLK") (V8HI "BLK") -+ (V2SI "BLK") (V4SI "BLK") -+ (DI "EI") (V2DI "EI") -+ (V2SF "BLK") (V4SF "BLK") -+ (DF "EI") (V2DF "EI")]) -+ -+;; Similar, for four elements. 
-+(define_mode_attr V_FOUR_ELEM [(V8QI "SI") (V16QI "SI") -+ (V4HI "V4HI") (V8HI "V4HI") -+ (V2SI "V4SI") (V4SI "V4SI") -+ (DI "OI") (V2DI "OI") -+ (V2SF "V4SF") (V4SF "V4SF") -+ (DF "OI") (V2DF "OI")]) -+ -+ - ;; Mode for atomic operation suffixes - (define_mode_attr atomic_sfx - [(QI "b") (HI "h") (SI "") (DI "")]) ---- a/src/gcc/config/aarch64/aarch64.h -+++ b/src/gcc/config/aarch64/aarch64.h -@@ -365,8 +365,7 @@ + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vand_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); ++ return __a & __b; + } - #define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE) + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vand_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); ++ return __a & __b; + } --#define MODES_TIEABLE_P(MODE1, MODE2) \ -- (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) -+#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2) + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vand_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0); ++ return __a & __b; + } - #define DWARF2_UNWIND_INFO 1 + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vand_s64 (int64x1_t __a, int64x1_t __b) + { +- return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1); ++ return __a & __b; + } -@@ -520,7 +519,6 @@ - been saved. */ - HOST_WIDE_INT padding0; - HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */ -- HOST_WIDE_INT fp_lr_offset; /* Space needed for saving fp and/or lr */ + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vand_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0); ++ return __a & __b; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vandq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1); ++ return __a & __b; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vandq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1); ++ return __a & __b; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vandq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1); ++ return __a & __b; + } - bool laid_out; - }; ---- a/src/gcc/config/arm/aarch-cost-tables.h -+++ b/src/gcc/config/arm/aarch-cost-tables.h -@@ -39,6 +39,7 @@ - 0, /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - COSTS_N_INSNS (1), /* non_exec. */ - false /* non_exec_costs_exec. */ - }, -@@ -139,6 +140,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -239,6 +241,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. 
*/ - }, ---- a/src/gcc/config/arm/thumb2.md -+++ b/src/gcc/config/arm/thumb2.md -@@ -1370,6 +1370,103 @@ - (set_attr "type" "alu_reg")] - ) + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vandq_s64 (int64x2_t __a, int64x2_t __b) + { +- return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1); ++ return __a & __b; + } -+; Constants for op 2 will never be given to these patterns. -+(define_insn_and_split "*iordi_notdi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r")) -+ (match_operand:DI 2 "s_register_operand" "r,0")))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 1)) (match_dup 2))) -+ (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[5] = gen_highpart (SImode, operands[2]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notzesidi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (zero_extend:DI -+ (match_operand:SI 2 "s_register_operand" "r,r"))) -+ (match_operand:DI 1 "s_register_operand" "0,?r")))] -+ "TARGET_THUMB2" -+ "#" -+ ; (not (zero_extend...)) means operand0 will always be 0xffffffff -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (const_int -1))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ }" -+ [(set_attr "length" "4,8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notdi_zesidi" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r")) -+ (zero_extend:DI -+ (match_operand:SI 1 "s_register_operand" "r,r"))))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (not:SI (match_dup 4)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[4] = gen_highpart (SImode, operands[2]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn_and_split "*iordi_notsesidi_di" -+ [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") -+ (ior:DI (not:DI (sign_extend:DI -+ (match_operand:SI 2 "s_register_operand" "r,r"))) -+ (match_operand:DI 1 "s_register_operand" "0,r")))] -+ "TARGET_THUMB2" -+ "#" -+ "TARGET_THUMB2 && reload_completed" -+ [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) -+ (set (match_dup 3) (ior:SI (not:SI -+ (ashiftrt:SI (match_dup 2) (const_int 31))) -+ (match_dup 4)))] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = 
gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ }" -+ [(set_attr "length" "8") -+ (set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" "no") -+ (set_attr "type" "multiple")] -+) -+ - (define_insn "*orsi_notsi_si" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) ---- a/src/gcc/config/arm/arm.c -+++ b/src/gcc/config/arm/arm.c -@@ -986,6 +986,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1069,7 +1070,109 @@ - } - }; + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vandq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); ++ return __a & __b; + } -+const struct cpu_cost_table cortexa8_extra_costs = -+{ -+ /* ALU */ -+ { -+ 0, /* arith. */ -+ 0, /* logical. */ -+ COSTS_N_INSNS (1), /* shift. */ -+ 0, /* shift_reg. */ -+ COSTS_N_INSNS (1), /* arith_shift. */ -+ 0, /* arith_shift_reg. */ -+ COSTS_N_INSNS (1), /* log_shift. */ -+ 0, /* log_shift_reg. */ -+ 0, /* extend. */ -+ 0, /* extend_arith. */ -+ 0, /* bfi. */ -+ 0, /* bfx. */ -+ 0, /* clz. */ -+ 0, /* rev. */ -+ 0, /* non_exec. */ -+ true /* non_exec_costs_exec. */ -+ }, -+ { -+ /* MULT SImode */ -+ { -+ COSTS_N_INSNS (1), /* simple. */ -+ COSTS_N_INSNS (1), /* flag_setting. */ -+ COSTS_N_INSNS (1), /* extend. */ -+ COSTS_N_INSNS (1), /* add. */ -+ COSTS_N_INSNS (1), /* extend_add. */ -+ COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */ -+ }, -+ /* MULT DImode */ -+ { -+ 0, /* simple (N/A). */ -+ 0, /* flag_setting (N/A). */ -+ COSTS_N_INSNS (2), /* extend. */ -+ 0, /* add (N/A). */ -+ COSTS_N_INSNS (2), /* extend_add. */ -+ 0 /* idiv (N/A). */ -+ } -+ }, -+ /* LD/ST */ -+ { -+ COSTS_N_INSNS (1), /* load. */ -+ COSTS_N_INSNS (1), /* load_sign_extend. */ -+ COSTS_N_INSNS (1), /* ldrd. */ -+ COSTS_N_INSNS (1), /* ldm_1st. */ -+ 1, /* ldm_regs_per_insn_1st. */ -+ 2, /* ldm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (1), /* loadf. */ -+ COSTS_N_INSNS (1), /* loadd. */ -+ COSTS_N_INSNS (1), /* load_unaligned. */ -+ COSTS_N_INSNS (1), /* store. */ -+ COSTS_N_INSNS (1), /* strd. */ -+ COSTS_N_INSNS (1), /* stm_1st. */ -+ 1, /* stm_regs_per_insn_1st. */ -+ 2, /* stm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (1), /* storef. */ -+ COSTS_N_INSNS (1), /* stored. */ -+ COSTS_N_INSNS (1) /* store_unaligned. */ -+ }, -+ { -+ /* FP SFmode */ -+ { -+ COSTS_N_INSNS (36), /* div. */ -+ COSTS_N_INSNS (11), /* mult. */ -+ COSTS_N_INSNS (20), /* mult_addsub. */ -+ COSTS_N_INSNS (30), /* fma. */ -+ COSTS_N_INSNS (9), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (6), /* compare. */ -+ COSTS_N_INSNS (4), /* widen. */ -+ COSTS_N_INSNS (4), /* narrow. */ -+ COSTS_N_INSNS (8), /* toint. */ -+ COSTS_N_INSNS (8), /* fromint. */ -+ COSTS_N_INSNS (8) /* roundint. */ -+ }, -+ /* FP DFmode */ -+ { -+ COSTS_N_INSNS (64), /* div. */ -+ COSTS_N_INSNS (16), /* mult. */ -+ COSTS_N_INSNS (25), /* mult_addsub. */ -+ COSTS_N_INSNS (30), /* fma. */ -+ COSTS_N_INSNS (9), /* addsub. */ -+ COSTS_N_INSNS (3), /* fpconst. */ -+ COSTS_N_INSNS (3), /* neg. */ -+ COSTS_N_INSNS (6), /* compare. */ -+ COSTS_N_INSNS (6), /* widen. */ -+ COSTS_N_INSNS (6), /* narrow. */ -+ COSTS_N_INSNS (8), /* toint. 
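/* The *iordi_not* splitters above open-code 64-bit "or with
   complement" on Thumb-2 as two SImode operations.  Source pattern
   that should now map onto them after reload (sketch): */
#include <stdint.h>
uint64_t
or_not (uint64_t a, uint64_t b)
{
  /* (ior:DI (not:DI b) a), as matched by *iordi_notdi_di.  */
  return a | ~b;
}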
*/ -+ COSTS_N_INSNS (8), /* fromint. */ -+ COSTS_N_INSNS (8) /* roundint. */ -+ } -+ }, -+ /* Vector */ -+ { -+ COSTS_N_INSNS (1) /* alu. */ -+ } -+}; + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vandq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); ++ return __a & __b; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vandq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0); ++ return __a & __b; + } -+ -+ - const struct cpu_cost_table cortexa7_extra_costs = + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vandq_u64 (uint64x2_t __a, uint64x2_t __b) { - /* ALU */ -@@ -1087,6 +1190,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - COSTS_N_INSNS (1), /* clz. */ -+ COSTS_N_INSNS (1), /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1188,6 +1292,7 @@ - 0, /* bfi. */ - COSTS_N_INSNS (1), /* bfx. */ - COSTS_N_INSNS (1), /* clz. */ -+ COSTS_N_INSNS (1), /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1288,6 +1393,7 @@ - COSTS_N_INSNS (1), /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - 0, /* non_exec. */ - true /* non_exec_costs_exec. */ - }, -@@ -1388,6 +1494,7 @@ - 0, /* bfi. */ - 0, /* bfx. */ - 0, /* clz. */ -+ 0, /* rev. */ - COSTS_N_INSNS (1), /* non_exec. */ - false /* non_exec_costs_exec. */ - }, -@@ -1484,7 +1591,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; +- return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0); ++ return __a & __b; + } - const struct tune_params arm_fastmul_tune = -@@ -1500,7 +1608,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vorr_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1); ++ return __a | __b; + } - /* StrongARM has early execution of branches, so a sequence that is worth -@@ -1519,7 +1628,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vorr_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1); ++ return __a | __b; + } - const struct tune_params arm_xscale_tune = -@@ -1535,7 +1645,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. 
*/ - }; + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vorr_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1); ++ return __a | __b; + } - const struct tune_params arm_9e_tune = -@@ -1551,7 +1662,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vorr_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); ++ return __a | __b; + } - const struct tune_params arm_v6t2_tune = -@@ -1567,7 +1679,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vorr_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); ++ return __a | __b; + } - /* Generic Cortex tuning. Use more specific tunings if appropriate. */ -@@ -1584,9 +1697,27 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vorr_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0); ++ return __a | __b; + } -+const struct tune_params arm_cortex_a8_tune = -+{ -+ arm_9e_rtx_costs, -+ &cortexa8_extra_costs, -+ NULL, /* Sched adj cost. */ -+ 1, /* Constant limit. */ -+ 5, /* Max cond insns. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ false, /* Prefer constant pool. */ -+ arm_default_branch_cost, -+ false, /* Prefer LDRD/STRD. */ -+ {true, true}, /* Prefer non short circuit. */ -+ &arm_default_vec_cost, /* Vectorizer costs. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ -+}; -+ - const struct tune_params arm_cortex_a7_tune = + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vorr_s64 (int64x1_t __a, int64x1_t __b) { - arm_9e_rtx_costs, -@@ -1600,7 +1731,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; +- return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1); ++ return __a | __b; + } - const struct tune_params arm_cortex_a15_tune = -@@ -1616,7 +1748,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ true, true /* Prefer 32-bit encodings. 
*/ - }; + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vorr_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0); ++ return __a | __b; + } - const struct tune_params arm_cortex_a53_tune = -@@ -1632,7 +1765,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vorrq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1); ++ return __a | __b; + } - const struct tune_params arm_cortex_a57_tune = -@@ -1648,7 +1782,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ true, true /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vorrq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1); ++ return __a | __b; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vorrq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1); ++ return __a | __b; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vorrq_s64 (int64x2_t __a, int64x2_t __b) + { +- return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1); ++ return __a | __b; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vorrq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); ++ return __a | __b; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vorrq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); ++ return __a | __b; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vorrq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0); ++ return __a | __b; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vorrq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0); ++ return __a | __b; + } + + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + veor_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1); ++ return __a ^ __b; + } + + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + veor_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1); ++ return __a ^ __b; + } + + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + veor_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1); ++ return __a ^ __b; + } + + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + veor_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) 
__a, (int8x8_t) __b, 0); ++ return __a ^ __b; + } + + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + veor_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); ++ return __a ^ __b; + } + + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + veor_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0); ++ return __a ^ __b; + } + + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + veor_s64 (int64x1_t __a, int64x1_t __b) + { +- return (int64x1_t)__builtin_neon_veordi (__a, __b, 1); ++ return __a ^ __b; + } + + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + veor_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0); ++ return __a ^ __b; + } + + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + veorq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1); ++ return __a ^ __b; + } + + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + veorq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1); ++ return __a ^ __b; + } + + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + veorq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1); ++ return __a ^ __b; + } + + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + veorq_s64 (int64x2_t __a, int64x2_t __b) + { +- return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1); ++ return __a ^ __b; + } + + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + veorq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); ++ return __a ^ __b; + } + + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + veorq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); ++ return __a ^ __b; + } + + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + veorq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0); ++ return __a ^ __b; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + veorq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0); ++ return __a ^ __b; + } - /* Branches can be dual-issued on Cortex-A5, so conditional execution is -@@ -1667,7 +1802,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vbic_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1); ++ return __a & ~__b; + } - const struct tune_params arm_cortex_a9_tune = -@@ -1683,7 +1819,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. 
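/* Operand-order reminder for the complement forms in this hunk: the
   second argument is the one complemented, as the new open-coded
   bodies make explicit (sketch; function name illustrative): */
#include <arm_neon.h>
uint8x8_t
bic_demo (uint8x8_t a, uint8x8_t b)
{
  /* vbic_u8 is a & ~b; vorn_u8 is a | ~b.  */
  return vbic_u8 (a, b);
}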
*/ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vbic_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1); ++ return __a & ~__b; + } - const struct tune_params arm_cortex_a12_tune = -@@ -1699,7 +1836,8 @@ - true, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vbic_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1); ++ return __a & ~__b; + } - /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single -@@ -1722,7 +1860,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vbic_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); ++ return __a & ~__b; + } - /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than -@@ -1740,7 +1879,8 @@ - false, /* Prefer LDRD/STRD. */ - {false, false}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vbic_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); ++ return __a & ~__b; + } - const struct tune_params arm_fa726te_tune = -@@ -1756,7 +1896,8 @@ - false, /* Prefer LDRD/STRD. */ - {true, true}, /* Prefer non short circuit. */ - &arm_default_vec_cost, /* Vectorizer costs. */ -- false /* Prefer Neon for 64-bits bitops. */ -+ false, /* Prefer Neon for 64-bits bitops. */ -+ false, false /* Prefer 32-bit encodings. */ - }; + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vbic_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0); ++ return __a & ~__b; + } + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vbic_s64 (int64x1_t __a, int64x1_t __b) + { +- return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1); ++ return __a & ~__b; + } -@@ -6080,11 +6221,6 @@ - if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) - return false; + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vbic_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0); ++ return __a & ~__b; + } -- /* Cannot tail-call to long calls, since these are out of range of -- a branch instruction. 
*/ -- if (decl && arm_is_long_call_p (decl)) -- return false; -- - /* If we are interworking and the function is not declared static - then we can't tail-call it unless we know that it exists in this - compilation unit (since it might be a Thumb routine). */ -@@ -9338,6 +9474,47 @@ - *cost = LIBCALL_COST (2); - return false; + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vbicq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1); ++ return __a & ~__b; + } -+ case BSWAP: -+ if (arm_arch6) -+ { -+ if (mode == SImode) -+ { -+ *cost = COSTS_N_INSNS (1); -+ if (speed_p) -+ *cost += extra_cost->alu.rev; -+ -+ return false; -+ } -+ } -+ else -+ { -+ /* No rev instruction available. Look at arm_legacy_rev -+ and thumb_legacy_rev for the form of RTL used then. */ -+ if (TARGET_THUMB) -+ { -+ *cost = COSTS_N_INSNS (10); -+ -+ if (speed_p) -+ { -+ *cost += 6 * extra_cost->alu.shift; -+ *cost += 3 * extra_cost->alu.logical; -+ } -+ } -+ else -+ { -+ *cost = COSTS_N_INSNS (5); -+ -+ if (speed_p) -+ { -+ *cost += 2 * extra_cost->alu.shift; -+ *cost += extra_cost->alu.arith_shift; -+ *cost += 2 * extra_cost->alu.logical; -+ } -+ } -+ return true; -+ } -+ return false; -+ - case MINUS: - if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT - && (mode == SFmode || !TARGET_VFP_SINGLE)) -@@ -9720,8 +9897,17 @@ - /* Vector mode? */ - *cost = LIBCALL_COST (2); - return false; -+ case IOR: -+ if (mode == SImode && arm_arch6 && aarch_rev16_p (x)) -+ { -+ *cost = COSTS_N_INSNS (1); -+ if (speed_p) -+ *cost += extra_cost->alu.rev; + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vbicq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1); ++ return __a & ~__b; + } -- case AND: case XOR: case IOR: -+ return true; -+ } -+ /* Fall through. */ -+ case AND: case XOR: - if (mode == SImode) - { - enum rtx_code subcode = GET_CODE (XEXP (x, 0)); -@@ -10620,6 +10806,36 @@ - *cost = LIBCALL_COST (1); - return false; + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vbicq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1); ++ return __a & ~__b; + } -+ case FMA: -+ if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA) -+ { -+ rtx op0 = XEXP (x, 0); -+ rtx op1 = XEXP (x, 1); -+ rtx op2 = XEXP (x, 2); -+ -+ *cost = COSTS_N_INSNS (1); -+ -+ /* vfms or vfnma. */ -+ if (GET_CODE (op0) == NEG) -+ op0 = XEXP (op0, 0); -+ -+ /* vfnms or vfnma. */ -+ if (GET_CODE (op2) == NEG) -+ op2 = XEXP (op2, 0); -+ -+ *cost += rtx_cost (op0, FMA, 0, speed_p); -+ *cost += rtx_cost (op1, FMA, 1, speed_p); -+ *cost += rtx_cost (op2, FMA, 2, speed_p); -+ -+ if (speed_p) -+ *cost += extra_cost->fp[mode ==DFmode].fma; -+ -+ return true; -+ } -+ -+ *cost = LIBCALL_COST (3); -+ return false; -+ - case FIX: - case UNSIGNED_FIX: - if (TARGET_HARD_FLOAT) -@@ -10670,10 +10886,16 @@ - return true; + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vbicq_s64 (int64x2_t __a, int64x2_t __b) + { +- return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1); ++ return __a & ~__b; + } - case ASM_OPERANDS: -- /* Just a guess. Cost one insn per input. */ -- *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); -- return true; -+ { -+ /* Just a guess. Guess number of instructions in the asm -+ plus one insn per input. Always a minimum of COSTS_N_INSNS (1) -+ though (see PR60663). 
*/ -+ int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); -+ int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vbicq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); ++ return __a & ~__b; + } -+ *cost = COSTS_N_INSNS (asm_length + num_operands); -+ return true; -+ } - default: - if (mode != VOIDmode) - *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -@@ -16787,9 +17009,20 @@ - compute_bb_for_insn (); - df_analyze (); + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vbicq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); ++ return __a & ~__b; + } -+ enum Convert_Action {SKIP, CONV, SWAP_CONV}; -+ - FOR_EACH_BB_FN (bb, cfun) - { -+ if (current_tune->disparage_flag_setting_t16_encodings -+ && optimize_bb_for_speed_p (bb)) -+ continue; -+ - rtx insn; -+ Convert_Action action = SKIP; -+ Convert_Action action_for_partial_flag_setting -+ = (current_tune->disparage_partial_flag_setting_t16_encodings -+ && optimize_bb_for_speed_p (bb)) -+ ? SKIP : CONV; + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vbicq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0); ++ return __a & ~__b; + } - COPY_REG_SET (&live, DF_LR_OUT (bb)); - df_simulate_initialize_backwards (bb, &live); -@@ -16799,7 +17032,7 @@ - && !REGNO_REG_SET_P (&live, CC_REGNUM) - && GET_CODE (PATTERN (insn)) == SET) - { -- enum {SKIP, CONV, SWAP_CONV} action = SKIP; -+ action = SKIP; - rtx pat = PATTERN (insn); - rtx dst = XEXP (pat, 0); - rtx src = XEXP (pat, 1); -@@ -16880,10 +17113,11 @@ - /* ANDS , */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - else if (rtx_equal_p (dst, op1) - && low_register_operand (op0, SImode)) -- action = SWAP_CONV; -+ action = action_for_partial_flag_setting == SKIP -+ ? 
SKIP : SWAP_CONV; - break; + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vbicq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0); ++ return __a & ~__b; + } - case ASHIFTRT: -@@ -16894,7 +17128,7 @@ - /* LSLS , */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - /* ASRS ,,# */ - /* LSRS ,,# */ - /* LSLS ,,# */ -@@ -16901,7 +17135,7 @@ - else if (low_register_operand (op0, SImode) - && CONST_INT_P (op1) - && IN_RANGE (INTVAL (op1), 0, 31)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) + vorn_s8 (int8x8_t __a, int8x8_t __b) + { +- return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1); ++ return __a | ~__b; + } - case ROTATERT: -@@ -16908,12 +17142,16 @@ - /* RORS , */ - if (rtx_equal_p (dst, op0) - && low_register_operand (op1, SImode)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; + __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) + vorn_s16 (int16x4_t __a, int16x4_t __b) + { +- return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1); ++ return __a | ~__b; + } - case NOT: -+ /* MVNS , */ -+ if (low_register_operand (op0, SImode)) -+ action = action_for_partial_flag_setting; -+ break; -+ - case NEG: -- /* MVNS , */ - /* NEGS , (a.k.a RSBS) */ - if (low_register_operand (op0, SImode)) - action = CONV; -@@ -16923,7 +17161,7 @@ - /* MOVS ,# */ - if (CONST_INT_P (src) - && IN_RANGE (INTVAL (src), 0, 255)) -- action = CONV; -+ action = action_for_partial_flag_setting; - break; + __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) + vorn_s32 (int32x2_t __a, int32x2_t __b) + { +- return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1); ++ return __a | ~__b; + } - case REG: -@@ -21421,7 +21659,7 @@ - register. */ - case 'p': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) + vorn_u8 (uint8x8_t __a, uint8x8_t __b) + { +- return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); ++ return __a | ~__b; + } - if (GET_MODE_SIZE (mode) != 8 || !REG_P (x)) -@@ -21445,7 +21683,7 @@ - case 'P': - case 'q': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int is_quad = (code == 'q'); - int regno; + __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) + vorn_u16 (uint16x4_t __a, uint16x4_t __b) + { +- return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); ++ return __a | ~__b; + } -@@ -21481,7 +21719,7 @@ - case 'e': - case 'f': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; + __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) + vorn_u32 (uint32x2_t __a, uint32x2_t __b) + { +- return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0); ++ return __a | ~__b; + } - if ((GET_MODE_SIZE (mode) != 16 -@@ -21614,7 +21852,7 @@ - /* Translate an S register number into a D register number and element index. 
*/ - case 'y': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) + vorn_s64 (int64x1_t __a, int64x1_t __b) + { +- return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1); ++ return __a | ~__b; + } - if (GET_MODE_SIZE (mode) != 4 || !REG_P (x)) -@@ -21648,7 +21886,7 @@ - number into a D register number and element index. */ - case 'z': - { -- int mode = GET_MODE (x); -+ enum machine_mode mode = GET_MODE (x); - int regno; + __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) + vorn_u64 (uint64x1_t __a, uint64x1_t __b) + { +- return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0); ++ return __a | ~__b; + } - if (GET_MODE_SIZE (mode) != 2 || !REG_P (x)) -@@ -22609,13 +22847,20 @@ - } + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) + vornq_s8 (int8x16_t __a, int8x16_t __b) + { +- return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1); ++ return __a | ~__b; + } - /* We allow almost any value to be stored in the general registers. -- Restrict doubleword quantities to even register pairs so that we can -- use ldrd. Do not allow very large Neon structure opaque modes in -- general registers; they would use too many. */ -+ Restrict doubleword quantities to even register pairs in ARM state -+ so that we can use ldrd. Do not allow very large Neon structure -+ opaque modes in general registers; they would use too many. */ - if (regno <= LAST_ARM_REGNUM) -- return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) -- && ARM_NUM_REGS (mode) <= 4; -+ { -+ if (ARM_NUM_REGS (mode) > 4) -+ return FALSE; + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) + vornq_s16 (int16x8_t __a, int16x8_t __b) + { +- return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1); ++ return __a | ~__b; + } -+ if (TARGET_THUMB2) -+ return TRUE; -+ -+ return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0); -+ } -+ - if (regno == FRAME_POINTER_REGNUM - || regno == ARG_POINTER_REGNUM) - /* We only allow integers in the fake hard registers. */ -@@ -25888,7 +26133,7 @@ - int pops_needed; - unsigned available; - unsigned required; -- int mode; -+ enum machine_mode mode; - int size; - int restore_a4 = FALSE; + __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) + vornq_s32 (int32x4_t __a, int32x4_t __b) + { +- return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1); ++ return __a | ~__b; + } ---- a/src/gcc/config/arm/arm-cores.def -+++ b/src/gcc/config/arm/arm-cores.def -@@ -141,7 +141,7 @@ - ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex) - ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5) - ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7) --ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex) -+ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8) - ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9) - ARM_CORE("cortex-a12", cortexa12, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) - ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) ---- a/src/gcc/config/arm/arm-protos.h -+++ b/src/gcc/config/arm/arm-protos.h -@@ -272,6 +272,11 @@ - const struct cpu_vec_costs* vec_costs; - /* Prefer Neon for 64-bit bitops. 
*/ - bool prefer_neon_for_64bits; -+ /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ -+ bool disparage_flag_setting_t16_encodings; -+ /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags -+ would be set. */ -+ bool disparage_partial_flag_setting_t16_encodings; - }; + __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) + vornq_s64 (int64x2_t __a, int64x2_t __b) + { +- return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1); ++ return __a | ~__b; + } - extern const struct tune_params *current_tune; ---- a/src/gcc/config/arm/aarch-common-protos.h -+++ b/src/gcc/config/arm/aarch-common-protos.h -@@ -24,6 +24,9 @@ - #define GCC_AARCH_COMMON_PROTOS_H + __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) + vornq_u8 (uint8x16_t __a, uint8x16_t __b) + { +- return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); ++ return __a | ~__b; + } - extern int aarch_crypto_can_dual_issue (rtx, rtx); -+extern bool aarch_rev16_p (rtx); -+extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode); -+extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode); - extern int arm_early_load_addr_dep (rtx, rtx); - extern int arm_early_store_addr_dep (rtx, rtx); - extern int arm_mac_accumulator_is_mul_result (rtx, rtx); -@@ -54,6 +57,7 @@ - const int bfi; /* Bit-field insert. */ - const int bfx; /* Bit-field extraction. */ - const int clz; /* Count Leading Zeros. */ -+ const int rev; /* Reverse bits/bytes. */ - const int non_exec; /* Extra cost when not executing insn. */ - const bool non_exec_costs_exec; /* True if non-execution must add the exec - cost. */ ---- a/src/gcc/config/arm/predicates.md -+++ b/src/gcc/config/arm/predicates.md -@@ -681,5 +681,6 @@ - (match_code "reg" "0"))) + __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) + vornq_u16 (uint16x8_t __a, uint16x8_t __b) + { +- return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); ++ return __a | ~__b; + } - (define_predicate "call_insn_operand" -- (ior (match_code "symbol_ref") -+ (ior (and (match_code "symbol_ref") -+ (match_test "!arm_is_long_call_p (SYMBOL_REF_DECL (op))")) - (match_operand 0 "s_register_operand"))) + __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) + vornq_u32 (uint32x4_t __a, uint32x4_t __b) + { +- return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0); ++ return __a | ~__b; + } + + __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) + vornq_u64 (uint64x2_t __a, uint64x2_t __b) + { +- return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0); ++ return __a | ~__b; + } + +- + __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) + vreinterpret_p8_p16 (poly16x4_t __a) + { --- a/src/gcc/config/arm/aarch-common.c +++ b/src/gcc/config/arm/aarch-common.c -@@ -191,6 +191,79 @@ +@@ -191,6 +191,83 @@ return 0; } @@ -13200,14 +30196,18 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ +aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode) +{ + return CONST_INT_P (val) -+ && INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff, mode); ++ && INTVAL (val) ++ == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), ++ mode); +} + +bool +aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode) +{ + return CONST_INT_P (val) -+ && INTVAL (val) == trunc_int_for_mode (0xff00ff00ff00ff00, mode); ++ && 
INTVAL (val) ++ == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), ++ mode); +} + + @@ -13272,9 +30272,61 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ /* Return nonzero if the CONSUMER instruction (a load) does need PRODUCER's value to calculate the address. */ int +--- a/src/gcc/config/arm/iterators.md ++++ b/src/gcc/config/arm/iterators.md +@@ -116,6 +116,9 @@ + ;; Vector modes including 64-bit integer elements, but no floats. + (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) + ++;; Vector modes for H, S and D types. ++(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) ++ + ;; Vector modes for float->int conversions. + (define_mode_iterator VCVTF [V2SF V4SF]) + +@@ -191,6 +194,20 @@ + ;; Right shifts + (define_code_iterator rshifts [ashiftrt lshiftrt]) + ++;; Binary operators whose second operand can be shifted. ++(define_code_iterator shiftable_ops [plus minus ior xor and]) ++ ++;; plus and minus are the only shiftable_ops for which Thumb2 allows ++;; a stack pointer opoerand. The minus operation is a candidate for an rsub ++;; and hence only plus is supported. ++(define_code_attr t2_binop0 ++ [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")]) ++ ++;; The instruction to use when a shiftable_ops has a shift operation as ++;; its first operand. ++(define_code_attr arith_shift_insn ++ [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")]) ++ + ;;---------------------------------------------------------------------------- + ;; Int iterators + ;;---------------------------------------------------------------------------- --- a/src/gcc/config/arm/arm.md +++ b/src/gcc/config/arm/arm.md -@@ -2863,6 +2863,28 @@ +@@ -200,17 +200,9 @@ + (const_string "yes")] + (const_string "no"))) + +-; Allows an insn to disable certain alternatives for reasons other than +-; arch support. +-(define_attr "insn_enabled" "no,yes" +- (const_string "yes")) +- + ; Enable all alternatives that are both arch_enabled and insn_enabled. 
+ (define_attr "enabled" "no,yes" +- (cond [(eq_attr "insn_enabled" "no") +- (const_string "no") +- +- (and (eq_attr "predicable_short_it" "no") ++ (cond [(and (eq_attr "predicable_short_it" "no") + (and (eq_attr "predicated" "yes") + (match_test "arm_restrict_it"))) + (const_string "no") +@@ -2863,6 +2855,28 @@ (set_attr "type" "multiple")] ) @@ -13303,7 +30355,7 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ (define_insn_and_split "*anddi_notsesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (sign_extend:DI -@@ -9345,8 +9367,10 @@ +@@ -9345,8 +9359,10 @@ "TARGET_32BIT" " { @@ -13316,7 +30368,7 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0)); if (operands[2] == NULL_RTX) -@@ -9363,8 +9387,10 @@ +@@ -9363,8 +9379,10 @@ "TARGET_32BIT" " { @@ -13329,7 +30381,71 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0)); if (operands[3] == NULL_RTX) -@@ -12669,6 +12695,44 @@ +@@ -9850,39 +9868,35 @@ + + ;; Patterns to allow combination of arithmetic, cond code and shifts + +-(define_insn "*arith_shiftsi" +- [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") +- (match_operator:SI 1 "shiftable_operator" +- [(match_operator:SI 3 "shift_operator" +- [(match_operand:SI 4 "s_register_operand" "r,r,r,r") +- (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")]) +- (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))] ++(define_insn "*_multsi" ++ [(set (match_operand:SI 0 "s_register_operand" "=r,r") ++ (shiftable_ops:SI ++ (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") ++ (match_operand:SI 3 "power_of_two_operand" "")) ++ (match_operand:SI 1 "s_register_operand" "rk,")))] + "TARGET_32BIT" +- "%i1%?\\t%0, %2, %4%S3" ++ "%?\\t%0, %1, %2, lsl %b3" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "4") +- (set_attr "arch" "a,t2,t2,a") +- ;; Thumb2 doesn't allow the stack pointer to be used for +- ;; operand1 for all operations other than add and sub. In this case +- ;; the minus operation is a candidate for an rsub and hence needs +- ;; to be disabled. +- ;; We have to make sure to disable the fourth alternative if +- ;; the shift_operator is MULT, since otherwise the insn will +- ;; also match a multiply_accumulate pattern and validate_change +- ;; will allow a replacement of the constant with a register +- ;; despite the checks done in shift_operator. 
+- (set_attr_alternative "insn_enabled" +- [(const_string "yes") +- (if_then_else +- (match_operand:SI 1 "add_operator" "") +- (const_string "yes") (const_string "no")) +- (const_string "yes") +- (if_then_else +- (match_operand:SI 3 "mult_operator" "") +- (const_string "no") (const_string "yes"))]) +- (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")]) ++ (set_attr "arch" "a,t2") ++ (set_attr "type" "alu_shift_imm")]) + ++(define_insn "*_shiftsi" ++ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") ++ (shiftable_ops:SI ++ (match_operator:SI 2 "shift_nomul_operator" ++ [(match_operand:SI 3 "s_register_operand" "r,r,r") ++ (match_operand:SI 4 "shift_amount_operand" "M,M,r")]) ++ (match_operand:SI 1 "s_register_operand" "rk,,rk")))] ++ "TARGET_32BIT && GET_CODE (operands[3]) != MULT" ++ "%?\\t%0, %1, %3%S2" ++ [(set_attr "predicable" "yes") ++ (set_attr "predicable_short_it" "no") ++ (set_attr "shift" "4") ++ (set_attr "arch" "a,t2,a") ++ (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_reg")]) ++ + (define_split + [(set (match_operand:SI 0 "s_register_operand" "") + (match_operator:SI 1 "shiftable_operator" +@@ -12669,6 +12683,44 @@ (set_attr "type" "rev")] ) @@ -13376,7 +30492,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ (bswap:HI (match_operand:HI 1 "s_register_operand" "r")))] --- a/src/libobjc/ChangeLog.linaro +++ b/src/libobjc/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -13394,7 +30514,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libvtv/ChangeLog.linaro +++ b/src/libvtv/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -13434,7 +30558,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ [Define to 1 if you have the `clock_gettime' function in librt.])]) --- a/src/libgfortran/ChangeLog.linaro +++ b/src/libgfortran/ChangeLog.linaro -@@ -0,0 +1,23 @@ +@@ -0,0 +1,27 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -13460,7 +30588,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libada/ChangeLog.linaro +++ b/src/libada/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -13478,7 +30610,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libffi/ChangeLog.linaro +++ b/src/libffi/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -13496,7 +30632,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libssp/ChangeLog.linaro +++ b/src/libssp/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. 
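(Editorial illustration, not part of the patch.) The arm.md changes above replace the old insn_enabled-based "*arith_shiftsi" pattern with per-operation patterns generated from the new shiftable_ops iterator, and add REV16 patterns driven by the new aarch_rev16_p predicate. A minimal sketch of the two C idioms involved, assuming an ARM target; function names are illustrative only:

/* Caught by the shiftable_ops patterns: the power-of-two multiply is
   folded into the shifted operand, e.g. "add r0, r0, r1, lsl #2".  */
int
add_scaled (int a, int b)
{
  return a + b * 4;
}

/* Caught by aarch_rev16_p and the new rev16 patterns: byte-swap each
   16-bit halfword, which ARMv6 and later can do with a single REV16.  */
unsigned int
swap_halfword_bytes (unsigned int x)
{
  return ((x & 0xff00ff00u) >> 8) | ((x & 0x00ff00ffu) << 8);
}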
@@ -13514,7 +30654,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libcilkrts/ChangeLog.linaro +++ b/src/libcilkrts/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -13532,7 +30676,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libcpp/ChangeLog.linaro +++ b/src/libcpp/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -13550,7 +30698,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/libcpp/po/ChangeLog.linaro +++ b/src/libcpp/po/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. @@ -13568,7 +30720,11 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211979 \ + GCC Linaro 4.9-2014.04 released. --- a/src/fixincludes/ChangeLog.linaro +++ b/src/fixincludes/ChangeLog.linaro -@@ -0,0 +1,15 @@ +@@ -0,0 +1,19 @@ ++2014-07-17 Yvan Roux ++ ++ GCC Linaro 4.9-2014.07 released. ++ +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. diff --git a/debian/patches/gcc-multiarch-linaro.diff b/debian/patches/gcc-multiarch-linaro.diff new file mode 100644 index 0000000..08e16ed --- /dev/null +++ b/debian/patches/gcc-multiarch-linaro.diff @@ -0,0 +1,147 @@ +# DP: - Remaining multiarch patches, not yet submitted upstream. +# DP: - Add MULTIARCH_DIRNAME definitions for multilib configurations, +# DP: which are used for the non-multilib builds. + +2013-06-12 Matthias Klose + + * config/i386/t-linux64: Set MULTIARCH_DIRNAME. + * config/i386/t-kfreebsd: Set MULTIARCH_DIRNAME. + * config.gcc (i[34567]86-*-linux* | x86_64-*-linux*): Prepend + i386/t-linux to $tmake_file. + * config/mips/t-linux64: Set MULTIARCH_DIRNAME. + * config/rs6000/t-linux64: Set MULTIARCH_DIRNAME. + * config/s390/t-linux64: Set MULTIARCH_DIRNAME. + * config/sparc/t-linux64: Set MULTIARCH_DIRNAME. 
+ +Index: b/src/gcc/config/sh/t-linux +=================================================================== +--- a/src/gcc/config/sh/t-linux ++++ b/src/gcc/config/sh/t-linux +@@ -1,2 +1,4 @@ + MULTILIB_DIRNAMES= + MULTILIB_MATCHES = ++ ++MULTILIB_OSDIRNAMES = sh4-linux-gnu:sh4-linux-gnu sh4_nofpu-linux-gnu:sh4-linux-gnu +Index: b/src/gcc/config/sparc/t-linux64 +=================================================================== +--- a/src/gcc/config/sparc/t-linux64 ++++ b/src/gcc/config/sparc/t-linux64 +@@ -27,3 +27,5 @@ MULTILIB_OPTIONS = m64/m32 + MULTILIB_DIRNAMES = 64 32 + MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:sparc64-linux-gnu) + MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:sparc-linux-gnu) ++ ++MULTIARCH_DIRNAME = $(call if_multiarch,sparc$(if $(findstring 64,$(target)),64)-linux-gnu) +Index: b/src/gcc/config/s390/t-linux64 +=================================================================== +--- a/src/gcc/config/s390/t-linux64 ++++ b/src/gcc/config/s390/t-linux64 +@@ -9,3 +9,5 @@ MULTILIB_OPTIONS = m64/m31 + MULTILIB_DIRNAMES = 64 32 + MULTILIB_OSDIRNAMES = ../lib64$(call if_multiarch,:s390x-linux-gnu) + MULTILIB_OSDIRNAMES += $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:s390-linux-gnu) ++ ++MULTIARCH_DIRNAME = $(call if_multiarch,s390$(if $(findstring s390x,$(target)),x)-linux-gnu) +Index: b/src/gcc/config/rs6000/t-linux64 +=================================================================== +--- a/src/gcc/config/rs6000/t-linux64 ++++ b/src/gcc/config/rs6000/t-linux64 +@@ -31,6 +31,8 @@ MULTILIB_EXTRA_OPTS := + MULTILIB_OSDIRNAMES := m64=../lib64$(call if_multiarch,:powerpc64-linux-gnu) + MULTILIB_OSDIRNAMES += m32=$(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:powerpc-linux-gnu) + ++MULTIARCH_DIRNAME = $(call if_multiarch,powerpc$(if $(findstring 64,$(target)),64)-linux-gnu) ++ + rs6000-linux.o: $(srcdir)/config/rs6000/rs6000-linux.c + $(COMPILE) $< + $(POSTCOMPILE) +Index: b/src/gcc/config/i386/t-linux64 +=================================================================== +--- a/src/gcc/config/i386/t-linux64 ++++ b/src/gcc/config/i386/t-linux64 +@@ -36,3 +36,13 @@ MULTILIB_DIRNAMES = $(patsubst m%, %, + MULTILIB_OSDIRNAMES = m64=../lib64$(call if_multiarch,:x86_64-linux-gnu) + MULTILIB_OSDIRNAMES+= m32=$(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)$(call if_multiarch,:i386-linux-gnu) + MULTILIB_OSDIRNAMES+= mx32=../libx32$(call if_multiarch,:x86_64-linux-gnux32) ++ ++ifneq (,$(findstring x86_64,$(target))) ++ ifneq (,$(findstring biarchx32.h,$(tm_include_list))) ++ MULTIARCH_DIRNAME = $(call if_multiarch,x86_64-linux-gnux32) ++ else ++ MULTIARCH_DIRNAME = $(call if_multiarch,x86_64-linux-gnu) ++ endif ++else ++ MULTIARCH_DIRNAME = $(call if_multiarch,i386-linux-gnu) ++endif +Index: b/src/gcc/config/i386/t-kfreebsd +=================================================================== +--- a/src/gcc/config/i386/t-kfreebsd ++++ b/src/gcc/config/i386/t-kfreebsd +@@ -1,5 +1,9 @@ +-MULTIARCH_DIRNAME = $(call if_multiarch,i386-kfreebsd-gnu) ++ifeq (,$(MULTIARCH_DIRNAME)) ++ MULTIARCH_DIRNAME = $(call if_multiarch,i386-kfreebsd-gnu) ++endif + + # MULTILIB_OSDIRNAMES are set in t-linux64. 
+ KFREEBSD_OS = $(filter kfreebsd%, $(word 3, $(subst -, ,$(target)))) + MULTILIB_OSDIRNAMES := $(filter-out mx32=%,$(subst linux,$(KFREEBSD_OS),$(MULTILIB_OSDIRNAMES))) ++ ++MULTIARCH_DIRNAME := $(subst linux,$(KFREEBSD_OS),$(MULTIARCH_DIRNAME)) +Index: b/src/gcc/config/mips/t-linux64 +=================================================================== +--- a/src/gcc/config/mips/t-linux64 ++++ b/src/gcc/config/mips/t-linux64 +@@ -24,3 +24,13 @@ MULTILIB_OSDIRNAMES = \ + ../lib32$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) \ + ../lib$(call if_multiarch,:mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) \ + ../lib64$(call if_multiarch,:mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) ++ ++ifneq (,$(findstring abin32,$(target))) ++MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabin32$(MIPS_SOFT)) ++else ++ifneq (,$(findstring abi64,$(target))) ++MULTIARCH_DIRNAME = $(call if_multiarch,mips64$(MIPS_EL)-linux-gnuabi64$(MIPS_SOFT)) ++else ++MULTIARCH_DIRNAME = $(call if_multiarch,mips$(MIPS_EL)-linux-gnu$(MIPS_SOFT)) ++endif ++endif +Index: b/src/gcc/config.gcc +=================================================================== +--- a/src/gcc/config.gcc ++++ b/src/gcc/config.gcc +@@ -1959,8 +1959,11 @@ mips64*-*-linux* | mipsisa64*-*-linux*) + tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file} mips/gnu-user.h mips/gnu-user64.h mips/linux64.h mips/linux-common.h" + extra_options="${extra_options} linux-android.opt" + tmake_file="${tmake_file} mips/t-linux64" +- tm_defines="${tm_defines} MIPS_ABI_DEFAULT=ABI_N32" ++ tm_defines="${tm_defines} MIPS_ABI_DEFAULT=ABI_64" + case ${target} in ++ *gnuabin32*) ++ tm_defines=$(echo ${tm_defines}| sed 's/MIPS_ABI_DEFAULT=ABI_64/MIPS_ABI_DEFAULT=ABI_N32/g') ++ ;; + mips64el-st-linux-gnu) + tm_file="${tm_file} mips/st.h" + tmake_file="${tmake_file} mips/t-st" +@@ -4107,7 +4110,7 @@ case ${target} in + i[34567]86-*-darwin* | x86_64-*-darwin*) + ;; + i[34567]86-*-linux* | x86_64-*-linux*) +- tmake_file="$tmake_file i386/t-linux" ++ tmake_file="i386/t-linux $tmake_file" + ;; + i[34567]86-*-kfreebsd*-gnu | x86_64-*-kfreebsd*-gnu) + tmake_file="$tmake_file i386/t-kfreebsd" +Index: b/src/gcc/config/aarch64/t-aarch64-linux +=================================================================== +--- a/src/gcc/config/aarch64/t-aarch64-linux ++++ b/src/gcc/config/aarch64/t-aarch64-linux +@@ -22,7 +22,7 @@ LIB1ASMSRC = aarch64/lib1funcs.asm + LIB1ASMFUNCS = _aarch64_sync_cache_range + + AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be) +-MULTILIB_OSDIRNAMES = mabi.lp64=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) ++MULTILIB_OSDIRNAMES = mabi.lp64=../lib$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) + MULTIARCH_DIRNAME = $(call if_multiarch,aarch64$(AARCH_BE)-linux-gnu) + + MULTILIB_OSDIRNAMES += mabi.ilp32=../libilp32 diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index c95af02..5d2874f 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140712 (r212479). +# DP: updates from the 4.9 branch upto 20140724 (r212995). last_update() { cat > ${dir}LAST_UPDATED ++ ++ Backport from mainline ++ 2014-07-09 Richard Biener ++ ++ PR c-family/61741 ++ * c-gimplify.c (c_gimplify_expr): Gimplify self-modify expressions ++ using unsigned arithmetic if overflow does not wrap instead of ++ if overflow is undefined. 
++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: gcc/DATESTAMP +=================================================================== +--- a/src/gcc/DATESTAMP (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/DATESTAMP (.../branches/gcc-4_9-branch) +@@ -1 +1 @@ +-20140716 ++20140724 +Index: gcc/omp-low.c +=================================================================== +--- a/src/gcc/omp-low.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/omp-low.c (.../branches/gcc-4_9-branch) +@@ -1872,7 +1872,6 @@ + TREE_STATIC (decl) = 1; + TREE_USED (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; +- DECL_NAMELESS (decl) = 1; + DECL_IGNORED_P (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_UNINLINABLE (decl) = 1; +Index: gcc/toplev.c +=================================================================== +--- a/src/gcc/toplev.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/toplev.c (.../branches/gcc-4_9-branch) +@@ -1052,16 +1052,19 @@ + + if (warn_stack_usage >= 0) + { ++ const location_t loc = DECL_SOURCE_LOCATION (current_function_decl); ++ + if (stack_usage_kind == DYNAMIC) +- warning (OPT_Wstack_usage_, "stack usage might be unbounded"); ++ warning_at (loc, OPT_Wstack_usage_, "stack usage might be unbounded"); + else if (stack_usage > warn_stack_usage) + { + if (stack_usage_kind == DYNAMIC_BOUNDED) +- warning (OPT_Wstack_usage_, "stack usage might be %wd bytes", +- stack_usage); ++ warning_at (loc, ++ OPT_Wstack_usage_, "stack usage might be %wd bytes", ++ stack_usage); + else +- warning (OPT_Wstack_usage_, "stack usage is %wd bytes", +- stack_usage); ++ warning_at (loc, OPT_Wstack_usage_, "stack usage is %wd bytes", ++ stack_usage); + } + } + } +Index: gcc/ChangeLog +=================================================================== +--- a/src/gcc/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,138 @@ ++2014-07-23 Sebastian Huber ++ ++ * config/arm/t-rtems-eabi: Add ++ mthumb/march=armv7-r/mfpu=vfpv3-d16/mfloat-abi=hard, ++ mthumb/march=armv7-m/mfpu=fpv4-sp-d16/mfloat-abi=hard, ++ mbig-endian/mthumb/march=armv7-r, and ++ mbig-endian/mthumb/march=armv7-r/mfpu=vfpv3-d16/mfloat-abi=hard ++ multilibs. ++ ++2014-07-23 Sebastian Huber ++ Chris Johns ++ Joel Sherrill ++ ++ * config.gcc: Add nios2-*-rtems*. ++ * config/nios2/rtems.h: New file. ++ * gcc/config/nios2/t-rtems: New file. ++ ++2014-07-21 Peter Bergner ++ ++ * config/rs6000/sysv4.h (LIBASAN_EARLY_SPEC): Define. ++ (LIBTSAN_EARLY_SPEC): Likewise. ++ ++2014-07-21 Uros Bizjak ++ ++ Backport from mainline ++ 2014-07-21 Uros Bizjak ++ ++ PR target/61855 ++ * config/i386/avx512fintrin.h: Move constants for mantissa extraction ++ out of #ifdef __OPTIMIZE__. ++ ++2014-07-20 Eric Botcazou ++ ++ * expr.c (store_field): Handle VOIDmode for calls that return values ++ in multiple locations. ++ ++2014-07-19 Eric Botcazou ++ ++ * toplev.c (output_stack_usage): Adjust the location of the warning. ++ ++2014-07-19 Daniel Cederman ++ ++ * config/sparc/sync.md (*membar_storeload_leon3): New insn. ++ (*membar_storeload): Disable for LEON3. ++ ++2014-07-18 Uros Bizjak ++ ++ Backport from mainline ++ 2014-07-16 David Wohlferd ++ ++ PR target/61662 ++ * config/i386/ia32intrin.h: Use __LP64__ to determine size of long. ++ ++2014-07-18 Uros Bizjak ++ ++ Backport from mainline ++ 2014-07-18 Uros Bizjak ++ ++ PR target/61794 ++ * config/i386/sse.md (avx512f_vextract32x4_1_maskm): ++ Fix instruction constraint. ++ (avx512f_vextract32x4_1): Ditto. 
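(Editorial illustration, not part of the patch.) The PR target/61794 entry above refers to the sse.md hunk later in this file, where the insn conditions used a single "=" where "==" was intended; because INTVAL expands to an lvalue, the assignment compiled silently. A hedged C sketch of that bug class, using hypothetical helpers:

/* BUG: assigns b - 1 to a, then tests the assigned value.  */
static int
buggy (int a, int b)
{
  return (a = b - 1) != 0;
}

/* FIX: compares a against b - 1, as the condition intends.  */
static int
fixed (int a, int b)
{
  return a == b - 1;
}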
++ ++2014-07-17 Richard Biener ++ ++ Backport from mainline ++ 2014-07-14 Richard Biener ++ ++ PR tree-optimization/61779 ++ * tree-ssa-copy.c (copy_prop_visit_cond_stmt): Always try ++ simplifying a condition. ++ ++2014-07-17 Richard Biener ++ ++ PR rtl-optimization/61801 ++ * sched-deps.c (sched_analyze_2): For ASM_OPERANDS and ASM_INPUT ++ don't set reg_pending_barrier if it appears in a debug-insn. ++ ++2014-07-17 Hans-Peter Nilsson ++ ++ Backport from trunk. ++ PR target/61737. ++ * config/cris/cris.c (TARGET_LEGITIMATE_CONSTANT_P) ++ (TARGET_CANNOT_FORCE_CONST_MEM): Define. ++ (cris_cannot_force_const_mem, cris_legitimate_constant_p): New ++ functions. ++ (cris_print_index, cris_print_operand, cris_constant_index_p) ++ (cris_side_effect_mode_ok): Replace CONSTANT_P with CRIS_CONSTANT_P. ++ (cris_address_cost): Ditto last CONSTANT_P. ++ (cris_symbol_type_of): Rename from cris_pic_symbol_type_of. All ++ callers changed. Yield cris_offsettable_symbol for non-PIC ++ constant symbolic expressions including labels. Yield cris_unspec ++ for all unspecs. ++ (cris_expand_pic_call_address): New parameter MARKERP. Set its ++ target to pic_offset_table_rtx for calls that will likely go ++ through PLT, const0_rtx when they can't. All callers changed. ++ Assert flag_pic. Use CONSTANT_P, not CONSTANT_ADDRESS_P, for ++ symbolic expressions to be PICified. Remove second, redundant, ++ assert on can_create_pseudo_p returning non-zero. Use ++ replace_equiv_address_nv, not replace_equiv_address, for final ++ operand update. ++ * config/cris/cris.md ("movsi"): Move variable t to pattern ++ toplevel. Adjust assert for new cris_symbol_type member. Use ++ CONSTANT_P instead of CONSTANT_ADDRESS_P. ++ ("*movsi_internal") : Make check for valid unspec operands ++ for lapc stricter. ++ : Clear condition codes. ++ ("call", "call_value"): Use second incoming operand as a marker ++ for pic-offset-table-register being used. ++ ("*expanded_call_non_v32", "*expanded_call_v32") ++ ("*expanded_call_value_non_v32", "*expanded_call_value_v32"): For ++ second incoming operand to CALL, match cris_call_type_marker. ++ ("*expanded_call_value_side"): Ditto. Disable before reload_completed. ++ ("*expanded_call_side"): Ditto. Fix typo in comment. ++ (moverside, movemside peepholes): Check for CRIS_CONSTANT_P, not ++ CONSTANT_P. ++ * config/cris/predicates.md ("cris_call_type_marker"): New predicate. ++ * config/cris/cris.h (CRIS_CONSTANT_P): New macro. ++ (enum cris_symbol_type): Rename from cris_pic_symbol_type. All ++ users changed. Add members cris_offsettable_symbol and cris_unspec. ++ (cris_symbol_type): Rename from cris_pic_symbol_type. ++ * config/cris/constraints.md ("T"): Use CRIS_CONSTANT_P, not ++ just CONSTANT_P. ++ * config/cris/cris-protos.h (cris_symbol_type_of, ++ cris_expand_pic_call_address): Adjust prototypes. ++ (cris_legitimate_constant_p): New prototype. ++ ++ * config.gcc (crisv32-*-linux* | cris-*-linux*): Do not override ++ an existing tmake_file. Don't add t-slibgcc and t-linux. ++ ++2014-07-16 Jakub Jelinek ++ ++ * omp-low.c (create_omp_child_function): Don't set DECL_NAMELESS ++ on the FUNCTION_DECL. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +@@ -4,14 +142,14 @@ + + 2014-07-10 Cary Coutant + +- Backport from trunk at r212211. ++ Backport from trunk at r212211. + + * dwarf2out.c (remove_addr_table_entry): Remove unnecessary hash table +- lookup. ++ lookup. + (resolve_addr_in_expr): When replacing the rtx in a location list +- entry, get a new address table entry. 
++ entry, get a new address table entry. + (dwarf2out_finish): Call index_location_lists even if there are no +- addr_index_table entries yet. ++ addr_index_table entries yet. + + 2014-07-10 Tom G. Christensen + +@@ -33,13 +171,13 @@ + PR target/61062 + * config/arm/arm_neon.h (vtrn_s8, vtrn_s16, vtrn_u8, vtrn_u16, vtrn_p8, + vtrn_p16, vtrn_s32, vtrn_f32, vtrn_u32, vtrnq_s8, vtrnq_s16, vtrnq_s32, +- vtrnq_f32, vtrnq_u8, vtrnq_u16, vtrnq_u32, vtrnq_p8, vtrnq_p16, vzip_s8, +- vzip_s16, vzip_u8, vzip_u16, vzip_p8, vzip_p16, vzip_s32, vzip_f32, +- vzip_u32, vzipq_s8, vzipq_s16, vzipq_s32, vzipq_f32, vzipq_u8, +- vzipq_u16, vzipq_u32, vzipq_p8, vzipq_p16, vuzp_s8, vuzp_s16, vuzp_s32, +- vuzp_f32, vuzp_u8, vuzp_u16, vuzp_u32, vuzp_p8, vuzp_p16, vuzpq_s8, +- vuzpq_s16, vuzpq_s32, vuzpq_f32, vuzpq_u8, vuzpq_u16, vuzpq_u32, +- vuzpq_p8, vuzpq_p16): Correct mask for bigendian. ++ vtrnq_f32, vtrnq_u8, vtrnq_u16, vtrnq_u32, vtrnq_p8, vtrnq_p16, ++ vzip_s8, vzip_s16, vzip_u8, vzip_u16, vzip_p8, vzip_p16, vzip_s32, ++ vzip_f32, vzip_u32, vzipq_s8, vzipq_s16, vzipq_s32, vzipq_f32, ++ vzipq_u8, vzipq_u16, vzipq_u32, vzipq_p8, vzipq_p16, vuzp_s8, vuzp_s16, ++ vuzp_s32, vuzp_f32, vuzp_u8, vuzp_u16, vuzp_u32, vuzp_p8, vuzp_p16, ++ vuzpq_s8, vuzpq_s16, vuzpq_s32, vuzpq_f32, vuzpq_u8, vuzpq_u16, ++ vuzpq_u32, vuzpq_p8, vuzpq_p16): Correct mask for bigendian. + + + 2014-07-09 Alan Lawrence +@@ -157,11 +295,9 @@ + 2014-06-24 Jakub Jelinek + + * gimplify.c (gimplify_scan_omp_clauses) : Gimplify OMP_CLAUSE_ALIGNED_ALIGNMENT. +- (gimplify_adjust_omp_clauses_1): Make sure OMP_CLAUSE_SIZE is +- non-NULL. ++ (gimplify_adjust_omp_clauses_1): Make sure OMP_CLAUSE_SIZE is non-NULL. + (gimplify_adjust_omp_clauses): Likewise. + * omp-low.c (lower_rec_simd_input_clauses, + lower_rec_input_clauses, expand_omp_simd): Handle non-constant +@@ -176,9 +312,8 @@ + + 2014-06-18 Jakub Jelinek + +- * gimplify.c (omp_notice_variable): If n is non-NULL +- and no flags change in ORT_TARGET region, don't jump to +- do_outer. ++ * gimplify.c (omp_notice_variable): If n is non-NULL and no flags ++ change in ORT_TARGET region, don't jump to do_outer. + (struct gimplify_adjust_omp_clauses_data): New type. + (gimplify_adjust_omp_clauses_1): Adjust for data being + a struct gimplify_adjust_omp_clauses_data pointer instead +@@ -196,14 +331,12 @@ + gimple_seq * argument to omp_finish_clause hook. + * omp-low.c (scan_sharing_clauses): Call scan_omp_op on + non-DECL_P OMP_CLAUSE_DECL if ctx->outer. +- (scan_omp_parallel, lower_omp_for): When adding +- _LOOPTEMP_ clause var, add it to outer ctx's decl_map +- as identity. ++ (scan_omp_parallel, lower_omp_for): When adding _LOOPTEMP_ clause var, ++ add it to outer ctx's decl_map as identity. + * tree-core.h (OMP_CLAUSE_MAP_TO_PSET): New map kind. + * tree-nested.c (convert_nonlocal_omp_clauses, + convert_local_omp_clauses): Handle various OpenMP 4.0 clauses. +- * tree-pretty-print.c (dump_omp_clause): Handle +- OMP_CLAUSE_MAP_TO_PSET. ++ * tree-pretty-print.c (dump_omp_clause): Handle OMP_CLAUSE_MAP_TO_PSET. + + 2014-06-10 Jakub Jelinek + +@@ -227,8 +360,7 @@ + OMP_CLAUSE_LINEAR_STMT. + * omp-low.c (lower_rec_input_clauses): Fix typo. + (maybe_add_implicit_barrier_cancel, lower_omp_1): Add +- cast between Fortran boolean_type_node and C _Bool if +- needed. ++ cast between Fortran boolean_type_node and C _Bool if needed. + + 2014-06-30 Jason Merrill + +@@ -279,8 +411,7 @@ + (aarch64_sqdmlsl_lane): Likewise. + (aarch64_sqdmull_lane): Likewise. + (aarch64_sqdmull2_lane): Likewise. 
+- (aarch64_sqdmlal_laneq): +- Replace VCON usage with VCONQ. ++ (aarch64_sqdmlal_laneq): Replace VCON usage with VCONQ. + Emit aarch64_sqdmlal_laneq_internal insn. + (aarch64_sqdmlal2_laneq): Emit + aarch64_sqdmlal2_laneq_internal insn. +Index: gcc/testsuite/gcc.target/i386/pr61855.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/pr61855.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/i386/pr61855.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx512f" } */ ++ ++#include ++ ++__m512 test (__m512 x) ++{ ++ return _mm512_getmant_ps(x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero); ++} ++ +Index: gcc/testsuite/gcc.target/i386/pr61794.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/pr61794.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/i386/pr61794.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-mavx512f" } */ ++ ++#include ++ ++__m512i zmm; ++__m128i xmm; ++ ++void test (void) ++{ ++ xmm = _mm512_extracti32x4_epi32 (zmm, 0); ++} +Index: gcc/testsuite/gfortran.dg/dependency_44.f90 +=================================================================== +--- a/src/gcc/testsuite/gfortran.dg/dependency_44.f90 (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gfortran.dg/dependency_44.f90 (.../branches/gcc-4_9-branch) +@@ -0,0 +1,36 @@ ++! { dg-do run } ++! Tests fix for PR61780 in which the loop reversal mechanism was ++! not accounting for the first index being an element so that no ++! loop in this dimension is created. ++! ++! Contributed by Manfred Tietze on clf. ++! ++program prgm3 ++ implicit none ++ integer, parameter :: n = 10, k = 3 ++ integer :: i, j ++ integer, dimension(n,n) :: y ++ integer :: res1(n), res2(n) ++ ++1 format(10i5) ++ ++!initialize ++ do i=1,n ++ do j=1,n ++ y(i,j) = n*i + j ++ end do ++ end do ++ res2 = y(k,:) ++ ++!shift right ++ y(k,4:n) = y(k,3:n-1) ++ y(k,3) = 0 ++ res1 = y(k,:) ++ y(k,:) = res2 ++ y(k,n:4:-1) = y(k,n-1:3:-1) ++ y(k,3) = 0 ++ res2 = y(k,:) ++! print *, res1 ++! 
print *, res2 ++ if (any(res1 /= res2)) call abort () ++end program prgm3 +Index: gcc/testsuite/gnat.dg/pack20.adb +=================================================================== +--- a/src/gcc/testsuite/gnat.dg/pack20.adb (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gnat.dg/pack20.adb (.../branches/gcc-4_9-branch) +@@ -0,0 +1,9 @@ ++package body Pack20 is ++ ++ procedure Proc (A : Rec) is ++ Local : Rec := A; ++ begin ++ Modify (Local.Fixed); ++ end; ++ ++end Pack20; +Index: gcc/testsuite/gnat.dg/pack20.ads +=================================================================== +--- a/src/gcc/testsuite/gnat.dg/pack20.ads (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gnat.dg/pack20.ads (.../branches/gcc-4_9-branch) +@@ -0,0 +1,15 @@ ++-- { dg-do compile } ++ ++with Pack20_Pkg; use Pack20_Pkg; ++ ++package Pack20 is ++ ++ type Rec is record ++ Simple_Type : Integer; ++ Fixed : String_Ptr; ++ end record; ++ pragma Pack (Rec); ++ ++ procedure Proc (A : Rec); ++ ++end Pack20; +Index: gcc/testsuite/gnat.dg/pack20_pkg.ads +=================================================================== +--- a/src/gcc/testsuite/gnat.dg/pack20_pkg.ads (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gnat.dg/pack20_pkg.ads (.../branches/gcc-4_9-branch) +@@ -0,0 +1,7 @@ ++package Pack20_Pkg is ++ ++ type String_Ptr is access all String; ++ ++ procedure Modify (Fixed : in out String_Ptr); ++ ++end Pack20_Pkg; +Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-copyprop-2.c +=================================================================== +--- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-copyprop-2.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-copyprop-2.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Og -fdump-tree-optimized" } */ ++ ++extern long long __sdt_unsp; ++void ++f(void) ++{ ++ for (;;) ++ __asm__ ("%0" :: "i" (((!__extension__ (__builtin_constant_p ((((unsigned long long) (__typeof (__builtin_choose_expr (((__builtin_classify_type (0) + 3) & -4) == 4, (0), 0U))) __sdt_unsp) ) == 0) )) ? 1 : -1) )); ++} ++ ++/* { dg-final { scan-tree-dump-not "PHI" "optimized" } } */ ++/* { dg-final { cleanup-tree-dump "optimized" } } */ +Index: gcc/testsuite/gcc.dg/stack-usage-2.c +=================================================================== +--- a/src/gcc/testsuite/gcc.dg/stack-usage-2.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.dg/stack-usage-2.c (.../branches/gcc-4_9-branch) +@@ -1,21 +1,21 @@ + /* { dg-do compile } */ + /* { dg-options "-Wstack-usage=512" } */ + +-int foo1 (void) ++int foo1 (void) /* { dg-bogus "stack usage" } */ + { + char arr[16]; + arr[0] = 1; + return 0; +-} /* { dg-bogus "stack usage" } */ ++} + +-int foo2 (void) ++int foo2 (void) /* { dg-warning "stack usage is \[0-9\]* bytes" } */ + { + char arr[1024]; + arr[0] = 1; + return 0; +-} /* { dg-warning "stack usage is \[0-9\]* bytes" } */ ++} + +-int foo3 (void) ++int foo3 (void) /* { dg-warning "stack usage might be \[0-9\]* bytes" } */ + { + char arr[1024] __attribute__((aligned (512))); + arr[0] = 1; +@@ -22,12 +22,11 @@ + /* Force dynamic realignment of argument pointer. 
*/ + __builtin_apply ((void (*)()) foo2, 0, 0); + return 0; ++} + +-} /* { dg-warning "stack usage might be \[0-9\]* bytes" } */ +- +-int foo4 (int n) ++int foo4 (int n) /* { dg-warning "stack usage might be unbounded" } */ + { + char arr[n]; + arr[0] = 1; + return 0; +-} /* { dg-warning "stack usage might be unbounded" } */ ++} +Index: gcc/testsuite/ChangeLog +=================================================================== +--- a/src/gcc/testsuite/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,61 @@ ++2014-07-24 Martin Jambor ++ ++ PR ipa/61160 ++ * g++.dg/ipa/pr61160-2.C (main): Return zero. ++ * g++.dg/ipa/pr61160-3.C (main): Likewise. ++ ++2014-07-21 Uros Bizjak ++ ++ Backport from mainline ++ 2014-07-21 Uros Bizjak ++ ++ PR target/61855 ++ * gcc.target/i386/pr61855.c: New test. ++ ++2014-07-20 Eric Botcazou ++ ++ * gnat.dg/pack20.ad[sb]: New test. ++ * gnat.dg/pack20_pkg.ads: New helper. ++ ++2014-07-19 Eric Botcazou ++ ++ * gcc.dg/stack-usage-2.c: Adjust. ++ ++2014-07-19 Paul Thomas ++ ++ Backport from mainline ++ PR fortran/61780 ++ * gfortran.dg/dependency_44.f90 : New test ++ ++2014-07-18 Uros Bizjak ++ ++ Backport from mainline ++ 2014-07-18 Uros Bizjak ++ ++ PR target/61794 ++ * gcc.target/i386/pr61794.c: New test. ++ ++2014-07-17 Richard Biener ++ ++ Backport from mainline ++ 2014-07-10 Richard Biener ++ ++ PR c-family/61741 ++ * c-c++-common/torture/pr61741.c: Use signed char. ++ ++ 2014-07-09 Richard Biener ++ ++ PR c-family/61741 ++ * c-c++-common/torture/pr61741.c: New testcase. ++ ++2014-07-17 Richard Biener ++ ++ Backport from mainline ++ 2014-07-14 Richard Biener ++ ++ PR tree-optimization/61779 ++ * gcc.dg/tree-ssa/ssa-copyprop-2.c: New testcase. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +@@ -17,7 +75,8 @@ + 2014-06-09 Alan Lawrence + + PR target/61062 +- * gcc.target/arm/pr48252.c (main): Expect same result as endian-neutral. ++ * gcc.target/arm/pr48252.c (main): Expect same result as ++ endian-neutral. + + 2014-07-08 Jakub Jelinek + +@@ -34,8 +93,8 @@ + + 2014-07-08 Alan Lawrence + +- Backport r211502 from mainline. +- 2014-06-10 Alan Lawrence ++ Backport r211502 from mainline. ++ 2014-06-10 Alan Lawrence + + PR target/59843 + * gcc.dg/vect/vect-singleton_1.c: New file. 
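(Editorial illustration, not part of the patch.) The stack-usage-2.c hunk above follows from the toplev.c change earlier in this file: the -Wstack-usage diagnostics are now emitted with warning_at at the function's DECL_SOURCE_LOCATION, so the dg-warning annotations move from the closing brace to the declaration line. A minimal reproducer, assuming "gcc -c -Wstack-usage=512":

int
unbounded (int n)   /* warning: stack usage might be unbounded */
{
  char buf[n];      /* VLA: frame size depends on a runtime value */
  buf[0] = 1;
  return 0;
}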
+Index: gcc/testsuite/g++.dg/ipa/pr61160-1.C +=================================================================== +--- a/src/gcc/testsuite/g++.dg/ipa/pr61160-1.C (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/g++.dg/ipa/pr61160-1.C (.../branches/gcc-4_9-branch) +@@ -27,5 +27,6 @@ + int main () + { + CExample c; +- return (test (c) != &c); ++ test (c); ++ return 0; + } +Index: gcc/testsuite/g++.dg/ipa/pr61160-2.C +=================================================================== +--- a/src/gcc/testsuite/g++.dg/ipa/pr61160-2.C (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/g++.dg/ipa/pr61160-2.C (.../branches/gcc-4_9-branch) +@@ -39,5 +39,6 @@ + int main () + { + CExample c; +- return (test (c) != &c); ++ test (c); ++ return 0; + } +Index: gcc/testsuite/c-c++-common/pr61741.c +=================================================================== +--- a/src/gcc/testsuite/c-c++-common/pr61741.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/c-c++-common/pr61741.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,22 @@ ++/* { dg-do run } */ ++ ++int a = 1, b; ++ ++void ++foo (void) ++{ ++ signed char c = 0; ++ for (; a; a--) ++ for (; c >= 0; c++); ++ if (!c) ++ b = 1; ++} ++ ++int ++main () ++{ ++ foo (); ++ if (b != 0) ++ __builtin_abort (); ++ return 0; ++} +Index: gcc/expr.c +=================================================================== +--- a/src/gcc/expr.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/expr.c (.../branches/gcc-4_9-branch) +@@ -6605,7 +6605,7 @@ + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + rtx temp_target; +- if (mode == BLKmode) ++ if (mode == BLKmode || mode == VOIDmode) + mode = smallest_mode_for_size (size * BITS_PER_UNIT, MODE_INT); + temp_target = gen_reg_rtx (mode); + emit_group_store (temp_target, temp, TREE_TYPE (exp), size); +Index: gcc/fortran/ChangeLog +=================================================================== +--- a/src/gcc/fortran/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/fortran/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,11 @@ ++2014-07-19 Paul Thomas ++ ++ Backport from mainline ++ PR fortran/61780 ++ * dependency.c (gfc_dep_resolver): Index the 'reverse' array so ++ that elements are skipped. This then correctly aligns 'reverse' ++ with the scalarizer loops. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: gcc/fortran/dependency.c +=================================================================== +--- a/src/gcc/fortran/dependency.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/fortran/dependency.c (.../branches/gcc-4_9-branch) +@@ -2023,6 +2023,7 @@ + gfc_dep_resolver (gfc_ref *lref, gfc_ref *rref, gfc_reverse *reverse) + { + int n; ++ int m; + gfc_dependency fin_dep; + gfc_dependency this_dep; + +@@ -2072,6 +2073,8 @@ + break; + } + ++ /* Index for the reverse array. */ ++ m = -1; + for (n=0; n < lref->u.ar.dimen; n++) + { + /* Handle dependency when either of array reference is vector +@@ -2118,31 +2121,37 @@ + The ability to reverse or not is set by previous conditions + in this dimension. If reversal is not activated, the + value GFC_DEP_BACKWARD is reset to GFC_DEP_OVERLAP. */ ++ ++ /* Get the indexing right for the scalarizing loop. If this ++ is an element, there is no corresponding loop. */ ++ if (lref->u.ar.dimen_type[n] != DIMEN_ELEMENT) ++ m++; ++ + if (rref->u.ar.dimen_type[n] == DIMEN_RANGE + && lref->u.ar.dimen_type[n] == DIMEN_RANGE) + { + /* Set reverse if backward dependence and not inhibited. 
*/ +- if (reverse && reverse[n] == GFC_ENABLE_REVERSE) +- reverse[n] = (this_dep == GFC_DEP_BACKWARD) ? +- GFC_REVERSE_SET : reverse[n]; ++ if (reverse && reverse[m] == GFC_ENABLE_REVERSE) ++ reverse[m] = (this_dep == GFC_DEP_BACKWARD) ? ++ GFC_REVERSE_SET : reverse[m]; + + /* Set forward if forward dependence and not inhibited. */ +- if (reverse && reverse[n] == GFC_ENABLE_REVERSE) +- reverse[n] = (this_dep == GFC_DEP_FORWARD) ? +- GFC_FORWARD_SET : reverse[n]; ++ if (reverse && reverse[m] == GFC_ENABLE_REVERSE) ++ reverse[m] = (this_dep == GFC_DEP_FORWARD) ? ++ GFC_FORWARD_SET : reverse[m]; + + /* Flag up overlap if dependence not compatible with + the overall state of the expression. */ +- if (reverse && reverse[n] == GFC_REVERSE_SET ++ if (reverse && reverse[m] == GFC_REVERSE_SET + && this_dep == GFC_DEP_FORWARD) + { +- reverse[n] = GFC_INHIBIT_REVERSE; ++ reverse[m] = GFC_INHIBIT_REVERSE; + this_dep = GFC_DEP_OVERLAP; + } +- else if (reverse && reverse[n] == GFC_FORWARD_SET ++ else if (reverse && reverse[m] == GFC_FORWARD_SET + && this_dep == GFC_DEP_BACKWARD) + { +- reverse[n] = GFC_INHIBIT_REVERSE; ++ reverse[m] = GFC_INHIBIT_REVERSE; + this_dep = GFC_DEP_OVERLAP; + } + +@@ -2149,7 +2158,7 @@ + /* If no intention of reversing or reversing is explicitly + inhibited, convert backward dependence to overlap. */ + if ((reverse == NULL && this_dep == GFC_DEP_BACKWARD) +- || (reverse != NULL && reverse[n] == GFC_INHIBIT_REVERSE)) ++ || (reverse != NULL && reverse[m] == GFC_INHIBIT_REVERSE)) + this_dep = GFC_DEP_OVERLAP; + } + +Index: gcc/tree-ssa-copy.c +=================================================================== +--- a/src/gcc/tree-ssa-copy.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/tree-ssa-copy.c (.../branches/gcc-4_9-branch) +@@ -235,38 +235,26 @@ + enum ssa_prop_result retval = SSA_PROP_VARYING; + location_t loc = gimple_location (stmt); + +- tree op0 = gimple_cond_lhs (stmt); +- tree op1 = gimple_cond_rhs (stmt); ++ tree op0 = valueize_val (gimple_cond_lhs (stmt)); ++ tree op1 = valueize_val (gimple_cond_rhs (stmt)); + +- /* The only conditionals that we may be able to compute statically +- are predicates involving two SSA_NAMEs. */ +- if (TREE_CODE (op0) == SSA_NAME && TREE_CODE (op1) == SSA_NAME) ++ /* See if we can determine the predicate's value. */ ++ if (dump_file && (dump_flags & TDF_DETAILS)) + { +- op0 = valueize_val (op0); +- op1 = valueize_val (op1); ++ fprintf (dump_file, "Trying to determine truth value of "); ++ fprintf (dump_file, "predicate "); ++ print_gimple_stmt (dump_file, stmt, 0, 0); ++ } + +- /* See if we can determine the predicate's value. */ +- if (dump_file && (dump_flags & TDF_DETAILS)) +- { +- fprintf (dump_file, "Trying to determine truth value of "); +- fprintf (dump_file, "predicate "); +- print_gimple_stmt (dump_file, stmt, 0, 0); +- } +- +- /* We can fold COND and get a useful result only when we have +- the same SSA_NAME on both sides of a comparison operator. */ +- if (op0 == op1) +- { +- tree folded_cond = fold_binary_loc (loc, gimple_cond_code (stmt), +- boolean_type_node, op0, op1); +- if (folded_cond) +- { +- basic_block bb = gimple_bb (stmt); +- *taken_edge_p = find_taken_edge (bb, folded_cond); +- if (*taken_edge_p) +- retval = SSA_PROP_INTERESTING; +- } +- } ++ /* Fold COND and see whether we get a useful result. 
*/ ++ tree folded_cond = fold_binary_loc (loc, gimple_cond_code (stmt), ++ boolean_type_node, op0, op1); ++ if (folded_cond) ++ { ++ basic_block bb = gimple_bb (stmt); ++ *taken_edge_p = find_taken_edge (bb, folded_cond); ++ if (*taken_edge_p) ++ retval = SSA_PROP_INTERESTING; + } + + if (dump_file && (dump_flags & TDF_DETAILS) && *taken_edge_p) +Index: gcc/sched-deps.c +=================================================================== +--- a/src/gcc/sched-deps.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/sched-deps.c (.../branches/gcc-4_9-branch) +@@ -2750,7 +2750,8 @@ + Consider for instance a volatile asm that changes the fpu rounding + mode. An insn should not be moved across this even if it only uses + pseudo-regs because it might give an incorrectly rounded result. */ +- if (code != ASM_OPERANDS || MEM_VOLATILE_P (x)) ++ if ((code != ASM_OPERANDS || MEM_VOLATILE_P (x)) ++ && !DEBUG_INSN_P (insn)) + reg_pending_barrier = TRUE_BARRIER; + + /* For all ASM_OPERANDS, we must traverse the vector of input operands. +Index: gcc/config.gcc +=================================================================== +--- a/src/gcc/config.gcc (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config.gcc (.../branches/gcc-4_9-branch) +@@ -432,7 +432,7 @@ + nios2-*-*) + cpu_type=nios2 + extra_options="${extra_options} g.opt" +- ;; ++ ;; + picochip-*-*) + cpu_type=picochip + ;; +@@ -1129,8 +1129,7 @@ + ;; + crisv32-*-linux* | cris-*-linux*) + tm_file="dbxelf.h elfos.h ${tm_file} gnu-user.h linux.h glibc-stdint.h cris/linux.h" +- # We need to avoid using t-linux, so override default tmake_file +- tmake_file="cris/t-cris cris/t-linux t-slibgcc t-linux" ++ tmake_file="${tmake_file} cris/t-cris cris/t-linux" + extra_options="${extra_options} cris/linux.opt" + case $target in + cris-*-*) +@@ -2156,6 +2155,10 @@ + tm_file="${tm_file} newlib-stdint.h nios2/elf.h" + extra_options="${extra_options} nios2/elf.opt" + ;; ++ nios2-*-rtems*) ++ tm_file="${tm_file} newlib-stdint.h nios2/rtems.h rtems.h" ++ tmake_file="${tmake_file} t-rtems nios2/t-rtems" ++ ;; + esac + ;; + pdp11-*-*) +Index: gcc/config/sparc/sync.md +=================================================================== +--- a/src/gcc/config/sparc/sync.md (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/sparc/sync.md (.../branches/gcc-4_9-branch) +@@ -64,11 +64,19 @@ + "stbar" + [(set_attr "type" "multi")]) + ++;; For LEON3, STB has the effect of membar #StoreLoad. ++(define_insn "*membar_storeload_leon3" ++ [(set (match_operand:BLK 0 "" "") ++ (unspec:BLK [(match_dup 0) (const_int 2)] UNSPEC_MEMBAR))] ++ "TARGET_LEON3" ++ "stb\t%%g0, [%%sp-1]" ++ [(set_attr "type" "store")]) ++ + ;; For V8, LDSTUB has the effect of membar #StoreLoad. 
+ (define_insn "*membar_storeload" + [(set (match_operand:BLK 0 "" "") + (unspec:BLK [(match_dup 0) (const_int 2)] UNSPEC_MEMBAR))] +- "TARGET_V8" ++ "TARGET_V8 && !TARGET_LEON3" + "ldstub\t[%%sp-1], %%g0" + [(set_attr "type" "multi")]) + +Index: gcc/config/i386/sse.md +=================================================================== +--- a/src/gcc/config/i386/sse.md (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/i386/sse.md (.../branches/gcc-4_9-branch) +@@ -5887,9 +5887,10 @@ + (match_operand 5 "const_0_to_15_operand")])) + (match_operand: 6 "memory_operand" "0") + (match_operand:QI 7 "register_operand" "Yk")))] +- "TARGET_AVX512F && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1) +- && (INTVAL (operands[3]) = INTVAL (operands[4]) - 1) +- && (INTVAL (operands[4]) = INTVAL (operands[5]) - 1)" ++ "TARGET_AVX512F ++ && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1) ++ && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) ++ && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))" + { + operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2); + return "vextract32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}"; +@@ -5909,9 +5910,10 @@ + (match_operand 3 "const_0_to_15_operand") + (match_operand 4 "const_0_to_15_operand") + (match_operand 5 "const_0_to_15_operand")])))] +- "TARGET_AVX512F && (INTVAL (operands[2]) = INTVAL (operands[3]) - 1) +- && (INTVAL (operands[3]) = INTVAL (operands[4]) - 1) +- && (INTVAL (operands[4]) = INTVAL (operands[5]) - 1)" ++ "TARGET_AVX512F ++ && (INTVAL (operands[2]) == (INTVAL (operands[3]) - 1) ++ && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1) ++ && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))" + { + operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2); + return "vextract32x4\t{%2, %1, %0|%0, %1, %2}"; +Index: gcc/config/i386/avx512fintrin.h +=================================================================== +--- a/src/gcc/config/i386/avx512fintrin.h (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/i386/avx512fintrin.h (.../branches/gcc-4_9-branch) +@@ -8103,6 +8103,22 @@ + return __builtin_ia32_movntdqa512 ((__v8di *)__P); + } + ++/* Constants for mantissa extraction */ ++typedef enum ++{ ++ _MM_MANT_NORM_1_2, /* interval [1, 2) */ ++ _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ ++ _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ ++ _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ ++} _MM_MANTISSA_NORM_ENUM; ++ ++typedef enum ++{ ++ _MM_MANT_SIGN_src, /* sign = sign(SRC) */ ++ _MM_MANT_SIGN_zero, /* sign = 0 */ ++ _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ ++} _MM_MANTISSA_SIGN_ENUM; ++ + #ifdef __OPTIMIZE__ + extern __inline __m128 + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +@@ -8182,22 +8198,6 @@ + (__mmask8) __U, __R); + } + +-/* Constants for mantissa extraction */ +-typedef enum +-{ +- _MM_MANT_NORM_1_2, /* interval [1, 2) */ +- _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ +- _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ +- _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ +-} _MM_MANTISSA_NORM_ENUM; +- +-typedef enum +-{ +- _MM_MANT_SIGN_src, /* sign = sign(SRC) */ +- _MM_MANT_SIGN_zero, /* sign = 0 */ +- _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ +-} _MM_MANTISSA_SIGN_ENUM; +- + extern __inline __m512d + __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) + _mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, +Index: gcc/config/i386/ia32intrin.h +=================================================================== +--- a/src/gcc/config/i386/ia32intrin.h 
(.../tags/gcc_4_9_1_release)
++++ b/src/gcc/config/i386/ia32intrin.h (.../branches/gcc-4_9-branch)
+@@ -256,11 +256,7 @@
+
+ #define _bswap64(a) __bswapq(a)
+ #define _popcnt64(a) __popcntq(a)
+-#define _lrotl(a,b) __rolq((a), (b))
+-#define _lrotr(a,b) __rorq((a), (b))
+ #else
+-#define _lrotl(a,b) __rold((a), (b))
+-#define _lrotr(a,b) __rord((a), (b))
+
+ /* Read flags register */
+ extern __inline unsigned int
+@@ -280,6 +276,16 @@
+
+ #endif
+
++/* On LP64 systems, longs are 64-bit. Use the appropriate rotate
++ * function. */
++#ifdef __LP64__
++#define _lrotl(a,b) __rolq((a), (b))
++#define _lrotr(a,b) __rorq((a), (b))
++#else
++#define _lrotl(a,b) __rold((a), (b))
++#define _lrotr(a,b) __rord((a), (b))
++#endif
++
+ #define _bit_scan_forward(a) __bsfd(a)
+ #define _bit_scan_reverse(a) __bsrd(a)
+ #define _bswap(a) __bswapd(a)
+Index: gcc/config/nios2/rtems.h
+===================================================================
+--- a/src/gcc/config/nios2/rtems.h (.../tags/gcc_4_9_1_release)
++++ b/src/gcc/config/nios2/rtems.h (.../branches/gcc-4_9-branch)
+@@ -0,0 +1,34 @@
++/* Definitions for rtems targeting a NIOS2 using ELF.
++ Copyright (C) 2011-2014 Free Software Foundation, Inc.
++
++ Contributed by Chris Johns (chrisj@rtems.org).
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 3, or (at your option)
++any later version.
++
++GCC is distributed in the hope that it will be useful,
++but WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++GNU General Public License for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3. If not see
++<http://www.gnu.org/licenses/>. */
++
++/* Specify predefined symbols in preprocessor. */
++#define TARGET_OS_CPP_BUILTINS() \
++do { \
++ builtin_define ("__rtems__"); \
++ builtin_define ("__USE_INIT_FINI__"); \
++ builtin_assert ("system=rtems"); \
++} while (0)
++
++/* This toolchain implements the ABI for Linux Systems documented in the
++ Nios II Processor Reference Handbook.
++
++ This is done so RTEMS targets have Thread Local Storage like Linux.
*/ ++#define TARGET_LINUX_ABI 1 +Index: gcc/config/nios2/t-rtems +=================================================================== +--- a/src/gcc/config/nios2/t-rtems (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/nios2/t-rtems (.../branches/gcc-4_9-branch) +@@ -0,0 +1,133 @@ ++# Custom RTEMS multilibs ++ ++MULTILIB_OPTIONS = mhw-mul mhw-mulx mhw-div mcustom-fadds=253 mcustom-fdivs=255 mcustom-fmuls=252 mcustom-fsubs=254 ++ ++# Enumeration of multilibs ++ ++# MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fadds=253 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div/mcustom-fsubs=254 ++# MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mhw-div ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fadds=253/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fadds=253/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fadds=253/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fadds=253/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fadds=253 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-mulx ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fadds=253/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += 
mhw-mul/mhw-div/mcustom-fadds=253/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fadds=253/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fadds=253/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fadds=253 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mhw-div ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fadds=253/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fadds=253/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fadds=253/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fadds=253/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fadds=253 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mul/mcustom-fsubs=254 ++# MULTILIB_EXCEPTIONS += mhw-mul ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fadds=253/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fadds=253 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mhw-div ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fadds=253/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS 
+= mhw-mulx/mcustom-fadds=253/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fadds=253/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fadds=253/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fadds=253 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-mulx/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-mulx ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fadds=253/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fadds=253/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fadds=253/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fadds=253/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fadds=253/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fadds=253 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mhw-div/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mhw-div ++MULTILIB_EXCEPTIONS += mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mcustom-fadds=253/mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mcustom-fadds=253/mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mcustom-fadds=253/mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mcustom-fadds=253/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mcustom-fadds=253/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mcustom-fadds=253/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mcustom-fadds=253 ++MULTILIB_EXCEPTIONS += mcustom-fdivs=255/mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mcustom-fdivs=255/mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mcustom-fdivs=255/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mcustom-fdivs=255 ++MULTILIB_EXCEPTIONS += mcustom-fmuls=252/mcustom-fsubs=254 ++MULTILIB_EXCEPTIONS += mcustom-fmuls=252 ++MULTILIB_EXCEPTIONS += mcustom-fsubs=254 +Index: gcc/config/cris/cris.md +=================================================================== +--- a/src/gcc/config/cris/cris.md (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/cris/cris.md (.../branches/gcc-4_9-branch) +@@ -919,6 +919,8 @@ + (match_operand:SI 1 "cris_general_operand_or_symbol" ""))] + "" + { ++ enum cris_symbol_type t; ++ + /* If the output goes to a MEM, make sure we have zero or a register as + input. */ + if (MEM_P (operands[0]) +@@ -934,12 +936,12 @@ + valid symbol? Can we exclude global PIC addresses with an added + offset? 
*/ + if (flag_pic +- && CONSTANT_ADDRESS_P (operands[1]) ++ && CONSTANT_P (operands[1]) + && !cris_valid_pic_const (operands[1], false)) + { +- enum cris_pic_symbol_type t = cris_pic_symbol_type_of (operands[1]); ++ t = cris_symbol_type_of (operands[1]); + +- gcc_assert (t != cris_no_symbol); ++ gcc_assert (t != cris_no_symbol && t != cris_offsettable_symbol); + + if (! REG_S_P (operands[0])) + { +@@ -1086,7 +1088,12 @@ + if (!flag_pic + && (GET_CODE (operands[1]) == SYMBOL_REF + || GET_CODE (operands[1]) == LABEL_REF +- || GET_CODE (operands[1]) == CONST)) ++ || (GET_CODE (operands[1]) == CONST ++ && (GET_CODE (XEXP (operands[1], 0)) != UNSPEC ++ || (XINT (XEXP (operands[1], 0), 1) ++ == CRIS_UNSPEC_PLT_PCREL) ++ || (XINT (XEXP (operands[1], 0), 1) ++ == CRIS_UNSPEC_PCREL))))) + { + /* FIXME: Express this through (set_attr cc none) instead, + since we can't express the ``none'' at this point. FIXME: +@@ -1169,6 +1176,12 @@ + case CRIS_UNSPEC_PCREL: + case CRIS_UNSPEC_PLT_PCREL: + gcc_assert (TARGET_V32); ++ /* LAPC doesn't set condition codes; clear them to make the ++ (equivalence-marked) result of this insn not presumed ++ present. This instruction can be a PIC symbol load (for ++ a hidden symbol) which for weak symbols will be followed ++ by a test for NULL. */ ++ CC_STATUS_INIT; + return "lapc %1,%0"; + + default: +@@ -3710,15 +3723,16 @@ + { + gcc_assert (MEM_P (operands[0])); + if (flag_pic) +- cris_expand_pic_call_address (&operands[0]); ++ cris_expand_pic_call_address (&operands[0], &operands[1]); ++ else ++ operands[1] = const0_rtx; + }) + +-;; Accept *anything* as operand 1. Accept operands for operand 0 in +-;; order of preference. ++;; Accept operands for operand 0 in order of preference. + + (define_insn "*expanded_call_non_v32" + [(call (mem:QI (match_operand:SI 0 "general_operand" "r,Q>,g")) +- (match_operand 1 "" "")) ++ (match_operand:SI 1 "cris_call_type_marker" "rM,rM,rM")) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "!TARGET_V32" + "jsr %0") +@@ -3727,7 +3741,7 @@ + [(call + (mem:QI + (match_operand:SI 0 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i")) +- (match_operand 1 "" "")) ++ (match_operand:SI 1 "cris_call_type_marker" "rM,rM,rM,rM")) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "TARGET_V32" + "@ +@@ -3740,7 +3754,7 @@ + ;; Parallel when calculating and reusing address of indirect pointer + ;; with simple offset. (Makes most sense with PIC.) It looks a bit + ;; wrong not to have the clobber last, but that's the way combine +-;; generates it (except it doesn' look into the *inner* mem, so this ++;; generates it (except it doesn't look into the *inner* mem, so this + ;; just matches a peephole2). FIXME: investigate that. + (define_insn "*expanded_call_side" + [(call (mem:QI +@@ -3747,12 +3761,14 @@ + (mem:SI + (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r, r,r") + (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn")))) +- (match_operand 2 "" "")) ++ (match_operand:SI 2 "cris_call_type_marker" "rM,rM,rM")) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_operand:SI 3 "register_operand" "=*0,r,r") + (plus:SI (match_dup 0) + (match_dup 1)))] +- "!TARGET_AVOID_GOTPLT && !TARGET_V32" ++ ;; Disabled until after reload until we can avoid an output reload for ++ ;; operand 3 (being forbidden for call insns). 
++ "reload_completed && !TARGET_AVOID_GOTPLT && !TARGET_V32" + "jsr [%3=%0%S1]") + + (define_expand "call_value" +@@ -3764,10 +3780,12 @@ + { + gcc_assert (MEM_P (operands[1])); + if (flag_pic) +- cris_expand_pic_call_address (&operands[1]); ++ cris_expand_pic_call_address (&operands[1], &operands[2]); ++ else ++ operands[2] = const0_rtx; + }) + +-;; Accept *anything* as operand 2. The validity other than "general" of ++;; The validity other than "general" of + ;; operand 0 will be checked elsewhere. Accept operands for operand 1 in + ;; order of preference (Q includes r, but r is shorter, faster). + ;; We also accept a PLT symbol. We output it as [rPIC+sym:GOTPLT] rather +@@ -3776,7 +3794,7 @@ + (define_insn "*expanded_call_value_non_v32" + [(set (match_operand 0 "nonimmediate_operand" "=g,g,g") + (call (mem:QI (match_operand:SI 1 "general_operand" "r,Q>,g")) +- (match_operand 2 "" ""))) ++ (match_operand:SI 2 "cris_call_type_marker" "rM,rM,rM"))) + (clobber (reg:SI CRIS_SRP_REGNUM))] + "!TARGET_V32" + "Jsr %1" +@@ -3790,12 +3808,14 @@ + (mem:SI + (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r, r,r") + (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn")))) +- (match_operand 3 "" ""))) ++ (match_operand:SI 3 "cris_call_type_marker" "rM,rM,rM"))) + (clobber (reg:SI CRIS_SRP_REGNUM)) + (set (match_operand:SI 4 "register_operand" "=*1,r,r") + (plus:SI (match_dup 1) + (match_dup 2)))] +- "!TARGET_AVOID_GOTPLT && !TARGET_V32" ++ ;; Disabled until after reload until we can avoid an output reload for ++ ;; operand 4 (being forbidden for call insns). ++ "reload_completed && !TARGET_AVOID_GOTPLT && !TARGET_V32" + "Jsr [%4=%1%S2]" + [(set_attr "cc" "clobber")]) + +@@ -3805,7 +3825,7 @@ + (call + (mem:QI + (match_operand:SI 1 "cris_nonmemory_operand_or_callable_symbol" "n,r,U,i")) +- (match_operand 2 "" ""))) ++ (match_operand:SI 2 "cris_call_type_marker" "rM,rM,rM,rM"))) + (clobber (reg:SI 16))] + "TARGET_V32" + "@ +@@ -4827,7 +4847,7 @@ + /* Make sure we have canonical RTX so we match the insn pattern - + not a constant in the first operand. We also require the order + (plus reg mem) to match the final pattern. */ +- if (CONSTANT_P (otherop) || MEM_P (otherop)) ++ if (CRIS_CONSTANT_P (otherop) || MEM_P (otherop)) + { + operands[7] = operands[1]; + operands[8] = otherop; +@@ -4878,7 +4898,7 @@ + /* Make sure we have canonical RTX so we match the insn pattern - + not a constant in the first operand. We also require the order + (plus reg mem) to match the final pattern. 
*/ +- if (CONSTANT_P (otherop) || MEM_P (otherop)) ++ if (CRIS_CONSTANT_P (otherop) || MEM_P (otherop)) + { + operands[7] = operands[1]; + operands[8] = otherop; +Index: gcc/config/cris/cris.c +=================================================================== +--- a/src/gcc/config/cris/cris.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/cris/cris.c (.../branches/gcc-4_9-branch) +@@ -147,6 +147,7 @@ + static void cris_function_arg_advance (cumulative_args_t, enum machine_mode, + const_tree, bool); + static tree cris_md_asm_clobbers (tree, tree, tree); ++static bool cris_cannot_force_const_mem (enum machine_mode, rtx); + + static void cris_option_override (void); + +@@ -214,6 +215,9 @@ + #undef TARGET_LEGITIMATE_ADDRESS_P + #define TARGET_LEGITIMATE_ADDRESS_P cris_legitimate_address_p + ++#undef TARGET_LEGITIMATE_CONSTANT_P ++#define TARGET_LEGITIMATE_CONSTANT_P cris_legitimate_constant_p ++ + #undef TARGET_PREFERRED_RELOAD_CLASS + #define TARGET_PREFERRED_RELOAD_CLASS cris_preferred_reload_class + +@@ -248,6 +252,10 @@ + #define TARGET_FUNCTION_ARG_ADVANCE cris_function_arg_advance + #undef TARGET_MD_ASM_CLOBBERS + #define TARGET_MD_ASM_CLOBBERS cris_md_asm_clobbers ++ ++#undef TARGET_CANNOT_FORCE_CONST_MEM ++#define TARGET_CANNOT_FORCE_CONST_MEM cris_cannot_force_const_mem ++ + #undef TARGET_FRAME_POINTER_REQUIRED + #define TARGET_FRAME_POINTER_REQUIRED cris_frame_pointer_required + +@@ -506,6 +514,21 @@ + return crtl->uses_pic_offset_table; + } + ++/* Worker function for TARGET_CANNOT_FORCE_CONST_MEM. ++ We can't put PIC addresses in the constant pool, not even the ones that ++ can be reached as pc-relative as we can't tell when or how to do that. */ ++ ++static bool ++cris_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) ++{ ++ enum cris_symbol_type t = cris_symbol_type_of (x); ++ ++ return ++ t == cris_unspec ++ || t == cris_got_symbol ++ || t == cris_rel_symbol; ++} ++ + /* Given an rtx, return the text string corresponding to the CODE of X. + Intended for use in the assembly language output section of a + define_insn. */ +@@ -601,7 +624,7 @@ + + if (REG_P (index)) + fprintf (file, "$%s.b", reg_names[REGNO (index)]); +- else if (CONSTANT_P (index)) ++ else if (CRIS_CONSTANT_P (index)) + cris_output_addr_const (file, index); + else if (GET_CODE (index) == MULT) + { +@@ -1041,7 +1064,7 @@ + /* If this is a GOT symbol, force it to be emitted as :GOT and + :GOTPLT regardless of -fpic (i.e. not as :GOT16, :GOTPLT16). + Avoid making this too much of a special case. */ +- if (flag_pic == 1 && CONSTANT_P (operand)) ++ if (flag_pic == 1 && CRIS_CONSTANT_P (operand)) + { + int flag_pic_save = flag_pic; + +@@ -1161,7 +1184,7 @@ + default: + /* No need to handle all strange variants, let output_addr_const + do it for us. */ +- if (CONSTANT_P (operand)) ++ if (CRIS_CONSTANT_P (operand)) + { + cris_output_addr_const (file, operand); + return; +@@ -1358,7 +1381,7 @@ + bool + cris_constant_index_p (const_rtx x) + { +- return (CONSTANT_P (x) && (!flag_pic || cris_valid_pic_const (x, true))); ++ return (CRIS_CONSTANT_P (x) && (!flag_pic || cris_valid_pic_const (x, true))); + } + + /* True if X is a valid base register. */ +@@ -1467,6 +1490,29 @@ + return false; + } + ++/* Worker function for TARGET_LEGITIMATE_CONSTANT_P. We have to handle ++ PIC constants that aren't legitimized. FIXME: there used to be a ++ guarantee that the target LEGITIMATE_CONSTANT_P didn't have to handle ++ PIC constants, but no more (4.7 era); testcase: glibc init-first.c. 
++ While that may be seen as a bug, that guarantee seems a wart by design, ++ so don't bother; fix the documentation instead. */ ++ ++bool ++cris_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x) ++{ ++ enum cris_symbol_type t; ++ ++ if (flag_pic) ++ return LEGITIMATE_PIC_OPERAND_P (x); ++ ++ t = cris_symbol_type_of (x); ++ ++ return ++ t == cris_no_symbol ++ || t == cris_offsettable_symbol ++ || t == cris_unspec; ++} ++ + /* Worker function for LEGITIMIZE_RELOAD_ADDRESS. */ + + bool +@@ -2214,7 +2260,7 @@ + return (2 + 2) / 2; + + /* A BDAP with some other constant is 2 bytes extra. */ +- if (CONSTANT_P (tem2)) ++ if (CRIS_CONSTANT_P (tem2)) + return (2 + 2 + 2) / 2; + + /* BDAP with something indirect should have a higher cost than +@@ -2312,7 +2358,7 @@ + return 0; + + /* Check allowed cases, like [r(+)?].[bwd] and const. */ +- if (CONSTANT_P (val_rtx)) ++ if (CRIS_CONSTANT_P (val_rtx)) + return 1; + + if (MEM_P (val_rtx) +@@ -2464,32 +2510,34 @@ + gcc_unreachable (); + } + +- return cris_pic_symbol_type_of (x) == cris_no_symbol; ++ return cris_symbol_type_of (x) == cris_no_symbol; + } + +-/* Helper function to find the right PIC-type symbol to generate, ++/* Helper function to find the right symbol-type to generate, + given the original (non-PIC) representation. */ + +-enum cris_pic_symbol_type +-cris_pic_symbol_type_of (const_rtx x) ++enum cris_symbol_type ++cris_symbol_type_of (const_rtx x) + { + switch (GET_CODE (x)) + { + case SYMBOL_REF: +- return SYMBOL_REF_LOCAL_P (x) +- ? cris_rel_symbol : cris_got_symbol; ++ return flag_pic ++ ? (SYMBOL_REF_LOCAL_P (x) ++ ? cris_rel_symbol : cris_got_symbol) ++ : cris_offsettable_symbol; + + case LABEL_REF: +- return cris_rel_symbol; ++ return flag_pic ? cris_rel_symbol : cris_offsettable_symbol; + + case CONST: +- return cris_pic_symbol_type_of (XEXP (x, 0)); ++ return cris_symbol_type_of (XEXP (x, 0)); + + case PLUS: + case MINUS: + { +- enum cris_pic_symbol_type t1 = cris_pic_symbol_type_of (XEXP (x, 0)); +- enum cris_pic_symbol_type t2 = cris_pic_symbol_type_of (XEXP (x, 1)); ++ enum cris_symbol_type t1 = cris_symbol_type_of (XEXP (x, 0)); ++ enum cris_symbol_type t2 = cris_symbol_type_of (XEXP (x, 1)); + + gcc_assert (t1 == cris_no_symbol || t2 == cris_no_symbol); + +@@ -2504,9 +2552,7 @@ + return cris_no_symbol; + + case UNSPEC: +- /* Likely an offsettability-test attempting to add a constant to +- a GOTREAD symbol, which can't be handled. */ +- return cris_invalid_pic_symbol; ++ return cris_unspec; + + default: + fatal_insn ("unrecognized supposed constant", x); +@@ -3714,19 +3760,19 @@ + /* Worker function for expanding the address for PIC function calls. */ + + void +-cris_expand_pic_call_address (rtx *opp) ++cris_expand_pic_call_address (rtx *opp, rtx *markerp) + { + rtx op = *opp; + +- gcc_assert (MEM_P (op)); ++ gcc_assert (flag_pic && MEM_P (op)); + op = XEXP (op, 0); + + /* It might be that code can be generated that jumps to 0 (or to a + specific address). Don't die on that. (There is a + testcase.) */ +- if (CONSTANT_ADDRESS_P (op) && !CONST_INT_P (op)) ++ if (CONSTANT_P (op) && !CONST_INT_P (op)) + { +- enum cris_pic_symbol_type t = cris_pic_symbol_type_of (op); ++ enum cris_symbol_type t = cris_symbol_type_of (op); + + CRIS_ASSERT (can_create_pseudo_p ()); + +@@ -3752,6 +3798,9 @@ + } + else + op = force_reg (Pmode, op); ++ ++ /* A local call. 
*/ ++ *markerp = const0_rtx; + } + else if (t == cris_got_symbol) + { +@@ -3758,12 +3807,12 @@ + if (TARGET_AVOID_GOTPLT) + { + /* Change a "jsr sym" into (allocate register rM, rO) +- "move.d (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_GOTREL)),rM" ++ "move.d (const (unspec [sym] CRIS_UNSPEC_PLT_GOTREL)),rM" + "add.d rPIC,rM,rO", "jsr rO" for pre-v32 and +- "jsr (const (unspec [sym rPIC] CRIS_UNSPEC_PLT_PCREL))" ++ "jsr (const (unspec [sym] CRIS_UNSPEC_PLT_PCREL))" + for v32. */ + rtx tem, rm, ro; +- gcc_assert (can_create_pseudo_p ()); ++ + crtl->uses_pic_offset_table = 1; + tem = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op), + TARGET_V32 +@@ -3817,14 +3866,27 @@ + MEM_NOTRAP_P (mem) = 1; + op = mem; + } ++ ++ /* We need to prepare this call to go through the PLT; we ++ need to make GOT available. */ ++ *markerp = pic_offset_table_rtx; + } + else +- /* Can't possibly get a GOT-needing-fixup for a function-call, +- right? */ ++ /* Can't possibly get anything else for a function-call, right? */ + fatal_insn ("unidentifiable call op", op); + +- *opp = replace_equiv_address (*opp, op); ++ /* If the validizing variant is called, it will try to validize ++ the address as a valid any-operand constant, but as it's only ++ valid for calls and moves, it will fail and always be forced ++ into a register. */ ++ *opp = replace_equiv_address_nv (*opp, op); + } ++ else ++ /* Can't tell what locality a call to a non-constant address has; ++ better make the GOT register alive at it. ++ FIXME: Can we see whether the register has known constant ++ contents? */ ++ *markerp = pic_offset_table_rtx; + } + + /* Make sure operands are in the right order for an addsi3 insn as +Index: gcc/config/cris/predicates.md +=================================================================== +--- a/src/gcc/config/cris/predicates.md (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/cris/predicates.md (.../branches/gcc-4_9-branch) +@@ -142,7 +142,7 @@ + (ior (match_operand 0 "general_operand") + (and (match_code "const, symbol_ref, label_ref") + ; The following test is actually just an assertion. +- (match_test "cris_pic_symbol_type_of (op) != cris_no_symbol")))) ++ (match_test "cris_symbol_type_of (op) != cris_no_symbol")))) + + ;; A predicate for the anon movsi expansion, one that fits a PCREL + ;; operand as well as general_operand. +@@ -176,3 +176,15 @@ + (ior (match_operand 0 "memory_operand") + (match_test "cris_general_operand_or_symbol (XEXP (op, 0), + Pmode)")))) ++ ++;; A marker for the call-insn: (const_int 0) for a call to a ++;; hidden or static function and non-pic and ++;; pic_offset_table_rtx for a call that *might* go through the ++;; PLT. ++ ++(define_predicate "cris_call_type_marker" ++ (ior (and (match_operand 0 "const_int_operand") ++ (match_test "op == const0_rtx")) ++ (and (and (match_operand 0 "register_operand") ++ (match_test "op == pic_offset_table_rtx")) ++ (match_test "flag_pic != 0")))) +Index: gcc/config/cris/constraints.md +=================================================================== +--- a/src/gcc/config/cris/constraints.md (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/cris/constraints.md (.../branches/gcc-4_9-branch) +@@ -118,7 +118,7 @@ + reload_in_progress + || reload_completed)")) + ;; Just an explicit indirect reference: [const]? +- (match_test "CONSTANT_P (XEXP (op, 0))") ++ (match_test "CRIS_CONSTANT_P (XEXP (op, 0))") + ;; Something that is indexed; [...+...]? + (and (match_code "plus" "0") + ;; A BDAP constant: [reg+(8|16|32)bit offset]? 
+@@ -159,6 +159,8 @@
+ (define_constraint "U"
+ "@internal"
+ (and (match_test "flag_pic")
++ ;; We're just interested in the ..._or_callable_symbol part.
++ ;; (Using CRIS_CONSTANT_P would exclude that too.)
+ (match_test "CONSTANT_P (op)")
+ (match_operand 0 "cris_nonmemory_operand_or_callable_symbol")))
+
+Index: gcc/config/cris/cris.h
+===================================================================
+--- a/src/gcc/config/cris/cris.h (.../tags/gcc_4_9_1_release)
++++ b/src/gcc/config/cris/cris.h (.../branches/gcc-4_9-branch)
+@@ -794,6 +794,12 @@
+ } \
+ while (0)
+
++/* The mode argument to cris_legitimate_constant_p isn't used, so just
++ pass a cheap dummy. N.B. we have to cast away const from the
++ parameter rather than adjust the parameter, as its type is mandated
++ by the TARGET_LEGITIMATE_CONSTANT_P target hook interface. */
++#define CRIS_CONSTANT_P(X) \
++ (CONSTANT_P (X) && cris_legitimate_constant_p (VOIDmode, CONST_CAST_RTX (X)))
+
+ /* Node: Condition Code */
+
+@@ -833,13 +839,14 @@
+
+ /* Helper type. */
+
+-enum cris_pic_symbol_type
++enum cris_symbol_type
+ {
+ cris_no_symbol = 0,
+ cris_got_symbol = 1,
+ cris_rel_symbol = 2,
+ cris_got_symbol_needing_fixup = 3,
+- cris_invalid_pic_symbol = 4
++ cris_unspec = 7,
++ cris_offsettable_symbol = 8
+ };
+
+ #define PIC_OFFSET_TABLE_REGNUM (flag_pic ? CRIS_GOT_REGNUM : INVALID_REGNUM)
+Index: gcc/config/cris/cris-protos.h
+===================================================================
+--- a/src/gcc/config/cris/cris-protos.h (.../tags/gcc_4_9_1_release)
++++ b/src/gcc/config/cris/cris-protos.h (.../branches/gcc-4_9-branch)
+@@ -31,8 +31,9 @@
+ extern rtx cris_return_addr_rtx (int, rtx);
+ extern rtx cris_split_movdx (rtx *);
+ extern int cris_legitimate_pic_operand (rtx);
+-extern enum cris_pic_symbol_type cris_pic_symbol_type_of (const_rtx);
++extern enum cris_symbol_type cris_symbol_type_of (const_rtx);
+ extern bool cris_valid_pic_const (const_rtx, bool);
++extern bool cris_legitimate_constant_p (enum machine_mode, rtx);
+ extern bool cris_constant_index_p (const_rtx);
+ extern bool cris_base_p (const_rtx, bool);
+ extern bool cris_base_or_autoincr_p (const_rtx, bool);
+@@ -46,7 +47,7 @@
+ extern void cris_asm_output_case_end (FILE *, int, rtx);
+ extern rtx cris_gen_movem_load (rtx, rtx, int);
+ extern rtx cris_emit_movem_store (rtx, rtx, int, bool);
+-extern void cris_expand_pic_call_address (rtx *);
++extern void cris_expand_pic_call_address (rtx *, rtx *);
+ extern void cris_order_for_addsi3 (rtx *, int);
+ extern void cris_emit_trap_for_misalignment (rtx);
+ #endif /* RTX_CODE */
+Index: gcc/config/rs6000/sysv4.h
+===================================================================
+--- a/src/gcc/config/rs6000/sysv4.h (.../tags/gcc_4_9_1_release)
++++ b/src/gcc/config/rs6000/sysv4.h (.../branches/gcc-4_9-branch)
+@@ -949,3 +949,19 @@
+ #define TARGET_USES_SYSV4_OPT 1
+
+ #undef DBX_REGISTER_NUMBER
++
++/* Link -lasan early on the command line. For -static-libasan, don't link
++ it for -shared link, the executable should be compiled with -static-libasan
++ in that case, and for executable link link with --{,no-}whole-archive around
++ it to force everything into the executable. And similarly for -ltsan.
*/ ++#if defined(HAVE_LD_STATIC_DYNAMIC) ++#undef LIBASAN_EARLY_SPEC ++#define LIBASAN_EARLY_SPEC "%{!shared:libasan_preinit%O%s} " \ ++ "%{static-libasan:%{!shared:" \ ++ LD_STATIC_OPTION " --whole-archive -lasan --no-whole-archive " \ ++ LD_DYNAMIC_OPTION "}}%{!static-libasan:-lasan}" ++#undef LIBTSAN_EARLY_SPEC ++#define LIBTSAN_EARLY_SPEC "%{static-libtsan:%{!shared:" \ ++ LD_STATIC_OPTION " --whole-archive -ltsan --no-whole-archive " \ ++ LD_DYNAMIC_OPTION "}}%{!static-libtsan:-ltsan}" ++#endif +Index: gcc/config/arm/t-rtems-eabi +=================================================================== +--- a/src/gcc/config/arm/t-rtems-eabi (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/arm/t-rtems-eabi (.../branches/gcc-4_9-branch) +@@ -1,47 +1,167 @@ + # Custom RTEMS EABI multilibs + +-MULTILIB_OPTIONS = mthumb march=armv6-m/march=armv7-a/march=armv7-r/march=armv7-m mfpu=neon mfloat-abi=hard +-MULTILIB_DIRNAMES = thumb armv6-m armv7-a armv7-r armv7-m neon hard ++MULTILIB_OPTIONS = mbig-endian mthumb march=armv6-m/march=armv7-a/march=armv7-r/march=armv7-m mfpu=neon/mfpu=vfpv3-d16/mfpu=fpv4-sp-d16 mfloat-abi=hard ++MULTILIB_DIRNAMES = eb thumb armv6-m armv7-a armv7-r armv7-m neon vfpv3-d16 fpv4-sp-d16 hard + + # Enumeration of multilibs + + MULTILIB_EXCEPTIONS = ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv6-m/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv6-m/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv6-m/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv6-m/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv6-m/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv6-m/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv6-m/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv6-m ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-a/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-a/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-a/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-a/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-a/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-a/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-a/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-a ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-r/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-r/mfpu=neon ++# MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-r/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-r/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-r/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-r/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-r/mfloat-abi=hard ++# MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-r ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-m/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-m/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-m/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-m/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-m/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS 
+= mbig-endian/mthumb/march=armv7-m/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-m/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/march=armv7-m ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mthumb ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv6-m/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv6-m/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv6-m/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv6-m/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv6-m/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv6-m/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv6-m/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv6-m ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-a/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-a/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-a/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-a/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-a/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-a/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-a/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-a ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-r/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-r/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-r/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-r/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-r/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-r/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-r/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-r ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-m/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-m/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-m/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-m/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-m/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-m/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-m/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/march=armv7-m ++MULTILIB_EXCEPTIONS += mbig-endian/mfpu=neon/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mfpu=neon ++MULTILIB_EXCEPTIONS += mbig-endian/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian/mfpu=fpv4-sp-d16 ++MULTILIB_EXCEPTIONS += mbig-endian/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mbig-endian + MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=neon ++MULTILIB_EXCEPTIONS += 
mthumb/march=armv6-m/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += mthumb/march=armv6-m/mfloat-abi=hard + # MULTILIB_EXCEPTIONS += mthumb/march=armv6-m + # MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=neon ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += mthumb/march=armv7-a/mfloat-abi=hard + # MULTILIB_EXCEPTIONS += mthumb/march=armv7-a + MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=neon ++# MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += mthumb/march=armv7-r/mfloat-abi=hard + # MULTILIB_EXCEPTIONS += mthumb/march=armv7-r + MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=neon ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=vfpv3-d16 ++# MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += mthumb/march=armv7-m/mfloat-abi=hard + # MULTILIB_EXCEPTIONS += mthumb/march=armv7-m + MULTILIB_EXCEPTIONS += mthumb/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += mthumb/mfpu=neon ++MULTILIB_EXCEPTIONS += mthumb/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mthumb/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mthumb/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=hard + # MULTILIB_EXCEPTIONS += mthumb + MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=neon ++MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += march=armv6-m/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += march=armv6-m/mfloat-abi=hard + MULTILIB_EXCEPTIONS += march=armv6-m + MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=neon ++MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += march=armv7-a/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += march=armv7-a/mfloat-abi=hard + MULTILIB_EXCEPTIONS += march=armv7-a + MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=neon ++MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += 
march=armv7-r/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += march=armv7-r/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += march=armv7-r/mfloat-abi=hard + MULTILIB_EXCEPTIONS += march=armv7-r + MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=neon ++MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += march=armv7-m/mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += march=armv7-m/mfloat-abi=hard + MULTILIB_EXCEPTIONS += march=armv7-m + MULTILIB_EXCEPTIONS += mfpu=neon/mfloat-abi=hard + MULTILIB_EXCEPTIONS += mfpu=neon ++MULTILIB_EXCEPTIONS += mfpu=vfpv3-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mfpu=vfpv3-d16 ++MULTILIB_EXCEPTIONS += mfpu=fpv4-sp-d16/mfloat-abi=hard ++MULTILIB_EXCEPTIONS += mfpu=fpv4-sp-d16 + MULTILIB_EXCEPTIONS += mfloat-abi=hard diff --git a/debian/rules.patch b/debian/rules.patch index 8630d06..829f6b7 100644 --- a/debian/rules.patch +++ b/debian/rules.patch @@ -13,6 +13,7 @@ series_file ?= $(patchdir)/series # which patches should be applied? debian_patches = \ + svn-updates \ $(if $(with_linaro_branch),gcc-linaro) \ # svn-updates \ @@ -265,7 +266,11 @@ else ifeq ($(distribution),Ubuntu) endif debian_patches += libffi-ro-eh_frame_sect -debian_patches += gcc-multiarch$(if $(trunk_build),-trunk) +ifeq ($(trunk_build),yes) + debian_patches += gcc-multiarch-trunk +else + debian_patches += gcc-multiarch$(if $(with_linaro_branch),-linaro) +endif ifeq ($(with_multiarch_lib),yes) ifneq ($(single_package),yes) debian_patches += libjava-multiarch -- cgit v1.2.3 From 26d8993a95b7cba59d15b5cb40397acf34125603 Mon Sep 17 00:00:00 2001 From: doko Date: Thu, 24 Jul 2014 22:16:18 +0000 Subject: * Update to SVN 20140724 (r213031) from the gcc-4_9-branch. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7527 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 6 +- debian/patches/gcc-elfv2-abi-warn1.diff | 99 -------- debian/patches/gcc-elfv2-abi-warn2.diff | 106 --------- debian/patches/gcc-elfv2-abi-warn3.diff | 158 ------------- debian/patches/gcc-elfv2-abi-warn4.diff | 56 ----- debian/patches/svn-updates.diff | 386 ++++++++++++++++++++++++++++++-- debian/rules.patch | 4 - 7 files changed, 375 insertions(+), 440 deletions(-) delete mode 100644 debian/patches/gcc-elfv2-abi-warn1.diff delete mode 100644 debian/patches/gcc-elfv2-abi-warn2.diff delete mode 100644 debian/patches/gcc-elfv2-abi-warn3.diff delete mode 100644 debian/patches/gcc-elfv2-abi-warn4.diff (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index 47c7ab6..cf571ce 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,6 @@ -gcc-4.9 (4.9.1-2) UNRELEASED; urgency=medium +gcc-4.9 (4.9.1-2) unstable; urgency=medium - * Update to SVN 20140724 (r212995) from the gcc-4_9-branch. + * Update to SVN 20140724 (r213031) from the gcc-4_9-branch. * Fix installing test logs and summaries. * Warn about ppc ELFv2 ABI issues, which will change in GCC 4.10. @@ -8,7 +8,7 @@ gcc-4.9 (4.9.1-2) UNRELEASED; urgency=medium * Build libphobos on armel and armhf. Closes: #755390. * Update the Linaro support to the 4.9-2014.07 release. 
- -- Matthias Klose <doko@debian.org>  Thu, 24 Jul 2014 16:47:07 +0200
+ -- Matthias Klose <doko@debian.org>  Thu, 24 Jul 2014 23:59:49 +0200
 
 gcc-4.9 (4.9.1-1) unstable; urgency=medium
diff --git a/debian/patches/gcc-elfv2-abi-warn1.diff b/debian/patches/gcc-elfv2-abi-warn1.diff
deleted file mode 100644
index 437ba17..0000000
--- a/debian/patches/gcc-elfv2-abi-warn1.diff
+++ /dev/null
@@ -1,99 +0,0 @@
-# DP: ppc64el, fix ELFv2 homogeneous float aggregate ABI bug
-
-Subject: [PATCH, rs6000, 4.8/4.9] Fix ELFv2 homogeneous float aggregate ABI bug
-
-Hello,
-
-this is the variant intended for the 4.8/4.9 branches of the patch:
-https://gcc.gnu.org/ml/gcc-patches/2014-07/msg00994.html
-
-As discussed, it does *not* actually change ABI, but only warn when
-encountering a situation where the ABI will change in a future GCC.
-(Avoiding the specific term "GCC 4.10" here since I'm not certain
-whether the next GCC release will in fact be called that ...)
-
-Tested on powerpc64-linux and powerpc64le-linux; also verified using
-the ABI compat suite (against an unpatched GCC) that this patch does
-not change the ABI.
-
-OK for 4.8/4.9 once the mainline patch is in?
-
-Bye,
-Ulrich
-
-
-gcc/ChangeLog:
-
- * config/rs6000/rs6000.c (rs6000_function_arg): If a float argument
- does not fit fully into floating-point registers, and there is still
- space in the register parameter area, issue -Wpsabi note that the ABI
- will change in a future GCC release.
-
-gcc/testsuite/ChangeLog:
-
- * gcc.target/powerpc/ppc64-abi-warn-1.c: New test.
-
-
---- a/src/gcc/config/rs6000/rs6000.c
-+++ b/src/gcc/config/rs6000/rs6000.c
-@@ -10225,6 +10225,7 @@ rs6000_function_arg (cumulative_args_t c
- rtx r, off;
- int i, k = 0;
- unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
-+ int fpr_words;
-
- /* Do we also need to pass this argument in the parameter
- save area? */
-@@ -10253,6 +10254,37 @@ rs6000_function_arg (cumulative_args_t c
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
- }
-
-+ /* If there were not enough FPRs to hold the argument, the rest
-+ usually goes into memory. However, if the current position
-+ is still within the register parameter area, a portion may
-+ actually have to go into GPRs.
-+
-+ Note that it may happen that the portion of the argument
-+ passed in the first "half" of the first GPR was already
-+ passed in the last FPR as well.
-+
-+ For unnamed arguments, we already set up GPRs to cover the
-+ whole argument in rs6000_psave_function_arg, so there is
-+ nothing further to do at this point.
-+
-+ GCC 4.8/4.9 Note: This was implemented incorrectly in earlier
-+ GCC releases. To avoid any ABI change on the release branch,
-+ we retain that original implementation here, but warn if we
-+ encounter a case where the ABI will change in the future. */
-+ fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ?
4 : 8); -+ if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG -+ && cum->nargs_prototype > 0) -+ { -+ static bool warned; -+ if (!warned && warn_psabi) -+ { -+ warned = true; -+ inform (input_location, -+ "the ABI of passing homogeneous float aggregates" -+ " will change in a future GCC release"); -+ } -+ } -+ - return rs6000_finish_function_arg (mode, rvec, k); - } - else if (align_words < GP_ARG_NUM_REG) ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-1.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ -+/* { dg-options "-mabi=elfv2" } */ -+ -+struct f8 -+ { -+ float x[8]; -+ }; -+ -+void test (struct f8 a, struct f8 b) /* { dg-message "note: the ABI of passing homogeneous float aggregates will change" } */ -+{ -+} -+ diff --git a/debian/patches/gcc-elfv2-abi-warn2.diff b/debian/patches/gcc-elfv2-abi-warn2.diff deleted file mode 100644 index 5309998..0000000 --- a/debian/patches/gcc-elfv2-abi-warn2.diff +++ /dev/null @@ -1,106 +0,0 @@ -# DP: ppc64el, fix aggregate alignment ABI issue - -this is the variant intended for the 4.8/4.9 branches of the patch: -https://gcc.gnu.org/ml/gcc-patches/2014-07/msg00995.html - -As discussed, it does *not* actually change ABI, but only warn when -encountering a situation where the ABI will change in a future GCC. -(Avoiding the specific term "GCC 4.10" here since I'm not certain -whether the next GCC release will in fact be called that ...) - -Tested on powerpc64-linux and powerpc64le-linux; also verified using -the ABI compat suite (against an unpatched GCC) that this patch does -not change the ABI. - -OK for 4.8/4.9 once the mainline patch is in? - -Bye, -Ulrich - - -gcc/ChangeLog: - - * config/rs6000/rs6000.c (rs6000_function_arg_boundary): Issue - -Wpsabi note when encountering a type where future GCC releases - will apply different alignment requirements. - -gcc/testsuite/ChangeLog: - - * gcc.target/powerpc/ppc64-abi-warn-2.c: New test. - - ---- a/src/gcc/config/rs6000/rs6000.c -+++ b/src/gcc/config/rs6000/rs6000.c -@@ -9180,14 +9180,51 @@ rs6000_function_arg_boundary (enum machi - || (type && TREE_CODE (type) == VECTOR_TYPE - && int_size_in_bytes (type) >= 16)) - return 128; -- else if (((TARGET_MACHO && rs6000_darwin64_abi) -- || DEFAULT_ABI == ABI_ELFv2 -- || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) -- && mode == BLKmode -- && type && TYPE_ALIGN (type) > 64) -+ -+ /* Aggregate types that need > 8 byte alignment are quadword-aligned -+ in the parameter area in the ELFv2 ABI, and in the AIX ABI unless -+ -mcompat-align-parm is used. */ -+ if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm) -+ || DEFAULT_ABI == ABI_ELFv2) -+ && type && TYPE_ALIGN (type) > 64) -+ { -+ /* "Aggregate" means any AGGREGATE_TYPE except for single-element -+ or homogeneous float/vector aggregates here. We already handled -+ vector aggregates above, but still need to check for float here. */ -+ bool aggregate_p = (AGGREGATE_TYPE_P (type) -+ && !SCALAR_FLOAT_MODE_P (elt_mode)); -+ -+ /* We used to check for BLKmode instead of the above aggregate type -+ check. Warn when this results in any difference to the ABI. 
*/ -+ if (aggregate_p != (mode == BLKmode)) -+ { -+ static bool warned; -+ if (!warned && warn_psabi) -+ { -+ warned = true; -+ inform (input_location, -+ "the ABI of passing aggregates with %d-byte alignment" -+ " will change in a future GCC release", -+ (int) TYPE_ALIGN (type) / BITS_PER_UNIT); -+ } -+ } -+ -+ /* GCC 4.8/4.9 Note: To avoid any ABI change on a release branch, we -+ keep using the BLKmode check, but warn if there will be differences -+ in future GCC releases. */ -+ if (mode == BLKmode) -+ return 128; -+ } -+ -+ /* Similar for the Darwin64 ABI. Note that for historical reasons we -+ implement the "aggregate type" check as a BLKmode check here; this -+ means certain aggregate types are in fact not aligned. */ -+ if (TARGET_MACHO && rs6000_darwin64_abi -+ && mode == BLKmode -+ && type && TYPE_ALIGN (type) > 64) - return 128; -- else -- return PARM_BOUNDARY; -+ -+ return PARM_BOUNDARY; - } - - /* The offset in words to the start of the parameter save area. */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-2.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ -+ -+struct test -+ { -+ long a __attribute__((aligned (16))); -+ }; -+ -+void test (struct test a) /* { dg-message "note: the ABI of passing aggregates with 16-byte alignment will change" } */ -+{ -+} -+ diff --git a/debian/patches/gcc-elfv2-abi-warn3.diff b/debian/patches/gcc-elfv2-abi-warn3.diff deleted file mode 100644 index 637d8d7..0000000 --- a/debian/patches/gcc-elfv2-abi-warn3.diff +++ /dev/null @@ -1,158 +0,0 @@ -# DP: ppc64*, fix alignment of non-Altivec vector struct fields - -this is the variant intended for the 4.8/4.9 branches of the patch: -https://gcc.gnu.org/ml/gcc-patches/2014-07/msg01072.html - -As discussed, it does *not* actually change ABI, but only warn when -encountering a situation where the ABI will change in a future GCC. -(Avoiding the specific term "GCC 4.10" here since I'm not certain -whether the next GCC release will in fact be called that ...) - -Tested on powerpc64-linux and powerpc64le-linux; also verified using -the ABI compat suite (against an unpatched GCC) that this patch does -not change the ABI. - -OK for 4.8/4.9 once the mainline patch is in? - -Bye, -Ulrich - - -gcc/ChangeLog: - - * config/rs6000/rs6000-protos.h (rs6000_special_adjust_field_align_p): - Add prototype. - * config/rs6000/rs6000.c (rs6000_special_adjust_field_align_p): New - function. Issue -Wpsabi warning if future GCC releases will use - different field alignment rules for this type. - * config/rs6000/sysv4.h (ADJUST_FIELD_ALIGN): Call it. - * config/rs6000/linux64.h (ADJUST_FIELD_ALIGN): Likewise. - * config/rs6000/freebsd64.h (ADJUST_FIELD_ALIGN): Likewise. - -gcc/testsuite/ChangeLog: - - * gcc.target/powerpc/ppc64-abi-warn-3.c: New test. - - * gcc.c-torture/execute/20050316-1.x: Add -Wno-psabi. - * gcc.c-torture/execute/20050604-1.x: Add -Wno-psabi. - * gcc.c-torture/execute/20050316-3.x: New file. Add -Wno-psabi. - * gcc.c-torture/execute/pr23135.x: Likewise. 
- ---- a/src/gcc/config/rs6000/rs6000-protos.h -+++ b/src/gcc/config/rs6000/rs6000-protos.h -@@ -155,6 +155,7 @@ extern void rs6000_split_logical (rtx [] - - #ifdef TREE_CODE - extern unsigned int rs6000_data_alignment (tree, unsigned int, enum data_align); -+extern bool rs6000_special_adjust_field_align_p (tree, unsigned int); - extern unsigned int rs6000_special_round_type_align (tree, unsigned int, - unsigned int); - extern unsigned int darwin_rs6000_special_round_type_align (tree, unsigned int, ---- a/src/gcc/config/rs6000/rs6000.c -+++ b/src/gcc/config/rs6000/rs6000.c -@@ -5871,6 +5871,34 @@ rs6000_data_alignment (tree type, unsign - return align; - } - -+/* Previous GCC releases forced all vector types to have 16-byte alignment. */ -+ -+bool -+rs6000_special_adjust_field_align_p (tree field, unsigned int computed) -+{ -+ if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) -+ { -+ if (computed != 128) -+ { -+ static bool warned; -+ if (!warned && warn_psabi) -+ { -+ warned = true; -+ inform (input_location, -+ "the layout of aggregates containing vectors with" -+ " %d-byte alignment will change in a future GCC release", -+ computed / BITS_PER_UNIT); -+ } -+ } -+ /* GCC 4.8/4.9 Note: To avoid any ABI change on a release branch, we -+ keep the special treatment of vector types, but warn if there will -+ be differences in future GCC releases. */ -+ return true; -+ } -+ -+ return false; -+} -+ - /* AIX increases natural record alignment to doubleword if the first - field is an FP double while the FP fields remain word aligned. */ - ---- a/src/gcc/config/rs6000/sysv4.h -+++ b/src/gcc/config/rs6000/sysv4.h -@@ -292,7 +292,7 @@ do { \ - /* An expression for the alignment of a structure field FIELD if the - alignment computed in the usual way is COMPUTED. */ - #define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ -- ((TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \ -+ (rs6000_special_adjust_field_align_p ((FIELD), (COMPUTED)) \ - ? 128 : COMPUTED) - - #undef BIGGEST_FIELD_ALIGNMENT ---- a/src/gcc/config/rs6000/linux64.h -+++ b/src/gcc/config/rs6000/linux64.h -@@ -246,7 +246,7 @@ extern int dot_symbols; - /* PowerPC64 Linux word-aligns FP doubles when -malign-power is given. */ - #undef ADJUST_FIELD_ALIGN - #define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ -- ((TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \ -+ (rs6000_special_adjust_field_align_p ((FIELD), (COMPUTED)) \ - ? 128 \ - : (TARGET_64BIT \ - && TARGET_ALIGN_NATURAL == 0 \ ---- a/src/gcc/config/rs6000/freebsd64.h -+++ b/src/gcc/config/rs6000/freebsd64.h -@@ -367,7 +367,7 @@ extern int dot_symbols; - /* PowerPC64 Linux word-aligns FP doubles when -malign-power is given. */ - #undef ADJUST_FIELD_ALIGN - #define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ -- ((TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \ -+ (rs6000_special_adjust_field_align_p ((FIELD), (COMPUTED)) \ - ? 
128 \ - : (TARGET_64BIT \ - && TARGET_ALIGN_NATURAL == 0 \ ---- a/src/gcc/testsuite/gcc.c-torture/execute/20050316-1.x -+++ b/src/gcc/testsuite/gcc.c-torture/execute/20050316-1.x -@@ -4,4 +4,5 @@ if { [check_effective_target_int16] } { - return 1 - } - -+set additional_flags "-Wno-psabi" - return 0; ---- /dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/execute/20050316-3.x -@@ -0,0 +1,2 @@ -+set additional_flags "-Wno-psabi" -+return 0 ---- a/src/gcc/testsuite/gcc.c-torture/execute/20050604-1.x -+++ b/src/gcc/testsuite/gcc.c-torture/execute/20050604-1.x -@@ -6,4 +6,5 @@ if { [istarget "i?86-*-*"] || [istarget - set additional_flags "-mno-mmx" - } - -+set additional_flags "-Wno-psabi" - return 0 ---- /dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr23135.x -@@ -0,0 +1,2 @@ -+set additional_flags "-Wno-psabi" -+return 0 ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-3.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ -+/* { dg-require-effective-target powerpc_altivec_ok } */ -+/* { dg-options "-maltivec" } */ -+ -+struct test -+ { -+ int a __attribute__((vector_size (8))); -+ }; /* { dg-message "note: the layout of aggregates containing vectors with 8-byte alignment will change" } */ -+ diff --git a/debian/patches/gcc-elfv2-abi-warn4.diff b/debian/patches/gcc-elfv2-abi-warn4.diff deleted file mode 100644 index 5b11ece..0000000 --- a/debian/patches/gcc-elfv2-abi-warn4.diff +++ /dev/null @@ -1,56 +0,0 @@ -# DP: Reliably prune GCC notes in C++ compat suite - -in testing the rs6000 ABI patches I noted a weird effect: usually, the --Wpsabi warning notes are ignored in the compat test suites, so we get -a clean test run anyway. - -However, when running the C++ version of the struct-layout-1.exp case -*alone* (using RUNTESTFLAGS=struct-layout-1.exp), suddenly tests are -failing because of those extra notes. This does *not* happen with -the C version of that suite ... - -It turns out that that pruning those notes is supposed to happen -from within gcc-defs.exp:${tool}_check_compile: - if { [info proc ${tool}-dg-prune] != "" } { - global target_triplet - set gcc_output [${tool}-dg-prune $target_triplet $gcc_output] - } - -However, the g++-dg-prune routine is defined in g++-dg.exp, which -is never included from g++.dg/compat/struct-layout-1.exp (directly -or indirectly). Now, when running the full suite, that file would -have been loaded by some earlier g++.dg .exp file, so everything -works out. But when running struct-layout-1.exp stand-alone, the -g++-dg-prune routine is never defined and thus silently no pruning -takes place. - -To fix this, the following patch simply loads g++-dg.exp directly -from g++.dg/compat/struct-layout-1.exp. - -Tested on powerpc64-linux and powerpc64le-linux. - -OK for mainline (and 4.8/4.9 once the rs6000 ABI patches are -backported there)? - -Bye, -Ulrich - - -gcc/testsuite/ChangeLog: - - * g++.dg/compat/struct-layout-1.exp: Load g++-dg.exp - - ---- a/src/gcc/testsuite/g++.dg/compat/struct-layout-1.exp -+++ b/src/gcc/testsuite/g++.dg/compat/struct-layout-1.exp -@@ -89,6 +89,9 @@ proc compat-use-tst-compiler { } { - # This must be done after the compat-use-*-compiler definitions. 
- load_lib compat.exp - -+# Provide the g++-dg-prune routine (gcc-dp.exp is loaded by compat.exp) -+load_lib g++-dg.exp -+ - g++_init - - # Save variables for the C++ compiler under test, which each test will - diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index 5d2874f..c8050f7 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140724 (r212995). +# DP: updates from the 4.9 branch upto 20140724 (r213031). last_update() { cat > ${dir}LAST_UPDATED ++ ++ * config/aarch64/aarch64-linux.h (TARGET_ASM_FILE_END): Define. ++ ++2014-07-24 Ulrich Weigand ++ ++ * config/rs6000/rs6000-protos.h (rs6000_special_adjust_field_align_p): ++ Add prototype. ++ * config/rs6000/rs6000.c (rs6000_special_adjust_field_align_p): New ++ function. Issue -Wpsabi warning if future GCC releases will use ++ different field alignment rules for this type. ++ * config/rs6000/sysv4.h (ADJUST_FIELD_ALIGN): Call it. ++ * config/rs6000/linux64.h (ADJUST_FIELD_ALIGN): Likewise. ++ * config/rs6000/freebsd64.h (ADJUST_FIELD_ALIGN): Likewise. ++ ++2014-07-24 Ulrich Weigand ++ ++ * config/rs6000/rs6000.c (rs6000_function_arg_boundary): Issue ++ -Wpsabi note when encountering a type where future GCC releases ++ will apply different alignment requirements. ++ ++2014-07-24 Ulrich Weigand ++ ++ * config/rs6000/rs6000.c (rs6000_function_arg): If a float argument ++ does not fit fully into floating-point registers, and there is still ++ space in the register parameter area, issue -Wpsabi note that the ABI ++ will change in a future GCC release. ++ +2014-07-23 Sebastian Huber + + * config/arm/t-rtems-eabi: Add @@ -236,7 +264,7 @@ Index: gcc/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. -@@ -4,14 +142,14 @@ +@@ -4,14 +170,14 @@ 2014-07-10 Cary Coutant @@ -255,7 +283,7 @@ Index: gcc/ChangeLog 2014-07-10 Tom G. Christensen -@@ -33,13 +171,13 @@ +@@ -33,13 +199,13 @@ PR target/61062 * config/arm/arm_neon.h (vtrn_s8, vtrn_s16, vtrn_u8, vtrn_u16, vtrn_p8, vtrn_p16, vtrn_s32, vtrn_f32, vtrn_u32, vtrnq_s8, vtrnq_s16, vtrnq_s32, @@ -276,7 +304,7 @@ Index: gcc/ChangeLog 2014-07-09 Alan Lawrence -@@ -157,11 +295,9 @@ +@@ -157,11 +323,9 @@ 2014-06-24 Jakub Jelinek * gimplify.c (gimplify_scan_omp_clauses) @@ -302,7 +330,7 @@ Index: gcc/ChangeLog (struct gimplify_adjust_omp_clauses_data): New type. (gimplify_adjust_omp_clauses_1): Adjust for data being a struct gimplify_adjust_omp_clauses_data pointer instead -@@ -196,14 +331,12 @@ +@@ -196,14 +359,12 @@ gimple_seq * argument to omp_finish_clause hook. * omp-low.c (scan_sharing_clauses): Call scan_omp_op on non-DECL_P OMP_CLAUSE_DECL if ctx->outer. @@ -320,7 +348,7 @@ Index: gcc/ChangeLog 2014-06-10 Jakub Jelinek -@@ -227,8 +360,7 @@ +@@ -227,8 +388,7 @@ OMP_CLAUSE_LINEAR_STMT. * omp-low.c (lower_rec_input_clauses): Fix typo. (maybe_add_implicit_barrier_cancel, lower_omp_1): Add @@ -330,7 +358,7 @@ Index: gcc/ChangeLog 2014-06-30 Jason Merrill -@@ -279,8 +411,7 @@ +@@ -279,8 +439,7 @@ (aarch64_sqdmlsl_lane): Likewise. (aarch64_sqdmull_lane): Likewise. (aarch64_sqdmull2_lane): Likewise. @@ -340,6 +368,53 @@ Index: gcc/ChangeLog Emit aarch64_sqdmlal_laneq_internal insn. (aarch64_sqdmlal2_laneq): Emit aarch64_sqdmlal2_laneq_internal insn. 
+Index: gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-1.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-1.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-1.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,12 @@ ++/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ ++/* { dg-options "-mabi=elfv2" } */ ++ ++struct f8 ++ { ++ float x[8]; ++ }; ++ ++void test (struct f8 a, struct f8 b) /* { dg-message "note: the ABI of passing homogeneous float aggregates will change" } */ ++{ ++} ++ +Index: gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-2.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-2.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-2.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,11 @@ ++/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ ++ ++struct test ++ { ++ long a __attribute__((aligned (16))); ++ }; ++ ++void test (struct test a) /* { dg-message "note: the ABI of passing aggregates with 16-byte alignment will change" } */ ++{ ++} ++ +Index: gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-3.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-3.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-3.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,9 @@ ++/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */ ++/* { dg-require-effective-target powerpc_altivec_ok } */ ++/* { dg-options "-maltivec" } */ ++ ++struct test ++ { ++ int a __attribute__((vector_size (8))); ++ }; /* { dg-message "note: the layout of aggregates containing vectors with 8-byte alignment will change" } */ ++ Index: gcc/testsuite/gcc.target/i386/pr61855.c =================================================================== --- a/src/gcc/testsuite/gcc.target/i386/pr61855.c (.../tags/gcc_4_9_1_release) @@ -413,6 +488,40 @@ Index: gcc/testsuite/gfortran.dg/dependency_44.f90 +! 
print *, res2 + if (any(res1 /= res2)) call abort () +end program prgm3 +Index: gcc/testsuite/gcc.c-torture/execute/pr23135.x +=================================================================== +--- a/src/gcc/testsuite/gcc.c-torture/execute/pr23135.x (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.c-torture/execute/pr23135.x (.../branches/gcc-4_9-branch) +@@ -0,0 +1,2 @@ ++set additional_flags "-Wno-psabi" ++return 0 +Index: gcc/testsuite/gcc.c-torture/execute/20050604-1.x +=================================================================== +--- a/src/gcc/testsuite/gcc.c-torture/execute/20050604-1.x (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.c-torture/execute/20050604-1.x (.../branches/gcc-4_9-branch) +@@ -6,4 +6,5 @@ + set additional_flags "-mno-mmx" + } + ++set additional_flags "-Wno-psabi" + return 0 +Index: gcc/testsuite/gcc.c-torture/execute/20050316-1.x +=================================================================== +--- a/src/gcc/testsuite/gcc.c-torture/execute/20050316-1.x (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.c-torture/execute/20050316-1.x (.../branches/gcc-4_9-branch) +@@ -4,4 +4,5 @@ + return 1 + } + ++set additional_flags "-Wno-psabi" + return 0; +Index: gcc/testsuite/gcc.c-torture/execute/20050316-3.x +=================================================================== +--- a/src/gcc/testsuite/gcc.c-torture/execute/20050316-3.x (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.c-torture/execute/20050316-3.x (.../branches/gcc-4_9-branch) +@@ -0,0 +1,2 @@ ++set additional_flags "-Wno-psabi" ++return 0 Index: gcc/testsuite/gnat.dg/pack20.adb =================================================================== --- a/src/gcc/testsuite/gnat.dg/pack20.adb (.../tags/gcc_4_9_1_release) @@ -528,7 +637,40 @@ Index: gcc/testsuite/ChangeLog =================================================================== --- a/src/gcc/testsuite/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/gcc/testsuite/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,61 @@ +@@ -1,3 +1,94 @@ ++2014-07-24 Ulrich Weigand ++ ++ Backport from mainline ++ 2014-07-24 Ulrich Weigand ++ ++ * gcc.target/powerpc/ppc64-abi-warn-3.c: New test. ++ ++ * gcc.c-torture/execute/20050316-1.x: Add -Wno-psabi. ++ * gcc.c-torture/execute/20050604-1.x: Add -Wno-psabi. ++ * gcc.c-torture/execute/20050316-3.x: New file. Add -Wno-psabi. ++ * gcc.c-torture/execute/pr23135.x: Likewise. ++ ++2014-07-24 Ulrich Weigand ++ ++ Backport from mainline ++ 2014-07-24 Ulrich Weigand ++ ++ * gcc.target/powerpc/ppc64-abi-warn-2.c: New test. ++ ++2014-07-24 Ulrich Weigand ++ ++ Backport from mainline ++ 2014-07-24 Ulrich Weigand ++ ++ * gcc.target/powerpc/ppc64-abi-warn-1.c: New test. ++ ++2014-07-24 Ulrich Weigand ++ ++ Backport from mainline ++ 2014-07-24 Ulrich Weigand ++ ++ * g++.dg/compat/struct-layout-1.exp: Load g++-dg.exp. ++ +2014-07-24 Martin Jambor + + PR ipa/61160 @@ -590,7 +732,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. -@@ -17,7 +75,8 @@ +@@ -17,7 +108,8 @@ 2014-06-09 Alan Lawrence PR target/61062 @@ -600,7 +742,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-08 Jakub Jelinek -@@ -34,8 +93,8 @@ +@@ -34,8 +126,8 @@ 2014-07-08 Alan Lawrence @@ -611,6 +753,20 @@ Index: gcc/testsuite/ChangeLog PR target/59843 * gcc.dg/vect/vect-singleton_1.c: New file. 
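
The ppc64-abi-warn tests added above each pin down one of the new -Wpsabi notes; for the homogeneous-float case it helps to spell out the intended register assignment. A rough sketch, assuming the usual ppc64 convention of 13 floating-point argument registers (f1-f13) in front of the 8-GPR register parameter area:

struct f8 { float x[8]; };

void test (struct f8 a, struct f8 b);
/* a.x[0..7] are passed in f1..f8.  b.x[0..4] still fit in f9..f13, but
   the last three floats of b fall back into the register parameter area,
   which is exactly where GCC 4.8/4.9 and the ABI document disagree --
   hence rs6000_function_arg now emits the "homogeneous float aggregates"
   note rather than silently changing the layout on the release branch.  */
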
+Index: gcc/testsuite/g++.dg/compat/struct-layout-1.exp +=================================================================== +--- a/src/gcc/testsuite/g++.dg/compat/struct-layout-1.exp (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/g++.dg/compat/struct-layout-1.exp (.../branches/gcc-4_9-branch) +@@ -89,6 +89,9 @@ + # This must be done after the compat-use-*-compiler definitions. + load_lib compat.exp + ++# Provide the g++-dg-prune routine (gcc-dp.exp is loaded by compat.exp) ++load_lib g++-dg.exp ++ + g++_init + + # Save variables for the C++ compiler under test, which each test will Index: gcc/testsuite/g++.dg/ipa/pr61160-1.C =================================================================== --- a/src/gcc/testsuite/g++.dg/ipa/pr61160-1.C (.../tags/gcc_4_9_1_release) @@ -1759,10 +1915,212 @@ Index: gcc/config/cris/cris-protos.h extern void cris_order_for_addsi3 (rtx *, int); extern void cris_emit_trap_for_misalignment (rtx); #endif /* RTX_CODE */ +Index: gcc/config/aarch64/aarch64-linux.h +=================================================================== +--- a/src/gcc/config/aarch64/aarch64-linux.h (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/aarch64/aarch64-linux.h (.../branches/gcc-4_9-branch) +@@ -44,4 +44,6 @@ + } \ + while (0) + ++#define TARGET_ASM_FILE_END file_end_indicate_exec_stack ++ + #endif /* GCC_AARCH64_LINUX_H */ +Index: gcc/config/rs6000/freebsd64.h +=================================================================== +--- a/src/gcc/config/rs6000/freebsd64.h (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/rs6000/freebsd64.h (.../branches/gcc-4_9-branch) +@@ -367,7 +367,7 @@ + /* PowerPC64 Linux word-aligns FP doubles when -malign-power is given. */ + #undef ADJUST_FIELD_ALIGN + #define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ +- ((TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \ ++ (rs6000_special_adjust_field_align_p ((FIELD), (COMPUTED)) \ + ? 128 \ + : (TARGET_64BIT \ + && TARGET_ALIGN_NATURAL == 0 \ +Index: gcc/config/rs6000/rs6000-protos.h +=================================================================== +--- a/src/gcc/config/rs6000/rs6000-protos.h (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/rs6000/rs6000-protos.h (.../branches/gcc-4_9-branch) +@@ -155,6 +155,7 @@ + + #ifdef TREE_CODE + extern unsigned int rs6000_data_alignment (tree, unsigned int, enum data_align); ++extern bool rs6000_special_adjust_field_align_p (tree, unsigned int); + extern unsigned int rs6000_special_round_type_align (tree, unsigned int, + unsigned int); + extern unsigned int darwin_rs6000_special_round_type_align (tree, unsigned int, +Index: gcc/config/rs6000/linux64.h +=================================================================== +--- a/src/gcc/config/rs6000/linux64.h (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/rs6000/linux64.h (.../branches/gcc-4_9-branch) +@@ -246,7 +246,7 @@ + /* PowerPC64 Linux word-aligns FP doubles when -malign-power is given. */ + #undef ADJUST_FIELD_ALIGN + #define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ +- ((TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \ ++ (rs6000_special_adjust_field_align_p ((FIELD), (COMPUTED)) \ + ? 
128 \ + : (TARGET_64BIT \ + && TARGET_ALIGN_NATURAL == 0 \ +Index: gcc/config/rs6000/rs6000.c +=================================================================== +--- a/src/gcc/config/rs6000/rs6000.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/rs6000/rs6000.c (.../branches/gcc-4_9-branch) +@@ -5871,6 +5871,34 @@ + return align; + } + ++/* Previous GCC releases forced all vector types to have 16-byte alignment. */ ++ ++bool ++rs6000_special_adjust_field_align_p (tree field, unsigned int computed) ++{ ++ if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) ++ { ++ if (computed != 128) ++ { ++ static bool warned; ++ if (!warned && warn_psabi) ++ { ++ warned = true; ++ inform (input_location, ++ "the layout of aggregates containing vectors with" ++ " %d-byte alignment will change in a future GCC release", ++ computed / BITS_PER_UNIT); ++ } ++ } ++ /* GCC 4.8/4.9 Note: To avoid any ABI change on a release branch, we ++ keep the special treatment of vector types, but warn if there will ++ be differences in future GCC releases. */ ++ return true; ++ } ++ ++ return false; ++} ++ + /* AIX increases natural record alignment to doubleword if the first + field is an FP double while the FP fields remain word aligned. */ + +@@ -9180,14 +9208,51 @@ + || (type && TREE_CODE (type) == VECTOR_TYPE + && int_size_in_bytes (type) >= 16)) + return 128; +- else if (((TARGET_MACHO && rs6000_darwin64_abi) +- || DEFAULT_ABI == ABI_ELFv2 +- || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)) +- && mode == BLKmode +- && type && TYPE_ALIGN (type) > 64) ++ ++ /* Aggregate types that need > 8 byte alignment are quadword-aligned ++ in the parameter area in the ELFv2 ABI, and in the AIX ABI unless ++ -mcompat-align-parm is used. */ ++ if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm) ++ || DEFAULT_ABI == ABI_ELFv2) ++ && type && TYPE_ALIGN (type) > 64) ++ { ++ /* "Aggregate" means any AGGREGATE_TYPE except for single-element ++ or homogeneous float/vector aggregates here. We already handled ++ vector aggregates above, but still need to check for float here. */ ++ bool aggregate_p = (AGGREGATE_TYPE_P (type) ++ && !SCALAR_FLOAT_MODE_P (elt_mode)); ++ ++ /* We used to check for BLKmode instead of the above aggregate type ++ check. Warn when this results in any difference to the ABI. */ ++ if (aggregate_p != (mode == BLKmode)) ++ { ++ static bool warned; ++ if (!warned && warn_psabi) ++ { ++ warned = true; ++ inform (input_location, ++ "the ABI of passing aggregates with %d-byte alignment" ++ " will change in a future GCC release", ++ (int) TYPE_ALIGN (type) / BITS_PER_UNIT); ++ } ++ } ++ ++ /* GCC 4.8/4.9 Note: To avoid any ABI change on a release branch, we ++ keep using the BLKmode check, but warn if there will be differences ++ in future GCC releases. */ ++ if (mode == BLKmode) ++ return 128; ++ } ++ ++ /* Similar for the Darwin64 ABI. Note that for historical reasons we ++ implement the "aggregate type" check as a BLKmode check here; this ++ means certain aggregate types are in fact not aligned. */ ++ if (TARGET_MACHO && rs6000_darwin64_abi ++ && mode == BLKmode ++ && type && TYPE_ALIGN (type) > 64) + return 128; +- else +- return PARM_BOUNDARY; ++ ++ return PARM_BOUNDARY; + } + + /* The offset in words to the start of the parameter save area. */ +@@ -10225,6 +10290,7 @@ + rtx r, off; + int i, k = 0; + unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; ++ int fpr_words; + + /* Do we also need to pass this argument in the parameter + save area? 
*/ +@@ -10253,6 +10319,37 @@ + rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); + } + ++ /* If there were not enough FPRs to hold the argument, the rest ++ usually goes into memory. However, if the current position ++ is still within the register parameter area, a portion may ++ actually have to go into GPRs. ++ ++ Note that it may happen that the portion of the argument ++ passed in the first "half" of the first GPR was already ++ passed in the last FPR as well. ++ ++ For unnamed arguments, we already set up GPRs to cover the ++ whole argument in rs6000_psave_function_arg, so there is ++ nothing further to do at this point. ++ ++ GCC 4.8/4.9 Note: This was implemented incorrectly in earlier ++ GCC releases. To avoid any ABI change on the release branch, ++ we retain that original implementation here, but warn if we ++ encounter a case where the ABI will change in the future. */ ++ fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8); ++ if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG ++ && cum->nargs_prototype > 0) ++ { ++ static bool warned; ++ if (!warned && warn_psabi) ++ { ++ warned = true; ++ inform (input_location, ++ "the ABI of passing homogeneous float aggregates" ++ " will change in a future GCC release"); ++ } ++ } ++ + return rs6000_finish_function_arg (mode, rvec, k); + } + else if (align_words < GP_ARG_NUM_REG) Index: gcc/config/rs6000/sysv4.h =================================================================== --- a/src/gcc/config/rs6000/sysv4.h (.../tags/gcc_4_9_1_release) +++ b/src/gcc/config/rs6000/sysv4.h (.../branches/gcc-4_9-branch) +@@ -292,7 +292,7 @@ + /* An expression for the alignment of a structure field FIELD if the + alignment computed in the usual way is COMPUTED. */ + #define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \ +- ((TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) \ ++ (rs6000_special_adjust_field_align_p ((FIELD), (COMPUTED)) \ + ? 
128 : COMPUTED) + + #undef BIGGEST_FIELD_ALIGNMENT @@ -949,3 +949,19 @@ #define TARGET_USES_SYSV4_OPT 1 diff --git a/debian/rules.patch b/debian/rules.patch index 829f6b7..25c1c59 100644 --- a/debian/rules.patch +++ b/debian/rules.patch @@ -84,10 +84,6 @@ debian_patches += \ pr61126 \ gcc-setmultilib-fix \ sparc64-biarch-long-double-128 \ - gcc-elfv2-abi-warn1 \ - gcc-elfv2-abi-warn2 \ - gcc-elfv2-abi-warn3 \ - gcc-elfv2-abi-warn4 \ # FIXME: only needed for isl-0.13 for now # isl-0.13-compat \ -- cgit v1.2.3 From 74221fc9b62869f7e00e92a149bc73ed59ca7ab1 Mon Sep 17 00:00:00 2001 From: doko Date: Thu, 24 Jul 2014 23:43:50 +0000 Subject: - fix the aarch64 build with the gfdl docs included git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7528 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/rules.patch | 2 +- debian/rules.unpack | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'debian') diff --git a/debian/rules.patch b/debian/rules.patch index 25c1c59..6d1de3c 100644 --- a/debian/rules.patch +++ b/debian/rules.patch @@ -25,9 +25,9 @@ endif ifneq ($(GFDL_INVARIANT_FREE),yes) debian_patches += \ + $(if $(with_linaro_branch),gcc-linaro-doc) \ rename-info-files \ -# $(if $(with_linaro_branch),gcc-linaro-doc) \ # svn-doc-updates \ # $(if $(with_linaro_branch),,svn-doc-updates) \ diff --git a/debian/rules.unpack b/debian/rules.unpack index 85ead95..f824987 100644 --- a/debian/rules.unpack +++ b/debian/rules.unpack @@ -27,6 +27,7 @@ debian-chmod: gfdl_texinfo_files = \ gcc/doc/arm-neon-intrinsics.texi \ gcc/doc/arm-acle-intrinsics.texi \ + gcc/doc/aarch64-acle-intrinsics.texi \ gcc/doc/avr-mmcu.texi \ gcc/doc/bugreport.texi \ gcc/doc/cfg.texi \ -- cgit v1.2.3 From ceb2eeb7aa975eb7d69a09b9df5afc5d857ad53a Mon Sep 17 00:00:00 2001 From: doko Date: Sat, 26 Jul 2014 07:54:28 +0000 Subject: * Fix the GFDL build on AArch64. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7533 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 6 ++++++ debian/rules.unpack | 2 ++ 2 files changed, 8 insertions(+) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index cf571ce..b854524 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +gcc-4.9 (4.9.1-3) UNRELEASED; urgency=medium + + * Fix the GFDL build on AArch64. + + -- Matthias Klose Sat, 26 Jul 2014 09:53:18 +0200 + gcc-4.9 (4.9.1-2) unstable; urgency=medium * Update to SVN 20140724 (r213031) from the gcc-4_9-branch. diff --git a/debian/rules.unpack b/debian/rules.unpack index f824987..d5ab32b 100644 --- a/debian/rules.unpack +++ b/debian/rules.unpack @@ -166,6 +166,8 @@ ifeq ($(GFDL_INVARIANT_FREE),yes) sed "s/@name@/$$n/g" $(SOURCE_DIR)debian/gcc-dummy.texi \ > $(srcdir)/$$i; \ else \ + sed "s/@name@/$$n/g" $(SOURCE_DIR)debian/gcc-dummy.texi \ + > $(srcdir)/$$i; \ echo >&2 "$$i does not exist, fix debian/rules.unpack"; \ fi; \ done -- cgit v1.2.3 From 644b0334b54ba57982aa0e195f31e6238eb87cdc Mon Sep 17 00:00:00 2001 From: doko Date: Sun, 27 Jul 2014 13:26:31 +0000 Subject: * Update to SVN 20140727 (r213100) from the gcc-4_9-branch. * Fix PR libobjc/61920, libobjc link failure on powerpc*. Closes: #756096. 
git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7534 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 4 +- debian/patches/gcc-linaro.diff | 154 ---------------- debian/patches/libitm-aarch64.diff | 155 ---------------- debian/patches/svn-updates.diff | 356 +++++++++++++++++++++++++++++++++++-- debian/rules.patch | 1 - debian/rules2 | 4 - 6 files changed, 347 insertions(+), 327 deletions(-) delete mode 100644 debian/patches/libitm-aarch64.diff (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index b854524..f331edf 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,10 @@ gcc-4.9 (4.9.1-3) UNRELEASED; urgency=medium + * Update to SVN 20140727 (r213100) from the gcc-4_9-branch. * Fix the GFDL build on AArch64. + * Fix PR libobjc/61920, libobjc link failure on powerpc*. Closes: #756096. - -- Matthias Klose Sat, 26 Jul 2014 09:53:18 +0200 + -- Matthias Klose Sun, 27 Jul 2014 15:25:24 +0200 gcc-4.9 (4.9.1-2) unstable; urgency=medium diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff index 28457c5..4ec9049 100644 --- a/debian/patches/gcc-linaro.diff +++ b/debian/patches/gcc-linaro.diff @@ -35,160 +35,6 @@ LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@212635 \ +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. ---- a/src/libitm/configure.tgt -+++ b/src/libitm/configure.tgt -@@ -46,6 +46,7 @@ - # Map the target cpu to an ARCH sub-directory. At the same time, - # work out any special compilation flags as necessary. - case "${target_cpu}" in -+ aarch64*) ARCH=aarch64 ;; - alpha*) ARCH=alpha ;; - rs6000 | powerpc*) - XCFLAGS="${XCFLAGS} -mhtm" ---- a/src/libitm/config/aarch64/sjlj.S -+++ b/src/libitm/config/aarch64/sjlj.S -@@ -0,0 +1,93 @@ -+/* Copyright (C) 2014 Free Software Foundation, Inc. -+ Contributed by Richard Henderson . -+ -+ This file is part of the GNU Transactional Memory Library (libitm). -+ -+ Libitm is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3 of the License, or -+ (at your option) any later version. -+ -+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY -+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#include "asmcfi.h" -+ -+ .text -+ .align 2 -+ .global _ITM_beginTransaction -+ .type _ITM_beginTransaction, %function -+ -+_ITM_beginTransaction: -+ cfi_startproc -+ mov x1, sp -+ stp x29, x30, [sp, -11*16]! -+ cfi_adjust_cfa_offset(11*16) -+ cfi_rel_offset(x29, 0) -+ cfi_rel_offset(x30, 8) -+ mov x29, sp -+ stp x19, x20, [sp, 1*16] -+ stp x21, x22, [sp, 2*16] -+ stp x23, x24, [sp, 3*16] -+ stp x25, x26, [sp, 4*16] -+ stp x27, x28, [sp, 5*16] -+ stp d8, d9, [sp, 6*16] -+ stp d10, d11, [sp, 7*16] -+ stp d12, d13, [sp, 8*16] -+ stp d14, d15, [sp, 9*16] -+ str x1, [sp, 10*16] -+ -+ /* Invoke GTM_begin_transaction with the struct we just built. 
*/ -+ mov x1, sp -+ bl GTM_begin_transaction -+ -+ /* Return; we don't need to restore any of the call-saved regs. */ -+ ldp x29, x30, [sp] -+ add sp, sp, #11*16 -+ cfi_adjust_cfa_offset(-11*16) -+ cfi_restore(x29) -+ cfi_restore(x30) -+ ret -+ cfi_endproc -+ .size _ITM_beginTransaction, . - _ITM_beginTransaction -+ -+ .align 2 -+ .global GTM_longjmp -+ .hidden GTM_longjmp -+ .type GTM_longjmp, %function -+ -+GTM_longjmp: -+ /* The first parameter becomes the return value (x0). -+ The third parameter is ignored for now. */ -+ cfi_startproc -+ ldp x19, x20, [x1, 1*16] -+ ldp x21, x22, [x1, 2*16] -+ ldp x23, x24, [x1, 3*16] -+ ldp x25, x26, [x1, 4*16] -+ ldp x27, x28, [x1, 5*16] -+ ldp d8, d9, [x1, 6*16] -+ ldp d10, d11, [x1, 7*16] -+ ldp d12, d13, [x1, 8*16] -+ ldp d14, d15, [x1, 9*16] -+ ldr x3, [x1, 10*16] -+ ldp x29, x30, [x1] -+ cfi_def_cfa(x1, 0) -+ mov sp, x3 -+ br x30 -+ cfi_endproc -+ .size GTM_longjmp, . - GTM_longjmp -+ -+#ifdef __linux__ -+.section .note.GNU-stack, "", %progbits -+#endif ---- a/src/libitm/config/aarch64/target.h -+++ b/src/libitm/config/aarch64/target.h -@@ -0,0 +1,45 @@ -+/* Copyright (C) 2014 Free Software Foundation, Inc. -+ Contributed by Richard Henderson . -+ -+ This file is part of the GNU Transactional Memory Library (libitm). -+ -+ Libitm is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3 of the License, or -+ (at your option) any later version. -+ -+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY -+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+namespace GTM HIDDEN { -+ -+typedef struct gtm_jmpbuf -+{ -+ unsigned long long fp; /* x29 */ -+ unsigned long long pc; /* x30 */ -+ unsigned long long gr[10]; /* x19-x28 */ -+ unsigned long long vr[8]; /* d8-d15 */ -+ void *cfa; -+} gtm_jmpbuf; -+ -+/* ??? The size of one line in hardware caches (in bytes). */ -+#define HW_CACHELINE_SIZE 128 -+ -+static inline void -+cpu_relax (void) -+{ -+ __asm volatile ("" : : : "memory"); -+} -+ -+} // namespace GTM --- a/src/libgomp/ChangeLog.linaro +++ b/src/libgomp/ChangeLog.linaro @@ -0,0 +1,19 @@ diff --git a/debian/patches/libitm-aarch64.diff b/debian/patches/libitm-aarch64.diff deleted file mode 100644 index faf93cd..0000000 --- a/debian/patches/libitm-aarch64.diff +++ /dev/null @@ -1,155 +0,0 @@ -# DP: Build libitm on AArch64, patch taken from the trunk. ---- /dev/null -+++ b/src/libitm/config/aarch64/sjlj.S -@@ -0,0 +1,93 @@ -+/* Copyright (C) 2014 Free Software Foundation, Inc. -+ Contributed by Richard Henderson . -+ -+ This file is part of the GNU Transactional Memory Library (libitm). -+ -+ Libitm is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3 of the License, or -+ (at your option) any later version. 
-+ -+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY -+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . */ -+ -+#include "asmcfi.h" -+ -+ .text -+ .align 2 -+ .global _ITM_beginTransaction -+ .type _ITM_beginTransaction, %function -+ -+_ITM_beginTransaction: -+ cfi_startproc -+ mov x1, sp -+ stp x29, x30, [sp, -11*16]! -+ cfi_adjust_cfa_offset(11*16) -+ cfi_rel_offset(x29, 0) -+ cfi_rel_offset(x30, 8) -+ mov x29, sp -+ stp x19, x20, [sp, 1*16] -+ stp x21, x22, [sp, 2*16] -+ stp x23, x24, [sp, 3*16] -+ stp x25, x26, [sp, 4*16] -+ stp x27, x28, [sp, 5*16] -+ stp d8, d9, [sp, 6*16] -+ stp d10, d11, [sp, 7*16] -+ stp d12, d13, [sp, 8*16] -+ stp d14, d15, [sp, 9*16] -+ str x1, [sp, 10*16] -+ -+ /* Invoke GTM_begin_transaction with the struct we just built. */ -+ mov x1, sp -+ bl GTM_begin_transaction -+ -+ /* Return; we don't need to restore any of the call-saved regs. */ -+ ldp x29, x30, [sp] -+ add sp, sp, #11*16 -+ cfi_adjust_cfa_offset(-11*16) -+ cfi_restore(x29) -+ cfi_restore(x30) -+ ret -+ cfi_endproc -+ .size _ITM_beginTransaction, . - _ITM_beginTransaction -+ -+ .align 2 -+ .global GTM_longjmp -+ .hidden GTM_longjmp -+ .type GTM_longjmp, %function -+ -+GTM_longjmp: -+ /* The first parameter becomes the return value (x0). -+ The third parameter is ignored for now. */ -+ cfi_startproc -+ ldp x19, x20, [x1, 1*16] -+ ldp x21, x22, [x1, 2*16] -+ ldp x23, x24, [x1, 3*16] -+ ldp x25, x26, [x1, 4*16] -+ ldp x27, x28, [x1, 5*16] -+ ldp d8, d9, [x1, 6*16] -+ ldp d10, d11, [x1, 7*16] -+ ldp d12, d13, [x1, 8*16] -+ ldp d14, d15, [x1, 9*16] -+ ldr x3, [x1, 10*16] -+ ldp x29, x30, [x1] -+ cfi_def_cfa(x1, 0) -+ mov sp, x3 -+ br x30 -+ cfi_endproc -+ .size GTM_longjmp, . - GTM_longjmp -+ -+#ifdef __linux__ -+.section .note.GNU-stack, "", %progbits -+#endif ---- /dev/null -+++ b/src/libitm/config/aarch64/target.h -@@ -0,0 +1,45 @@ -+/* Copyright (C) 2014 Free Software Foundation, Inc. -+ Contributed by Richard Henderson . -+ -+ This file is part of the GNU Transactional Memory Library (libitm). -+ -+ Libitm is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published by -+ the Free Software Foundation; either version 3 of the License, or -+ (at your option) any later version. -+ -+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY -+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for -+ more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ . 
*/ -+ -+namespace GTM HIDDEN { -+ -+typedef struct gtm_jmpbuf -+{ -+ unsigned long long fp; /* x29 */ -+ unsigned long long pc; /* x30 */ -+ unsigned long long gr[10]; /* x19-x28 */ -+ unsigned long long vr[8]; /* d8-d15 */ -+ void *cfa; -+} gtm_jmpbuf; -+ -+/* ??? The size of one line in hardware caches (in bytes). */ -+#define HW_CACHELINE_SIZE 128 -+ -+static inline void -+cpu_relax (void) -+{ -+ __asm volatile ("" : : : "memory"); -+} -+ -+} // namespace GTM ---- a/src/libitm/configure.tgt -+++ b/src/libitm/configure.tgt -@@ -46,6 +46,7 @@ fi - # Map the target cpu to an ARCH sub-directory. At the same time, - # work out any special compilation flags as necessary. - case "${target_cpu}" in -+ aarch64*) ARCH=aarch64 ;; - alpha*) ARCH=alpha ;; - rs6000 | powerpc*) - XCFLAGS="${XCFLAGS} -mhtm" diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index c8050f7..58a3e4a 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140724 (r213031). +# DP: updates from the 4.9 branch upto 20140727 (r213100). last_update() { cat > ${dir}LAST_UPDATED ++ ++ * config/aarch64/sjlj.S: New file. ++ * config/aarch64/target.h: New file. ++ * configure.tgt: Enable aarch64. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: libitm/config/aarch64/sjlj.S +=================================================================== +--- a/src/libitm/config/aarch64/sjlj.S (.../tags/gcc_4_9_1_release) ++++ b/src/libitm/config/aarch64/sjlj.S (.../branches/gcc-4_9-branch) +@@ -0,0 +1,92 @@ ++/* Copyright (C) 2014 Free Software Foundation, Inc. ++ Contributed by Richard Henderson . ++ ++ This file is part of the GNU Transactional Memory Library (libitm). ++ ++ Libitm is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3 of the License, or ++ (at your option) any later version. ++ ++ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++#include "asmcfi.h" ++ ++ .text ++ .align 2 ++ .global _ITM_beginTransaction ++ .type _ITM_beginTransaction, %function ++ ++_ITM_beginTransaction: ++ cfi_startproc ++ mov x1, sp ++ stp x29, x30, [sp, -11*16]! ++ cfi_adjust_cfa_offset(11*16) ++ cfi_rel_offset(x29, 0) ++ cfi_rel_offset(x30, 8) ++ mov x29, sp ++ stp x19, x20, [sp, 1*16] ++ stp x21, x22, [sp, 2*16] ++ stp x23, x24, [sp, 3*16] ++ stp x25, x26, [sp, 4*16] ++ stp x27, x28, [sp, 5*16] ++ stp d8, d9, [sp, 6*16] ++ stp d10, d11, [sp, 7*16] ++ stp d12, d13, [sp, 8*16] ++ stp d14, d15, [sp, 9*16] ++ str x1, [sp, 10*16] ++ ++ /* Invoke GTM_begin_transaction with the struct we just built. */ ++ mov x1, sp ++ bl GTM_begin_transaction ++ ++ /* Return; we don't need to restore any of the call-saved regs. 
*/ ++ ldp x29, x30, [sp], 11*16 ++ cfi_adjust_cfa_offset(-11*16) ++ cfi_restore(x29) ++ cfi_restore(x30) ++ ret ++ cfi_endproc ++ .size _ITM_beginTransaction, . - _ITM_beginTransaction ++ ++ .align 2 ++ .global GTM_longjmp ++ .hidden GTM_longjmp ++ .type GTM_longjmp, %function ++ ++GTM_longjmp: ++ /* The first parameter becomes the return value (x0). ++ The third parameter is ignored for now. */ ++ cfi_startproc ++ ldp x19, x20, [x1, 1*16] ++ ldp x21, x22, [x1, 2*16] ++ ldp x23, x24, [x1, 3*16] ++ ldp x25, x26, [x1, 4*16] ++ ldp x27, x28, [x1, 5*16] ++ ldp d8, d9, [x1, 6*16] ++ ldp d10, d11, [x1, 7*16] ++ ldp d12, d13, [x1, 8*16] ++ ldp d14, d15, [x1, 9*16] ++ ldr x3, [x1, 10*16] ++ ldp x29, x30, [x1] ++ cfi_def_cfa(x1, 0) ++ mov sp, x3 ++ br x30 ++ cfi_endproc ++ .size GTM_longjmp, . - GTM_longjmp ++ ++#ifdef __linux__ ++.section .note.GNU-stack, "", %progbits ++#endif +Index: libitm/config/aarch64/target.h +=================================================================== +--- a/src/libitm/config/aarch64/target.h (.../tags/gcc_4_9_1_release) ++++ b/src/libitm/config/aarch64/target.h (.../branches/gcc-4_9-branch) +@@ -0,0 +1,45 @@ ++/* Copyright (C) 2014 Free Software Foundation, Inc. ++ Contributed by Richard Henderson . ++ ++ This file is part of the GNU Transactional Memory Library (libitm). ++ ++ Libitm is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3 of the License, or ++ (at your option) any later version. ++ ++ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++namespace GTM HIDDEN { ++ ++typedef struct gtm_jmpbuf ++{ ++ unsigned long long fp; /* x29 */ ++ unsigned long long pc; /* x30 */ ++ unsigned long long gr[10]; /* x19-x28 */ ++ unsigned long long vr[8]; /* d8-d15 */ ++ void *cfa; ++} gtm_jmpbuf; ++ ++/* ??? The size of one line in hardware caches (in bytes). */ ++#define HW_CACHELINE_SIZE 128 ++ ++static inline void ++cpu_relax (void) ++{ ++ __asm volatile ("" : : : "memory"); ++} ++ ++} // namespace GTM +Index: configure.ac +=================================================================== +--- a/src/configure.ac (.../tags/gcc_4_9_1_release) ++++ b/src/configure.ac (.../branches/gcc-4_9-branch) +@@ -1177,6 +1177,9 @@ + *-mingw*) + host_makefile_frag="config/mh-mingw" + ;; ++ alpha*-*-linux*) ++ host_makefile_frag="config/mh-alpha-linux" ++ ;; + hppa*-hp-hpux10*) + host_makefile_frag="config/mh-pa-hpux10" + ;; +Index: ChangeLog +=================================================================== +--- a/src/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,9 @@ ++2014-07-26 Uros Bizjak ++ ++ PR target/47230 ++ * configure.ac (alpha*-*-linux*): Use mh-alpha-linux. ++ * configure: Regenerate. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. 
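
A note on the AArch64 libitm port above: the two assembly entry points form a setjmp/longjmp-style pair over the AAPCS64 callee-saved set -- _ITM_beginTransaction stores x19-x28, d8-d15, fp/lr and the caller's sp into a gtm_jmpbuf, and GTM_longjmp later restores them to resume right after the begin. In portable, purely illustrative C terms:

#include <setjmp.h>
#include <stdio.h>

int main (void)
{
  jmp_buf jb;
  if (setjmp (jb) == 0)          /* ~ _ITM_beginTransaction: first return */
    {
      puts ("transaction body runs");
      longjmp (jb, 1);           /* ~ GTM_longjmp on abort/retry */
    }
  puts ("rolled back: callee-saved registers and sp restored");
  return 0;
}
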
+@@ -9,7 +15,7 @@ + 2014-04-04 Eric Botcazou + + PR bootstrap/60620 +- * Makefile.def (dependencies): Make gnattools depend on libstdc++-v3. ++ * Makefile.def (dependencies): Make gnattools depend on libstdc++-v3. + * Makefile.in: Regenerate. + + 2014-03-28 Yaakov Selkowitz +@@ -47,7 +53,8 @@ + + 2014-03-07 Denis Chertykov + +- * MAINTAINERS: Remove avr maintainers: Anatoly Sokolov and Eric Weddington ++ * MAINTAINERS: Remove avr maintainers: Anatoly Sokolov ++ and Eric Weddington + + 2014-03-07 Jakub Jelinek + +Index: config/mh-alpha-linux +=================================================================== +--- a/src/config/mh-alpha-linux (.../tags/gcc_4_9_1_release) ++++ b/src/config/mh-alpha-linux (.../branches/gcc-4_9-branch) +@@ -0,0 +1,3 @@ ++# Prevent GPREL16 relocation truncation ++LDFLAGS += -Wl,--no-relax ++BOOT_LDFLAGS += -Wl,--no-relax +Index: config/ChangeLog +=================================================================== +--- a/src/config/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/config/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,8 @@ ++2014-07-26 Uros Bizjak ++ ++ PR target/47230 ++ * mh-alpha-linux: New file. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: configure +=================================================================== +--- a/src/configure (.../tags/gcc_4_9_1_release) ++++ b/src/configure (.../branches/gcc-4_9-branch) +@@ -3868,6 +3868,9 @@ + *-mingw*) + host_makefile_frag="config/mh-mingw" + ;; ++ alpha*-*-linux*) ++ host_makefile_frag="config/mh-alpha-linux" ++ ;; + hppa*-hp-hpux10*) + host_makefile_frag="config/mh-pa-hpux10" + ;; Index: gcc/c-family/c-gimplify.c =================================================================== --- a/src/gcc/c-family/c-gimplify.c (.../tags/gcc_4_9_1_release) @@ -51,7 +306,7 @@ Index: gcc/DATESTAMP +++ b/src/gcc/DATESTAMP (.../branches/gcc-4_9-branch) @@ -1 +1 @@ -20140716 -+20140724 ++20140727 Index: gcc/omp-low.c =================================================================== --- a/src/gcc/omp-low.c (.../tags/gcc_4_9_1_release) @@ -97,7 +352,20 @@ Index: gcc/ChangeLog =================================================================== --- a/src/gcc/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/gcc/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,166 @@ +@@ -1,3 +1,179 @@ ++2014-07-25 Uros Bizjak ++ ++ Backport from mainline ++ 2014-07-14 Jakub Jelinek ++ ++ PR target/61656 ++ * config/i386/i386.c (classify_argument): Don't merge classes above ++ number of words. ++ ++2014-07-25 Uros Bizjak ++ ++ * config/alpha/elf.h: Define TARGET_UNWIND_TABLES_DEFAULT. ++ +2014-07-24 Kyle McMartin + + * config/aarch64/aarch64-linux.h (TARGET_ASM_FILE_END): Define. @@ -264,7 +532,7 @@ Index: gcc/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. -@@ -4,14 +170,14 @@ +@@ -4,14 +183,14 @@ 2014-07-10 Cary Coutant @@ -283,7 +551,7 @@ Index: gcc/ChangeLog 2014-07-10 Tom G. Christensen -@@ -33,13 +199,13 @@ +@@ -33,13 +212,13 @@ PR target/61062 * config/arm/arm_neon.h (vtrn_s8, vtrn_s16, vtrn_u8, vtrn_u16, vtrn_p8, vtrn_p16, vtrn_s32, vtrn_f32, vtrn_u32, vtrnq_s8, vtrnq_s16, vtrnq_s32, @@ -304,7 +572,7 @@ Index: gcc/ChangeLog 2014-07-09 Alan Lawrence -@@ -157,11 +323,9 @@ +@@ -157,11 +336,9 @@ 2014-06-24 Jakub Jelinek * gimplify.c (gimplify_scan_omp_clauses) @@ -330,7 +598,7 @@ Index: gcc/ChangeLog (struct gimplify_adjust_omp_clauses_data): New type. 
(gimplify_adjust_omp_clauses_1): Adjust for data being a struct gimplify_adjust_omp_clauses_data pointer instead -@@ -196,14 +359,12 @@ +@@ -196,14 +372,12 @@ gimple_seq * argument to omp_finish_clause hook. * omp-low.c (scan_sharing_clauses): Call scan_omp_op on non-DECL_P OMP_CLAUSE_DECL if ctx->outer. @@ -348,7 +616,7 @@ Index: gcc/ChangeLog 2014-06-10 Jakub Jelinek -@@ -227,8 +388,7 @@ +@@ -227,8 +401,7 @@ OMP_CLAUSE_LINEAR_STMT. * omp-low.c (lower_rec_input_clauses): Fix typo. (maybe_add_implicit_barrier_cancel, lower_omp_1): Add @@ -358,7 +626,7 @@ Index: gcc/ChangeLog 2014-06-30 Jason Merrill -@@ -279,8 +439,7 @@ +@@ -279,8 +452,7 @@ (aarch64_sqdmlsl_lane): Likewise. (aarch64_sqdmull_lane): Likewise. (aarch64_sqdmull2_lane): Likewise. @@ -1033,6 +1301,21 @@ Index: gcc/config.gcc esac ;; pdp11-*-*) +Index: gcc/config/alpha/elf.h +=================================================================== +--- a/src/gcc/config/alpha/elf.h (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/alpha/elf.h (.../branches/gcc-4_9-branch) +@@ -126,6 +126,10 @@ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + ++/* This variable should be set to 'true' if the target ABI requires ++ unwinding tables even when exceptions are not used. */ ++#define TARGET_UNWIND_TABLES_DEFAULT true ++ + /* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. Index: gcc/config/sparc/sync.md =================================================================== --- a/src/gcc/config/sparc/sync.md (.../tags/gcc_4_9_1_release) @@ -1173,6 +1456,19 @@ Index: gcc/config/i386/ia32intrin.h #define _bit_scan_forward(a) __bsfd(a) #define _bit_scan_reverse(a) __bsrd(a) #define _bswap(a) __bswapd(a) +Index: gcc/config/i386/i386.c +=================================================================== +--- a/src/gcc/config/i386/i386.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/i386/i386.c (.../branches/gcc-4_9-branch) +@@ -6552,7 +6552,7 @@ + bit_offset); + if (!num) + return 0; +- for (i = 0; i < num; i++) ++ for (i = 0; i < num && i < words; i++) + classes[i] = merge_classes (subclasses[i], classes[i]); + } + } Index: gcc/config/nios2/rtems.h =================================================================== --- a/src/gcc/config/nios2/rtems.h (.../tags/gcc_4_9_1_release) @@ -2315,3 +2611,39 @@ Index: gcc/config/arm/t-rtems-eabi +MULTILIB_EXCEPTIONS += mfpu=fpv4-sp-d16/mfloat-abi=hard +MULTILIB_EXCEPTIONS += mfpu=fpv4-sp-d16 MULTILIB_EXCEPTIONS += mfloat-abi=hard +Index: libobjc/encoding.c +=================================================================== +--- a/src/libobjc/encoding.c (.../tags/gcc_4_9_1_release) ++++ b/src/libobjc/encoding.c (.../branches/gcc-4_9-branch) +@@ -192,6 +192,7 @@ + ? MAX (MAX (COMPUTED, SPECIFIED), 64) \ + : MAX (COMPUTED, SPECIFIED));}) + ++#define rs6000_special_adjust_field_align_p(FIELD, COMPUTED) 0 + + /* Skip a variable name, enclosed in quotes ("). */ + static inline +Index: libobjc/ChangeLog +=================================================================== +--- a/src/libobjc/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/libobjc/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,10 @@ ++2014-07-27 Alan Modra ++ Matthias Klose ++ ++ PR libobjc/61920 ++ ++ * encoding.c: Define rs6000_special_adjust_field_align_p. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. 
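
The libobjc hunk above is the crux of PR61920: encoding.c re-uses the target's ADJUST_FIELD_ALIGN macro outside the compiler, so once linux64.h and sysv4.h started calling rs6000_special_adjust_field_align_p(), libobjc no longer linked. Defining the helper to 0 restores linking but drops the forced 16-byte alignment for vector fields; the 4.9-branch follow-up further down in this series therefore redefines it to mirror what the 4.9 compiler actually does:

#define rs6000_special_adjust_field_align_p(FIELD, COMPUTED) \
  (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE)
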
+Index: . +=================================================================== +--- a/src/. (.../tags/gcc_4_9_1_release) ++++ b/src/. (.../branches/gcc-4_9-branch) + +Property changes on: . +___________________________________________________________________ +Modified: svn:mergeinfo + Merged /trunk:r210615 diff --git a/debian/rules.patch b/debian/rules.patch index 6d1de3c..688506b 100644 --- a/debian/rules.patch +++ b/debian/rules.patch @@ -77,7 +77,6 @@ debian_patches += \ libitm-no-fortify-source \ pr59758 \ pr57653 \ - $(if $(with_linaro_branch),,libitm-aarch64) \ pr61257 \ pr61046 \ pr61336 \ diff --git a/debian/rules2 b/debian/rules2 index f2f435f..402392d 100644 --- a/debian/rules2 +++ b/debian/rules2 @@ -134,10 +134,6 @@ ifneq (,$(findstring static,$(DEB_BUILD_OPTIONS))) LDFLAGS += -static endif -ifneq (,$(filter $(DEB_TARGET_ARCH),alpha)) - LDFLAGS += -Wl,--no-relax -endif - CFLAGS_TO_PASS = \ $(if $(CFLAGS),CFLAGS="$(CFLAGS)") \ $(if $(BOOT_CFLAGS),BOOT_CFLAGS="$(BOOT_CFLAGS)") \ -- cgit v1.2.3 From a3a5707b9490fd3e8b4817932d3c66a03bfde9bf Mon Sep 17 00:00:00 2001 From: doko Date: Mon, 28 Jul 2014 09:19:10 +0000 Subject: - prepare for 4.9.1-3 git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7535 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index f331edf..f850ad1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -gcc-4.9 (4.9.1-3) UNRELEASED; urgency=medium +gcc-4.9 (4.9.1-3) unstable; urgency=medium * Update to SVN 20140727 (r213100) from the gcc-4_9-branch. * Fix the GFDL build on AArch64. -- cgit v1.2.3 From 631cb223358f0d51ab619a59e6bb55db1cdf9fd4 Mon Sep 17 00:00:00 2001 From: doko Date: Mon, 28 Jul 2014 16:28:32 +0000 Subject: * Update to SVN 20140728 (r213129) from the gcc-4_9-branch. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7539 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 6 ++++ debian/patches/svn-updates.diff | 64 ++++++++++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 10 deletions(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index f850ad1..6cdef30 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +gcc-4.9 (4.9.1-4) UNRELEASED; urgency=medium + + * Update to SVN 20140728 (r213129) from the gcc-4_9-branch. + + -- Matthias Klose Mon, 28 Jul 2014 18:26:32 +0200 + gcc-4.9 (4.9.1-3) unstable; urgency=medium * Update to SVN 20140727 (r213100) from the gcc-4_9-branch. diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index 58a3e4a..22a673c 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140727 (r213100). +# DP: updates from the 4.9 branch upto 20140728 (r213129). last_update() { cat > ${dir}LAST_UPDATED ++ ++ PR rtl-optimization/61801 ++ * gcc.target/i386/pr61801.c: Fix testcase. ++ ++2014-07-28 Richard Biener ++ ++ PR rtl-optimization/61801 ++ * gcc.target/i386/pr61801.c: New testcase. ++ +2014-07-24 Ulrich Weigand + + Backport from mainline @@ -1000,7 +1037,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. 
-@@ -17,7 +108,8 @@ +@@ -17,7 +118,8 @@ 2014-06-09 Alan Lawrence PR target/61062 @@ -1010,7 +1047,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-08 Jakub Jelinek -@@ -34,8 +126,8 @@ +@@ -34,8 +136,8 @@ 2014-07-08 Alan Lawrence @@ -2615,11 +2652,12 @@ Index: libobjc/encoding.c =================================================================== --- a/src/libobjc/encoding.c (.../tags/gcc_4_9_1_release) +++ b/src/libobjc/encoding.c (.../branches/gcc-4_9-branch) -@@ -192,6 +192,7 @@ +@@ -192,6 +192,8 @@ ? MAX (MAX (COMPUTED, SPECIFIED), 64) \ : MAX (COMPUTED, SPECIFIED));}) -+#define rs6000_special_adjust_field_align_p(FIELD, COMPUTED) 0 ++#define rs6000_special_adjust_field_align_p(FIELD, COMPUTED) \ ++ (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (FIELD)) == VECTOR_TYPE) /* Skip a variable name, enclosed in quotes ("). */ static inline @@ -2627,7 +2665,13 @@ Index: libobjc/ChangeLog =================================================================== --- a/src/libobjc/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/libobjc/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,10 @@ +@@ -1,3 +1,16 @@ ++2014-07-28 Ulrich Weigand ++ ++ PR libobjc/61920 ++ * encoding.c (rs6000_special_adjust_field_align_p): Use definition ++ that matches the 4.9 branch ABI. ++ +2014-07-27 Alan Modra + Matthias Klose + -- cgit v1.2.3 From 17dc74147076a971e807d14db9c666ad5346faab Mon Sep 17 00:00:00 2001 From: doko Date: Thu, 31 Jul 2014 07:59:03 +0000 Subject: * Build libphobos on armel and armhf. Closes: #755390. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7541 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 1 + debian/rules.defs | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index 6cdef30..5269b3f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,7 @@ gcc-4.9 (4.9.1-4) UNRELEASED; urgency=medium * Update to SVN 20140728 (r213129) from the gcc-4_9-branch. + * Build libphobos on armel and armhf. Closes: #755390. -- Matthias Klose Mon, 28 Jul 2014 18:26:32 +0200 diff --git a/debian/rules.defs b/debian/rules.defs index 2d6d27f..90dd83b 100644 --- a/debian/rules.defs +++ b/debian/rules.defs @@ -818,10 +818,10 @@ ifeq ($(with_d),yes) mips mipsel mips64 mips64el mipsn32 mipsn32el \ powerpc powerpcspe ppc64 s390 s390x sh4 sparc sparc64 libphobos_no_systems := gnu kfreebsd-gnu - ifneq (,$(findstring $(DEB_TARGET_ARCH_CPU),$(libphobos_no_cpus))) + ifneq (,$(filter $(DEB_TARGET_ARCH_CPU),$(libphobos_no_cpus))) with_libphobos := disabled for cpu $(DEB_TARGET_ARCH_CPU) endif - ifneq (,$(findstring $(DEB_TARGET_GNU_SYSTEM),$(libphobos_no_systems))) + ifneq (,$(filter $(DEB_TARGET_GNU_SYSTEM),$(libphobos_no_systems))) with_libphobos := disabled for system $(DEB_TARGET_GNU_SYSTEM) endif -- cgit v1.2.3 From 584c7233755d00a9af9078931ac731a50784e58b Mon Sep 17 00:00:00 2001 From: doko Date: Thu, 31 Jul 2014 08:08:17 +0000 Subject: * Update to SVN 20140731 (r213317) from the gcc-4_9-branch. - CVE-2014-5044, fix integer overflows in array allocation in libgfortran. Closes: #756325. 
git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7542 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 4 +- debian/patches/svn-updates.diff | 11047 +++++++++++++++++++++++++++++++++++++- 2 files changed, 11046 insertions(+), 5 deletions(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index 5269b3f..b40373b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,8 @@ gcc-4.9 (4.9.1-4) UNRELEASED; urgency=medium - * Update to SVN 20140728 (r213129) from the gcc-4_9-branch. + * Update to SVN 20140731 (r213317) from the gcc-4_9-branch. + - CVE-2014-5044, fix integer overflows in array allocation in libgfortran. + Closes: #756325. * Build libphobos on armel and armhf. Closes: #755390. -- Matthias Klose Mon, 28 Jul 2014 18:26:32 +0200 diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index 22a673c..f096fdf 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140728 (r213129). +# DP: updates from the 4.9 branch upto 20140731 (r213317). last_update() { cat > ${dir}LAST_UPDATED ++ std::__detail::_Adaptor<_UniformRandomNumberGenerator, double> + __aurng(__urng); + + result_type __a = __param.successful_size(); +Index: libstdc++-v3/include/bits/random.tcc +=================================================================== +--- a/src/libstdc++-v3/include/bits/random.tcc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/bits/random.tcc (.../branches/gcc-4_9-branch) +@@ -3463,6 +3463,9 @@ + _RealType + generate_canonical(_UniformRandomNumberGenerator& __urng) + { ++ static_assert(std::is_floating_point<_RealType>::value, ++ "template argument not a floating point type"); ++ + const size_t __b + = std::min(static_cast(std::numeric_limits<_RealType>::digits), + __bits); +Index: libstdc++-v3/include/bits/random.h +=================================================================== +--- a/src/libstdc++-v3/include/bits/random.h (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/bits/random.h (.../branches/gcc-4_9-branch) +@@ -164,6 +164,8 @@ + template + struct _Adaptor + { ++ static_assert(std::is_floating_point<_DInputType>::value, ++ "template argument not a floating point type"); + + public: + _Adaptor(_Engine& __g) +Index: libstdc++-v3/ChangeLog +=================================================================== +--- a/src/libstdc++-v3/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,14 @@ ++2014-07-29 Ed Smith-Rowland <3dw4rd@verizon.net> ++ ++ PR libstdc++/60037 - SIGFPE in std::generate_canonical ++ * include/bits/random.h (_Adaptor): static_assert for non floating-point ++ result type. ++ * include/bits/random.tcc (generate_canonical): Ditto. ++ * include/ext/random.tcc (hypergeometric_distribution::operator()): ++ Use double as a rng result type. ++ * testsuite/26_numerics/random/pr60037-neg.cc: New. ++ * testsuite/ext/random/hypergeometric_distribution/pr60037.cc: New. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. 
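The libstdc++ hunks above (PR libstdc++/60037) turn a runtime failure into a compile-time diagnostic: instantiating std::generate_canonical, or the internal _Adaptor, with a non-floating-point result type could SIGFPE at run time and is now rejected by a static_assert, and the hypergeometric_distribution hunk draws its uniform variates as double. A small sketch of the user-visible effect (illustrative only; the branch's own testcases follow below):

#include <limits>
#include <random>

int main()
{
  std::mt19937 urng;

  // Valid: a floating-point result type, as generate_canonical requires.
  double d = std::generate_canonical<double,
                 std::numeric_limits<double>::digits>(urng);

  // With the patched headers the line below no longer compiles
  // ("template argument not a floating point type"); previously it
  // could crash with SIGFPE at run time (PR libstdc++/60037).
  // auto bad = std::generate_canonical<unsigned long, 64>(urng);

  return d < 1.0 ? 0 : 1;  // generate_canonical returns a value in [0,1)
}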
+Index: libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc +=================================================================== +--- a/src/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc (.../branches/gcc-4_9-branch) +@@ -0,0 +1,15 @@ ++// { dg-do compile } ++// { dg-options "-std=gnu++11" } ++ ++#include ++ ++std::mt19937 urng; ++ ++std::__detail::_Adaptor aurng(urng); ++ ++auto x = std::generate_canonical::digits>(urng); ++ ++// { dg-error "static assertion failed: template argument not a floating point type" "" { target *-*-* } 167 } ++ ++// { dg-error "static assertion failed: template argument not a floating point type" "" { target *-*-* } 3466 } +Index: libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/pr60037.cc +=================================================================== +--- a/src/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/pr60037.cc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/pr60037.cc (.../branches/gcc-4_9-branch) +@@ -0,0 +1,23 @@ ++// { dg-options "-std=gnu++11 -O0" } ++// { dg-require-cstdint "" } ++// { dg-require-cmath "" } ++ ++#include ++#include ++ ++void ++hyperplot(unsigned int N, unsigned int K, unsigned int n) ++{ ++ std::mt19937 re; // the default engine ++ __gnu_cxx::hypergeometric_distribution<> hd(N, K, n); ++ auto gen = std::bind(hd, re); ++ gen(); ++} ++ ++int ++main() ++{ ++ hyperplot(15, 3, 2); ++ hyperplot(500, 50, 30); ++ hyperplot(100, 20, 5); ++} Index: configure.ac =================================================================== --- a/src/configure.ac (.../tags/gcc_4_9_1_release) @@ -306,7 +413,7 @@ Index: gcc/DATESTAMP +++ b/src/gcc/DATESTAMP (.../branches/gcc-4_9-branch) @@ -1 +1 @@ -20140716 -+20140728 ++20140731 Index: gcc/omp-low.c =================================================================== --- a/src/gcc/omp-low.c (.../tags/gcc_4_9_1_release) @@ -2682,6 +2789,10938 @@ Index: libobjc/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. +Index: libgfortran/m4/in_pack.m4 +=================================================================== +--- a/src/libgfortran/m4/in_pack.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/in_pack.m4 (.../branches/gcc-4_9-branch) +@@ -79,7 +79,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = ('rtype_name` *)xmalloc (ssize * sizeof ('rtype_name`)); ++ destptr = xmallocarray (ssize, sizeof ('rtype_name`)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/m4/pack.m4 +=================================================================== +--- a/src/libgfortran/m4/pack.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/pack.m4 (.../branches/gcc-4_9-branch) +@@ -168,8 +168,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof ('rtype_name`) * total); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (total, sizeof ('rtype_name`)); + + if (total == 0) + return; +Index: libgfortran/m4/spread.m4 +=================================================================== +--- a/src/libgfortran/m4/spread.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/spread.m4 (.../branches/gcc-4_9-branch) +@@ -102,8 +102,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. 
*/ +- ret->base_addr = xmalloc (rs * sizeof('rtype_name`)); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (rs, sizeof('rtype_name`)); + if (rs <= 0) + return; + } +@@ -245,7 +245,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof ('rtype_name`)); ++ ret->base_addr = xmallocarray (ncopies, sizeof ('rtype_name`)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/m4/transpose.m4 +=================================================================== +--- a/src/libgfortran/m4/transpose.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/transpose.m4 (.../branches/gcc-4_9-branch) +@@ -61,7 +61,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof ('rtype_name`) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof ('rtype_name`)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/m4/iforeach.m4 +=================================================================== +--- a/src/libgfortran/m4/iforeach.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/iforeach.m4 (.../branches/gcc-4_9-branch) +@@ -30,7 +30,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (rtype_name) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (rtype_name)); + } + else + { +@@ -133,7 +133,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (rtype_name) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (rtype_name)); + } + else + { +@@ -264,7 +264,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (rtype_name) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (rtype_name)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/m4/eoshift1.m4 +=================================================================== +--- a/src/libgfortran/m4/eoshift1.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/eoshift1.m4 (.../branches/gcc-4_9-branch) +@@ -106,8 +106,8 @@ + GFC_DIMENSION_SET(ret->dim[i], 0, ub, str); + + } +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (arraysize, size); + + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/m4/eoshift3.m4 +=================================================================== +--- a/src/libgfortran/m4/eoshift3.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/eoshift3.m4 (.../branches/gcc-4_9-branch) +@@ -90,7 +90,7 @@ + { + int i; + +- ret->base_addr = xmalloc (size * arraysize); ++ ret->base_addr = xmallocarray (arraysize, size); + ret->offset = 0; + ret->dtype = array->dtype; + for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++) +@@ -108,8 +108,8 @@ + GFC_DIMENSION_SET(ret->dim[i], 0, ub, str); + + } +- /* xmalloc allocates a single byte for zero size. 
*/ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (arraysize, size); + + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/m4/shape.m4 +=================================================================== +--- a/src/libgfortran/m4/shape.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/shape.m4 (.../branches/gcc-4_9-branch) +@@ -50,7 +50,7 @@ + { + GFC_DIMENSION_SET(ret->dim[0], 0, rank - 1, 1); + ret->offset = 0; +- ret->base_addr = xmalloc (sizeof ('rtype_name`) * rank); ++ ret->base_addr = xmallocarray (rank, sizeof ('rtype_name`)); + } + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); +Index: libgfortran/m4/cshift1.m4 +=================================================================== +--- a/src/libgfortran/m4/cshift1.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/cshift1.m4 (.../branches/gcc-4_9-branch) +@@ -81,7 +81,7 @@ + { + int i; + +- ret->base_addr = xmalloc (size * arraysize); ++ ret->base_addr = xmallocarray (arraysize, size); + ret->offset = 0; + ret->dtype = array->dtype; + for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++) +Index: libgfortran/m4/matmull.m4 +=================================================================== +--- a/src/libgfortran/m4/matmull.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/matmull.m4 (.../branches/gcc-4_9-branch) +@@ -89,7 +89,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof ('rtype_name`) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof ('rtype_name`)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/m4/bessel.m4 +=================================================================== +--- a/src/libgfortran/m4/bessel.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/bessel.m4 (.../branches/gcc-4_9-branch) +@@ -56,7 +56,7 @@ + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); +- ret->base_addr = xmalloc (sizeof ('rtype_name`) * size); ++ ret->base_addr = xmallocarray (size, sizeof ('rtype_name`)); + ret->offset = 0; + } + +@@ -123,7 +123,7 @@ + { + size_t size = n2 < n1 ? 
0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); +- ret->base_addr = xmalloc (sizeof ('rtype_name`) * size); ++ ret->base_addr = xmallocarray (size, sizeof ('rtype_name`)); + ret->offset = 0; + } + +Index: libgfortran/m4/unpack.m4 +=================================================================== +--- a/src/libgfortran/m4/unpack.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/unpack.m4 (.../branches/gcc-4_9-branch) +@@ -100,7 +100,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof ('rtype_name`)); ++ ret->base_addr = xmallocarray (rs, sizeof ('rtype_name`)); + } + else + { +@@ -245,7 +245,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof ('rtype_name`)); ++ ret->base_addr = xmallocarray (rs, sizeof ('rtype_name`)); + } + else + { +Index: libgfortran/m4/reshape.m4 +=================================================================== +--- a/src/libgfortran/m4/reshape.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/reshape.m4 (.../branches/gcc-4_9-branch) +@@ -115,11 +115,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof ('rtype_name`); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof ('rtype_name`)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/m4/ifunction_logical.m4 +=================================================================== +--- a/src/libgfortran/m4/ifunction_logical.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/ifunction_logical.m4 (.../branches/gcc-4_9-branch) +@@ -89,8 +89,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (rtype_name) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -99,7 +98,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (rtype_name)); + } + else + { +Index: libgfortran/m4/ifunction.m4 +=================================================================== +--- a/src/libgfortran/m4/ifunction.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/ifunction.m4 (.../branches/gcc-4_9-branch) +@@ -85,10 +85,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (rtype_name) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (rtype_name)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -260,8 +259,7 @@ + + } + +- alloc_size = sizeof (rtype_name) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -273,7 +271,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (rtype_name)); + + } + else +@@ -417,8 +415,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (rtype_name) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -427,7 +424,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (rtype_name)); + } + else + { +Index: libgfortran/m4/matmul.m4 +=================================================================== +--- a/src/libgfortran/m4/matmul.m4 (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/m4/matmul.m4 (.../branches/gcc-4_9-branch) +@@ -125,7 +125,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof ('rtype_name`) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof ('rtype_name`)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/runtime/in_pack_generic.c +=================================================================== +--- a/src/libgfortran/runtime/in_pack_generic.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/runtime/in_pack_generic.c (.../branches/gcc-4_9-branch) +@@ -180,7 +180,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = xmalloc (ssize * size); ++ destptr = xmallocarray (ssize, size); + dest = (char *)destptr; + src = source->base_addr; + stride0 = stride[0] * size; +Index: libgfortran/runtime/memory.c +=================================================================== +--- a/src/libgfortran/runtime/memory.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/runtime/memory.c (.../branches/gcc-4_9-branch) +@@ -25,8 +25,13 @@ + + #include "libgfortran.h" + #include ++#include + ++#ifndef SIZE_MAX ++#define SIZE_MAX ((size_t)-1) ++#endif + ++ + void * + xmalloc (size_t n) + { +@@ -44,12 +49,34 @@ + } + + ++void * ++xmallocarray (size_t nmemb, size_t size) ++{ ++ void *p; ++ ++ if (!nmemb || !size) ++ size = nmemb = 1; ++ else if (nmemb > SIZE_MAX / size) ++ { ++ errno = ENOMEM; ++ os_error ("Integer overflow in xmallocarray"); ++ } ++ ++ p = malloc (nmemb * size); ++ ++ if (!p) ++ os_error ("Memory allocation failed in xmallocarray"); ++ ++ return p; ++} ++ ++ + /* calloc wrapper that aborts on error. */ + + void * + xcalloc (size_t nmemb, size_t size) + { +- if (nmemb * size == 0) ++ if (!nmemb || !size) + nmemb = size = 1; + + void *p = calloc (nmemb, size); +Index: libgfortran/runtime/convert_char.c +=================================================================== +--- a/src/libgfortran/runtime/convert_char.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/runtime/convert_char.c (.../branches/gcc-4_9-branch) +@@ -44,7 +44,7 @@ + gfc_charlen_type i, l; + + l = len > 0 ? 
len : 0; +- *dst = xmalloc ((l + 1) * sizeof (gfc_char4_t)); ++ *dst = xmallocarray ((l + 1), sizeof (gfc_char4_t)); + + for (i = 0; i < l; i++) + (*dst)[i] = src[i]; +@@ -60,7 +60,7 @@ + gfc_charlen_type i, l; + + l = len > 0 ? len : 0; +- *dst = xmalloc ((l + 1) * sizeof (unsigned char)); ++ *dst = xmalloc (l + 1); + + for (i = 0; i < l; i++) + (*dst)[i] = src[i]; +Index: libgfortran/runtime/environ.c +=================================================================== +--- a/src/libgfortran/runtime/environ.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/runtime/environ.c (.../branches/gcc-4_9-branch) +@@ -837,7 +837,7 @@ + } + else + { +- elist = xmalloc (unit_count * sizeof (exception_t)); ++ elist = xmallocarray (unit_count, sizeof (exception_t)); + do_count = 0; + p = val; + do_parse (); +Index: libgfortran/intrinsics/string_intrinsics_inc.c +=================================================================== +--- a/src/libgfortran/intrinsics/string_intrinsics_inc.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/string_intrinsics_inc.c (.../branches/gcc-4_9-branch) +@@ -164,7 +164,7 @@ + else + { + /* Allocate space for result string. */ +- *dest = xmalloc (*len * sizeof (CHARTYPE)); ++ *dest = xmallocarray (*len, sizeof (CHARTYPE)); + + /* Copy string if necessary. */ + memcpy (*dest, src, *len * sizeof (CHARTYPE)); +@@ -442,7 +442,7 @@ + *dest = &zero_length_string; + else + { +- CHARTYPE *tmp = xmalloc (*rlen * sizeof (CHARTYPE)); ++ CHARTYPE *tmp = xmallocarray (*rlen, sizeof (CHARTYPE)); + memcpy (tmp, res, reslen * sizeof (CHARTYPE)); + MEMSET (&tmp[reslen], ' ', *rlen - reslen); + *dest = tmp; +Index: libgfortran/intrinsics/pack_generic.c +=================================================================== +--- a/src/libgfortran/intrinsics/pack_generic.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/pack_generic.c (.../branches/gcc-4_9-branch) +@@ -152,8 +152,8 @@ + GFC_DIMENSION_SET(ret->dim[0], 0, total-1, 1); + + ret->offset = 0; +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (size * total); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (total, size); + + if (total == 0) + return; /* In this case, nothing remains to be done. */ +@@ -519,7 +519,7 @@ + + ret->offset = 0; + +- ret->base_addr = xmalloc (size * total); ++ ret->base_addr = xmallocarray (total, size); + + if (total == 0) + return; +Index: libgfortran/intrinsics/transpose_generic.c +=================================================================== +--- a/src/libgfortran/intrinsics/transpose_generic.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/transpose_generic.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,7 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (size * size0 ((array_t*)ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t*)ret), size); + ret->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/intrinsics/cshift0.c +=================================================================== +--- a/src/libgfortran/intrinsics/cshift0.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/cshift0.c (.../branches/gcc-4_9-branch) +@@ -79,8 +79,8 @@ + GFC_DIMENSION_SET(ret->dim[i], 0, ub, str); + } + +- /* xmalloc allocates a single byte for zero size. 
*/ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (arraysize, size); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/intrinsics/spread_generic.c +=================================================================== +--- a/src/libgfortran/intrinsics/spread_generic.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/spread_generic.c (.../branches/gcc-4_9-branch) +@@ -100,7 +100,7 @@ + GFC_DIMENSION_SET(ret->dim[n], 0, ub, stride); + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * size); ++ ret->base_addr = xmallocarray (rs, size); + + if (rs <= 0) + return; +@@ -245,7 +245,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * size); ++ ret->base_addr = xmallocarray (ncopies, size); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/intrinsics/unpack_generic.c +=================================================================== +--- a/src/libgfortran/intrinsics/unpack_generic.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/unpack_generic.c (.../branches/gcc-4_9-branch) +@@ -125,7 +125,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * size); ++ ret->base_addr = xmallocarray (rs, size); + } + else + { +Index: libgfortran/intrinsics/eoshift0.c +=================================================================== +--- a/src/libgfortran/intrinsics/eoshift0.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/eoshift0.c (.../branches/gcc-4_9-branch) +@@ -86,8 +86,8 @@ + + } + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (arraysize, size); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/intrinsics/eoshift2.c +=================================================================== +--- a/src/libgfortran/intrinsics/eoshift2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/eoshift2.c (.../branches/gcc-4_9-branch) +@@ -78,8 +78,8 @@ + ret->offset = 0; + ret->dtype = array->dtype; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (arraysize, size); + + for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++) + { +Index: libgfortran/intrinsics/reshape_generic.c +=================================================================== +--- a/src/libgfortran/intrinsics/reshape_generic.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/intrinsics/reshape_generic.c (.../branches/gcc-4_9-branch) +@@ -99,11 +99,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; /* xmalloc will allocate 1 byte. */ + else +- alloc_size = rs * size; ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, size); + + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } +Index: libgfortran/ChangeLog +=================================================================== +--- a/src/libgfortran/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,57 @@ ++2014-07-31 Janne Blomqvist ++ ++ Backport from mainline ++ CVE-2014-5044 ++ * libgfortran.h (xmallocarray): New prototype. 
++ * runtime/memory.c (xmallocarray): New function. ++ (xcalloc): Check for nonzero separately instead of multiplying. ++ * generated/*.c: Regenerated. ++ * intrinsics/cshift0.c (cshift0): Call xmallocarray instead of ++ xmalloc. ++ * intrinsics/eoshift0.c (eoshift0): Likewise. ++ * intrinsics/eoshift2.c (eoshift2): Likewise. ++ * intrinsics/pack_generic.c (pack_internal): Likewise. ++ (pack_s_internal): Likewise. ++ * intrinsics/reshape_generic.c (reshape_internal): Likewise. ++ * intrinsics/spread_generic.c (spread_internal): Likewise. ++ (spread_internal_scalar): Likewise. ++ * intrinsics/string_intrinsics_inc.c (string_trim): Likewise. ++ (string_minmax): Likewise. ++ * intrinsics/transpose_generic.c (transpose_internal): Likewise. ++ * intrinsics/unpack_generic.c (unpack_internal): Likewise. ++ * io/list_read.c (nml_touch_nodes): Don't cast xmalloc return value. ++ * io/transfer.c (st_set_nml_var): Call xmallocarray instead of ++ xmalloc. ++ * io/unit.c (get_internal_unit): Likewise. ++ (filename_from_unit): Don't cast xmalloc return value. ++ * io/write.c (nml_write_obj): Likewise, formatting. ++ * m4/bessel.m4 (bessel_jn_r'rtype_kind`): Call xmallocarray ++ instead of xmalloc. ++ (besse_yn_r'rtype_kind`): Likewise. ++ * m4/cshift1.m4 (cshift1): Likewise. ++ * m4/eoshift1.m4 (eoshift1): Likewise. ++ * m4/eoshift3.m4 (eoshift3): Likewise. ++ * m4/iforeach.m4: Likewise. ++ * m4/ifunction.m4: Likewise. ++ * m4/ifunction_logical.m4 (name`'rtype_qual`_'atype_code): ++ Likewise. ++ * m4/in_pack.m4 (internal_pack_'rtype_ccode`): Likewise. ++ * m4/matmul.m4 (matmul_'rtype_code`): Likewise. ++ * m4/matmull.m4 (matmul_'rtype_code`): Likewise. ++ * m4/pack.m4 (pack_'rtype_code`): Likewise. ++ * m4/reshape.m4 (reshape_'rtype_ccode`): Likewise. ++ * m4/shape.m4 (shape_'rtype_kind`): Likewise. ++ * m4/spread.m4 (spread_'rtype_code`): Likewise. ++ (spread_scalar_'rtype_code`): Likewise. ++ * m4/transpose.m4 (transpose_'rtype_code`): Likewise. ++ * m4/unpack.m4 (unpack0_'rtype_code`): Likewise. ++ (unpack1_'rtype_code`): Likewise. ++ * runtime/convert_char.c (convert_char1_to_char4): Likewise. ++ (convert_char4_to_char1): Simplify. ++ * runtime/environ.c (init_unformatted): Call xmallocarray instead ++ of xmalloc. ++ * runtime/in_pack_generic.c (internal_pack): Likewise. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: libgfortran/generated/spread_r10.c +=================================================================== +--- a/src/libgfortran/generated/spread_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_r10.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_REAL_10)); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_REAL_10)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_REAL_10)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_REAL_10)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/maxloc1_4_r8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_4_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_4_r8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/norm2_r4.c +=================================================================== +--- a/src/libgfortran/generated/norm2_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/norm2_r4.c (.../branches/gcc-4_9-branch) +@@ -101,10 +101,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +Index: libgfortran/generated/parity_l2.c +=================================================================== +--- a/src/libgfortran/generated/parity_l2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/parity_l2.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_2)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +Index: libgfortran/generated/eoshift3_4.c +=================================================================== +--- a/src/libgfortran/generated/eoshift3_4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/eoshift3_4.c (.../branches/gcc-4_9-branch) +@@ -89,7 +89,7 @@ + { + int i; + +- ret->base_addr = xmalloc (size * arraysize); ++ ret->base_addr = xmallocarray (arraysize, size); + ret->offset = 0; + ret->dtype = array->dtype; + for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++) +@@ -107,8 +107,8 @@ + GFC_DIMENSION_SET(ret->dim[i], 0, ub, str); + + } +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (arraysize, size); + + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/transpose_c8.c +=================================================================== +--- a/src/libgfortran/generated/transpose_c8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_c8.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_COMPLEX_8) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_COMPLEX_8)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/eoshift1_8.c +=================================================================== +--- a/src/libgfortran/generated/eoshift1_8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/eoshift1_8.c (.../branches/gcc-4_9-branch) +@@ -105,8 +105,8 @@ + GFC_DIMENSION_SET(ret->dim[i], 0, ub, str); + + } +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (arraysize, size); + + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/reshape_r16.c +=================================================================== +--- a/src/libgfortran/generated/reshape_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/reshape_r16.c (.../branches/gcc-4_9-branch) +@@ -111,11 +111,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof (GFC_REAL_16); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/generated/bessel_r4.c +=================================================================== +--- a/src/libgfortran/generated/bessel_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/bessel_r4.c (.../branches/gcc-4_9-branch) +@@ -55,7 +55,7 @@ + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); +- ret->base_addr = xmalloc (sizeof (GFC_REAL_4) * size); ++ ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_4)); + ret->offset = 0; + } + +@@ -122,7 +122,7 @@ + { + size_t size = n2 < n1 ? 
0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); +- ret->base_addr = xmalloc (sizeof (GFC_REAL_4) * size); ++ ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_4)); + ret->offset = 0; + } + +Index: libgfortran/generated/any_l2.c +=================================================================== +--- a/src/libgfortran/generated/any_l2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/any_l2.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_2)); + } + else + { +Index: libgfortran/generated/product_r4.c +=================================================================== +--- a/src/libgfortran/generated/product_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_r4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + } + else + { +Index: libgfortran/generated/iany_i1.c +=================================================================== +--- a/src/libgfortran/generated/iany_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iany_i1.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + } + else + { +Index: libgfortran/generated/parity_l16.c +=================================================================== +--- a/src/libgfortran/generated/parity_l16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/parity_l16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +Index: libgfortran/generated/in_pack_r4.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_r4.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = (GFC_REAL_4 *)xmalloc (ssize * sizeof (GFC_REAL_4)); ++ destptr = xmallocarray (ssize, sizeof (GFC_REAL_4)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/product_i2.c +=================================================================== +--- a/src/libgfortran/generated/product_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_i2.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + } + else + { +Index: libgfortran/generated/iparity_i4.c +=================================================================== +--- a/src/libgfortran/generated/iparity_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iparity_i4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/minloc0_4_i1.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_4_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_4_i1.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof 
(GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/reshape_c4.c +=================================================================== +--- a/src/libgfortran/generated/reshape_c4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/reshape_c4.c (.../branches/gcc-4_9-branch) +@@ -111,11 +111,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof (GFC_COMPLEX_4); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_4)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/generated/maxloc0_4_r16.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_4_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_4_r16.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/iall_i8.c +=================================================================== +--- a/src/libgfortran/generated/iall_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iall_i8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/maxloc1_8_r16.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_8_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_8_r16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/sum_r16.c +=================================================================== +--- a/src/libgfortran/generated/sum_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_r16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + } + else + { +Index: libgfortran/generated/sum_i1.c +=================================================================== +--- a/src/libgfortran/generated/sum_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_i1.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + } + else + { +Index: libgfortran/generated/in_pack_i2.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_i2.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. 
*/ +- destptr = (GFC_INTEGER_2 *)xmalloc (ssize * sizeof (GFC_INTEGER_2)); ++ destptr = xmallocarray (ssize, sizeof (GFC_INTEGER_2)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/transpose_r10.c +=================================================================== +--- a/src/libgfortran/generated/transpose_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_r10.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_REAL_10) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_REAL_10)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/maxloc1_16_r16.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_16_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_16_r16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/maxloc1_16_i4.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_16_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_16_i4.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/spread_i1.c +=================================================================== +--- a/src/libgfortran/generated/spread_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_i1.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_INTEGER_1)); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_INTEGER_1)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_INTEGER_1)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_INTEGER_1)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/maxloc0_16_i8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_16_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_16_i8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/maxval_r16.c +=================================================================== +--- a/src/libgfortran/generated/maxval_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxval_r16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; 
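Every hunk in this stretch of the update makes the same two-line change: alloc_size now counts elements (descriptor stride times extent, with no sizeof factor), and the multiplication by the element size moves into xmallocarray, where it can be checked before any memory is touched. A minimal sketch of an allocator with the semantics these call sites assume follows; the real xmallocarray lives in libgfortran's runtime (its exact error reporting may differ), and the name xmallocarray_sketch below is ours.

#include <stdlib.h>
#include <stdint.h>

void *
xmallocarray_sketch (size_t nelem, size_t elsize)
{
  void *p;

  if (nelem == 0 || elsize == 0)
    nelem = elsize = 1;   /* Keep a non-NULL, freeable block for zero size.  */
  else if (nelem > SIZE_MAX / elsize)
    abort ();             /* nelem * elsize would wrap around; refuse.  */

  p = malloc (nelem * elsize);
  if (p == NULL)
    abort ();             /* Allocation failure.  */
  return p;
}

Keeping the count and the element size as separate arguments is the same hardening idea as calloc versus malloc (n * size): the multiplication happens exactly once, behind an overflow check.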
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + } + else + { +Index: libgfortran/generated/product_c10.c +=================================================================== +--- a/src/libgfortran/generated/product_c10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_c10.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_10)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_COMPLEX_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_10));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_COMPLEX_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_10));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc1_8_i4.c
+===================================================================
+--- a/src/libgfortran/generated/minloc1_8_i4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc1_8_i4.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc0_16_i16.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_16_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_16_i16.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/matmul_r16.c
+===================================================================
+--- a/src/libgfortran/generated/matmul_r16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/matmul_r16.c (.../branches/gcc-4_9-branch)
+@@ -124,7 +124,7 @@
+ }
+
+ retarray->base_addr
+- = xmalloc (sizeof (GFC_REAL_16) * size0 ((array_t *) retarray));
++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_16));
+ retarray->offset = 0;
+ }
+ else if (unlikely (compile_options.bounds_check))
+Index: libgfortran/generated/minloc0_4_r4.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_4_r4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_4_r4.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/iany_i2.c
+===================================================================
+--- a/src/libgfortran/generated/iany_i2.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/iany_i2.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array.
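Besides the dim-reduction functions, two other call shapes recur in the hunks above: a small fixed count (one index per dimension in the maxloc0/minloc0 families) and a count obtained from size0(), which reports elements rather than bytes, so it can be passed through unchanged. A toy illustration of the count/size discipline, reusing the hypothetical xmallocarray_sketch from earlier:

#include <stddef.h>

extern void *xmallocarray_sketch (size_t nelem, size_t elsize);

/* One result slot per array dimension, as in the maxloc0/minloc0
   hunks; the count and the element size stay separate all the way
   into the allocator.  */
int *
alloc_result_vector (size_t rank)
{
  return xmallocarray_sketch (rank, sizeof (int));
}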
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + } + else + { +Index: libgfortran/generated/sum_r4.c +=================================================================== +--- a/src/libgfortran/generated/sum_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_r4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + } + else + { +Index: libgfortran/generated/unpack_c8.c +=================================================================== +--- a/src/libgfortran/generated/unpack_c8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_c8.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_COMPLEX_8)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_8)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_COMPLEX_8)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_8)); + } + else + { +Index: libgfortran/generated/in_pack_c16.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_c16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_c16.c 
(.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = (GFC_COMPLEX_16 *)xmalloc (ssize * sizeof (GFC_COMPLEX_16)); ++ destptr = xmallocarray (ssize, sizeof (GFC_COMPLEX_16)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/minloc0_4_i2.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_4_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_4_i2.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/spread_c10.c +=================================================================== +--- a/src/libgfortran/generated/spread_c10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_c10.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_COMPLEX_10)); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_COMPLEX_10)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_COMPLEX_10)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_COMPLEX_10)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/maxloc0_8_i1.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_8_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_8_i1.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/spread_r4.c +=================================================================== +--- a/src/libgfortran/generated/spread_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_r4.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_REAL_4)); ++ /* xmallocarray allocates a single byte for zero size. 
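The updated comments in the spread_* hunks record a deliberate guarantee: a zero-element request still yields a distinct, freeable allocation, so callers can keep a non-NULL base_addr for empty results without special-casing. A short sketch of that guarantee in use, again with the hypothetical xmallocarray_sketch:

#include <stdio.h>
#include <stdlib.h>

extern void *xmallocarray_sketch (size_t nelem, size_t elsize);

int
main (void)
{
  /* rs == 0: an empty SPREAD result still owns a real allocation.  */
  double *base = xmallocarray_sketch (0, sizeof (double));
  printf ("zero-sized result, base_addr %p\n", (void *) base);
  free (base);
  return 0;
}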
*/ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_REAL_4)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_REAL_4)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_REAL_4)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/minloc0_8_i8.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_8_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_8_i8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/matmul_c8.c +=================================================================== +--- a/src/libgfortran/generated/matmul_c8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_c8.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_COMPLEX_8) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_8)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/minloc1_16_r10.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_16_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_16_r10.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/sum_i2.c +=================================================================== +--- a/src/libgfortran/generated/sum_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_i2.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + } + else + { +Index: libgfortran/generated/iparity_i16.c +=================================================================== +--- a/src/libgfortran/generated/iparity_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iparity_i16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/minloc0_16_i1.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_16_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_16_i1.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/reshape_c16.c +=================================================================== +--- a/src/libgfortran/generated/reshape_c16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/reshape_c16.c (.../branches/gcc-4_9-branch) +@@ -111,11 +111,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof (GFC_COMPLEX_16); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_16)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/generated/pack_c4.c +=================================================================== +--- a/src/libgfortran/generated/pack_c4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_c4.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_COMPLEX_4) * total); ++ /* xmallocarray allocates a single byte for zero size. 
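The reshape_c16 hunk above is the one place where a literal changes meaning: the old code clamped a byte count to 1 so xmalloc would return a usable pointer for an empty result, while the new code passes a count of 0 elements and relies on the allocator's zero-size guarantee. Condensed for comparison (illustrative fragments in the style of the generated code, not compilable on their own):

/* Old: the caller clamps a byte count so xmalloc (0) never happens.  */
if (unlikely (rs < 1))
  alloc_size = 1;                          /* bytes */
else
  alloc_size = rs * sizeof (GFC_COMPLEX_16);
ret->base_addr = xmalloc (alloc_size);

/* New: the caller reports zero elements; the allocator supplies the
   minimal non-NULL block itself.  */
if (unlikely (rs < 1))
  alloc_size = 0;                          /* elements */
else
  alloc_size = rs;
ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_16));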
*/ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_COMPLEX_4)); + + if (total == 0) + return; +Index: libgfortran/generated/parity_l4.c +=================================================================== +--- a/src/libgfortran/generated/parity_l4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/parity_l4.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +Index: libgfortran/generated/spread_i2.c +=================================================================== +--- a/src/libgfortran/generated/spread_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_i2.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_INTEGER_2)); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_INTEGER_2)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_INTEGER_2)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_INTEGER_2)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/any_l4.c +=================================================================== +--- a/src/libgfortran/generated/any_l4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/any_l4.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_4)); + } + else + { +Index: libgfortran/generated/maxloc1_4_i8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_4_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_4_i8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/maxloc0_8_r4.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_8_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_8_r4.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/maxloc1_4_i16.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_4_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_4_i16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/minloc0_4_r10.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_4_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_4_r10.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc0_8_i16.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_8_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_8_i16.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc1_8_r10.c 
+===================================================================
+--- a/src/libgfortran/generated/minloc1_8_r10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc1_8_r10.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc0_16_r4.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_16_r4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_16_r4.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/product_i4.c
+===================================================================
+--- a/src/libgfortran/generated/product_i4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/product_i4.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/sum_c16.c
+===================================================================
+--- a/src/libgfortran/generated/sum_c16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/sum_c16.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_COMPLEX_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array.
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_COMPLEX_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_16)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_16)); + } + else + { +Index: libgfortran/generated/transpose_c10.c +=================================================================== +--- a/src/libgfortran/generated/transpose_c10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_c10.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_COMPLEX_10) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_COMPLEX_10)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/maxloc1_16_r8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_16_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_16_r8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/transpose_r4.c +=================================================================== +--- a/src/libgfortran/generated/transpose_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_r4.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_REAL_4) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_REAL_4)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/cshift1_4.c +=================================================================== +--- a/src/libgfortran/generated/cshift1_4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/cshift1_4.c (.../branches/gcc-4_9-branch) +@@ -80,7 +80,7 @@ + { + int i; + +- ret->base_addr = xmalloc (size * arraysize); ++ ret->base_addr = xmallocarray (arraysize, size); + ret->offset = 0; + ret->dtype = array->dtype; + for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++) +Index: libgfortran/generated/maxloc0_8_i2.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_8_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_8_i2.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/count_8_l.c +=================================================================== +--- a/src/libgfortran/generated/count_8_l.c (.../tags/gcc_4_9_1_release) 
++++ b/src/libgfortran/generated/count_8_l.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/in_pack_i4.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_i4.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = (GFC_INTEGER_4 *)xmalloc (ssize * sizeof (GFC_INTEGER_4)); ++ destptr = xmallocarray (ssize, sizeof (GFC_INTEGER_4)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/minloc0_16_i2.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_16_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_16_i2.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc1_8_r8.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_8_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_8_r8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
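The in_pack_* hunks (in_pack_i4 above, with in_pack_i2 and in_pack_c16 earlier) also drop a cast: xmallocarray, like xmalloc, returns void *, which converts implicitly in C, so the old (GFC_INTEGER_4 *) conversion was noise. Side by side, from the hunk above:

destptr = (GFC_INTEGER_4 *) xmalloc (ssize * sizeof (GFC_INTEGER_4)); /* old */
destptr = xmallocarray (ssize, sizeof (GFC_INTEGER_4));               /* new */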
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/matmul_c16.c +=================================================================== +--- a/src/libgfortran/generated/matmul_c16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_c16.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_COMPLEX_16) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_16)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/minval_i1.c +=================================================================== +--- a/src/libgfortran/generated/minval_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minval_i1.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + } + else + { +Index: libgfortran/generated/shape_i16.c +=================================================================== +--- a/src/libgfortran/generated/shape_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/shape_i16.c (.../branches/gcc-4_9-branch) +@@ -49,7 +49,7 @@ + { + GFC_DIMENSION_SET(ret->dim[0], 0, rank - 1, 1); + ret->offset = 0; +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ ret->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); +Index: libgfortran/generated/iany_i4.c +=================================================================== +--- a/src/libgfortran/generated/iany_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iany_i4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/minloc0_16_r16.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_16_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_16_r16.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/product_i16.c +=================================================================== +--- a/src/libgfortran/generated/product_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_i16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/unpack_i1.c +=================================================================== +--- a/src/libgfortran/generated/unpack_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_i1.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_1)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_1)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_1)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_1)); + } + else + { +Index: libgfortran/generated/minloc0_4_i4.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_4_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_4_i4.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/matmul_i1.c +=================================================================== +--- a/src/libgfortran/generated/matmul_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_i1.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_INTEGER_1) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/minval_r4.c +=================================================================== +--- 
a/src/libgfortran/generated/minval_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minval_r4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + } + else + { +Index: libgfortran/generated/spread_i16.c +=================================================================== +--- a/src/libgfortran/generated/spread_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_i16.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_INTEGER_16)); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_INTEGER_16)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_INTEGER_16)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_INTEGER_16)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/sum_i4.c +=================================================================== +--- a/src/libgfortran/generated/sum_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_i4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/unpack_r10.c +=================================================================== +--- a/src/libgfortran/generated/unpack_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_r10.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_REAL_10)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_10)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_REAL_10)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_10)); + } + else + { +Index: libgfortran/generated/bessel_r16.c +=================================================================== +--- a/src/libgfortran/generated/bessel_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/bessel_r16.c (.../branches/gcc-4_9-branch) +@@ -59,7 +59,7 @@ + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); +- ret->base_addr = xmalloc (sizeof (GFC_REAL_16) * size); ++ ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_16)); + ret->offset = 0; + } + +@@ -126,7 +126,7 @@ + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); +- ret->base_addr = xmalloc (sizeof (GFC_REAL_16) * size); ++ ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_16)); + ret->offset = 0; + } + +Index: libgfortran/generated/norm2_r8.c +=================================================================== +--- a/src/libgfortran/generated/norm2_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/norm2_r8.c (.../branches/gcc-4_9-branch) +@@ -101,10 +101,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +Index: libgfortran/generated/spread_i4.c +=================================================================== +--- a/src/libgfortran/generated/spread_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_i4.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. 
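
The unsafe part of the removed line is that rs * sizeof (GFC_INTEGER_4) is evaluated in the caller as ordinary modular size_t arithmetic, with no overflow check before the product reaches xmalloc. Below is a small self-contained demonstration of the wraparound, using uint32_t to model a 32-bit size_t (an assumption for illustration; the width of size_t is target-dependent).

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  /* With a 32-bit size_t, 0x40000001 four-byte elements wrap:
     0x40000001 * 4 == 0x100000004, which is 4 modulo 2^32.  */
  uint32_t rs = 0x40000001u;
  uint32_t bytes = rs * 4u;
  printf ("requested %u elements, computed %u bytes\n", rs, bytes);
  return 0;
}

The program prints "requested 1073741825 elements, computed 4 bytes": an unchecked allocator would hand back a 4-byte block, and every later store past it corrupts the heap. Passing rs and the element size separately to a checking allocator is exactly what rules this out.
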
*/ +- ret->base_addr = xmalloc (rs * sizeof(GFC_INTEGER_4)); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_INTEGER_4)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_INTEGER_4)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_INTEGER_4)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/eoshift3_8.c +=================================================================== +--- a/src/libgfortran/generated/eoshift3_8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/eoshift3_8.c (.../branches/gcc-4_9-branch) +@@ -89,7 +89,7 @@ + { + int i; + +- ret->base_addr = xmalloc (size * arraysize); ++ ret->base_addr = xmallocarray (arraysize, size); + ret->offset = 0; + ret->dtype = array->dtype; + for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++) +@@ -107,8 +107,8 @@ + GFC_DIMENSION_SET(ret->dim[i], 0, ub, str); + + } +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (arraysize, size); + + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/minloc1_4_i1.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_4_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_4_i1.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/minval_i2.c +=================================================================== +--- a/src/libgfortran/generated/minval_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minval_i2.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + } + else + { +Index: libgfortran/generated/bessel_r8.c +=================================================================== +--- a/src/libgfortran/generated/bessel_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/bessel_r8.c (.../branches/gcc-4_9-branch) +@@ -55,7 +55,7 @@ + { + size_t size = n2 < n1 ? 0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); +- ret->base_addr = xmalloc (sizeof (GFC_REAL_8) * size); ++ ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_8)); + ret->offset = 0; + } + +@@ -122,7 +122,7 @@ + { + size_t size = n2 < n1 ? 
0 : n2-n1+1; + GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1); +- ret->base_addr = xmalloc (sizeof (GFC_REAL_8) * size); ++ ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_8)); + ret->offset = 0; + } + +Index: libgfortran/generated/unpack_r4.c +=================================================================== +--- a/src/libgfortran/generated/unpack_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_r4.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_REAL_4)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_4)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_REAL_4)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_4)); + } + else + { +Index: libgfortran/generated/product_r8.c +=================================================================== +--- a/src/libgfortran/generated/product_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_r8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + } + else + { +Index: libgfortran/generated/matmul_r4.c +=================================================================== +--- a/src/libgfortran/generated/matmul_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_r4.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_REAL_4) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_4)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/unpack_i2.c +=================================================================== +--- a/src/libgfortran/generated/unpack_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_i2.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_2)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_2)); + } + else + { +@@ -244,7 +244,7 
@@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_2)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_2)); + } + else + { +Index: libgfortran/generated/in_pack_r8.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_r8.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = (GFC_REAL_8 *)xmalloc (ssize * sizeof (GFC_REAL_8)); ++ destptr = xmallocarray (ssize, sizeof (GFC_REAL_8)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/maxloc1_4_r16.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_4_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_4_r16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/minloc0_8_r16.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_8_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_8_r16.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc 
(sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/reshape_c8.c +=================================================================== +--- a/src/libgfortran/generated/reshape_c8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/reshape_c8.c (.../branches/gcc-4_9-branch) +@@ -111,11 +111,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof (GFC_COMPLEX_8); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_8)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/generated/iparity_i8.c +=================================================================== +--- a/src/libgfortran/generated/iparity_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iparity_i8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/count_1_l.c +=================================================================== +--- a/src/libgfortran/generated/count_1_l.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/count_1_l.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + } + else + { +Index: libgfortran/generated/maxloc0_8_i4.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_8_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_8_i4.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + 
GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/matmul_i2.c +=================================================================== +--- a/src/libgfortran/generated/matmul_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_i2.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_INTEGER_2) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/minloc1_4_r4.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_4_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_4_r4.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/transpose_i16.c +=================================================================== +--- a/src/libgfortran/generated/transpose_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_i16.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_INTEGER_16)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc0_16_i4.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_16_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_16_i4.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/transpose_i4.c +=================================================================== +--- a/src/libgfortran/generated/transpose_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_i4.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_INTEGER_4)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: 
libgfortran/generated/maxloc1_16_i8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_16_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_16_i8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/minloc1_4_i2.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_4_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_4_i2.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/matmul_l16.c +=================================================================== +--- a/src/libgfortran/generated/matmul_l16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_l16.c (.../branches/gcc-4_9-branch) +@@ -88,7 +88,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_LOGICAL_16) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_LOGICAL_16)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/maxloc1_8_i1.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_8_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_8_i1.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/minloc1_8_i8.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_8_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_8_i8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/minloc0_4_r8.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_4_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_4_r8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, 
sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/product_r16.c +=================================================================== +--- a/src/libgfortran/generated/product_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_r16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16)); + } + else + { +Index: libgfortran/generated/sum_r8.c +=================================================================== +--- a/src/libgfortran/generated/sum_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_r8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + } + else + { +Index: libgfortran/generated/norm2_r10.c +=================================================================== +--- a/src/libgfortran/generated/norm2_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/norm2_r10.c (.../branches/gcc-4_9-branch) +@@ -101,10 +101,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +Index: libgfortran/generated/unpack_c10.c +=================================================================== +--- a/src/libgfortran/generated/unpack_c10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_c10.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_COMPLEX_10)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_10)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_COMPLEX_10)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_10)); + } + else + { +Index: libgfortran/generated/spread_r8.c +=================================================================== +--- a/src/libgfortran/generated/spread_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_r8.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_REAL_8)); ++ /* xmallocarray allocates a single byte for zero size. 
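
The comment rewritten in this hunk documents a deliberate convention: for a zero-sized request the allocator still returns one real byte, so the descriptor ends up with a non-NULL base_addr and the early "if (rs <= 0) return;" path leaves a valid empty array behind. A standalone sketch of that caller-side convention, with plain malloc as a stand-in for the runtime allocator:

#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  size_t rs = 0;    /* zero-extent result array  */

  /* Allocate one byte for zero elements, so "allocated but empty"
     (non-NULL) stays distinguishable from "not yet allocated" (NULL).  */
  double *base = malloc (rs ? rs * sizeof (double) : 1);
  if (base == NULL)
    return 1;
  printf ("empty array allocated at %p\n", (void *) base);
  free (base);
  return 0;
}
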
*/ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_REAL_8)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_REAL_8)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_REAL_8)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/minloc1_16_i16.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_16_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_16_i16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/maxloc1_8_r4.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_8_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_8_r4.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/minloc1_16_i1.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_16_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_16_i1.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/spread_r16.c +=================================================================== +--- a/src/libgfortran/generated/spread_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_r16.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_REAL_16)); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_REAL_16)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_REAL_16)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_REAL_16)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/pack_c8.c +=================================================================== +--- a/src/libgfortran/generated/pack_c8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_c8.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_COMPLEX_8) * total); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_COMPLEX_8)); + + if (total == 0) + return; +Index: libgfortran/generated/minval_r10.c +=================================================================== +--- a/src/libgfortran/generated/minval_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minval_r10.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + } + else + { +Index: libgfortran/generated/parity_l8.c +=================================================================== +--- a/src/libgfortran/generated/parity_l8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/parity_l8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +Index: libgfortran/generated/minval_i4.c +=================================================================== +--- a/src/libgfortran/generated/minval_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minval_i4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/maxloc1_8_i2.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_8_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_8_i2.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/
+@@ -294,8 +293,7 @@
+ 
+ }
+ 
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ 
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/any_l8.c
+===================================================================
+--- a/src/libgfortran/generated/any_l8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/any_l8.c (.../branches/gcc-4_9-branch)
+@@ -101,8 +101,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_LOGICAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ if (alloc_size == 0)
+ {
+@@ -111,7 +110,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_8));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc0_16_r10.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_16_r10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_16_r10.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/minloc0_4_i16.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_4_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_4_i16.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/maxloc0_8_r8.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_8_r8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_8_r8.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/minloc1_4_r10.c
+===================================================================
+--- a/src/libgfortran/generated/minloc1_4_r10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc1_4_r10.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+ 
+ }
+ 
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ 
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc1_8_i16.c
+===================================================================
+--- a/src/libgfortran/generated/minloc1_8_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc1_8_i16.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+ 
+ }
+ 
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ 
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc0_8_r10.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_8_r10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_8_r10.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/unpack_i4.c
+===================================================================
+--- a/src/libgfortran/generated/unpack_i4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/unpack_i4.c (.../branches/gcc-4_9-branch)
+@@ -99,7 +99,7 @@
+ rs *= extent[n];
+ }
+ ret->offset = 0;
+- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_4));
++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -244,7 +244,7 @@
+ rs *= extent[n];
+ }
+ ret->offset = 0;
+- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_4));
++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc1_16_r4.c
+===================================================================
+--- a/src/libgfortran/generated/minloc1_16_r4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc1_16_r4.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+ 
+ }
+ 
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ 
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+Index: libgfortran/generated/product_i8.c
+===================================================================
+--- a/src/libgfortran/generated/product_i8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/product_i8.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/minloc0_16_r8.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_16_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_16_r8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/count_2_l.c +=================================================================== +--- a/src/libgfortran/generated/count_2_l.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/count_2_l.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + } + else + { +Index: libgfortran/generated/transpose_r8.c +=================================================================== +--- a/src/libgfortran/generated/transpose_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_r8.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_REAL_8) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof 
(GFC_REAL_8)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/cshift1_8.c +=================================================================== +--- a/src/libgfortran/generated/cshift1_8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/cshift1_8.c (.../branches/gcc-4_9-branch) +@@ -80,7 +80,7 @@ + { + int i; + +- ret->base_addr = xmalloc (size * arraysize); ++ ret->base_addr = xmallocarray (arraysize, size); + ret->offset = 0; + ret->dtype = array->dtype; + for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++) +Index: libgfortran/generated/matmul_i4.c +=================================================================== +--- a/src/libgfortran/generated/matmul_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_i4.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_INTEGER_4) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/pack_r10.c +=================================================================== +--- a/src/libgfortran/generated/pack_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_r10.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_REAL_10) * total); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_REAL_10)); + + if (total == 0) + return; +Index: libgfortran/generated/minloc1_16_i2.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_16_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_16_i2.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/in_pack_i8.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_i8.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = (GFC_INTEGER_8 *)xmalloc (ssize * sizeof (GFC_INTEGER_8)); ++ destptr = xmallocarray (ssize, sizeof (GFC_INTEGER_8)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/transpose_r16.c +=================================================================== +--- a/src/libgfortran/generated/transpose_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_r16.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_REAL_16) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_REAL_16)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc1_4_i4.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_4_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_4_i4.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/maxval_i1.c +=================================================================== +--- a/src/libgfortran/generated/maxval_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxval_i1.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + } + else + { +Index: libgfortran/generated/product_c16.c +=================================================================== +--- a/src/libgfortran/generated/product_c16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_c16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_COMPLEX_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_16)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_16)); + } + else + { +Index: libgfortran/generated/reshape_r4.c +=================================================================== +--- a/src/libgfortran/generated/reshape_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/reshape_r4.c (.../branches/gcc-4_9-branch) +@@ -111,11 +111,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof (GFC_REAL_4); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/generated/iany_i8.c +=================================================================== +--- a/src/libgfortran/generated/iany_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iany_i8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/
+@@ -272,8 +271,7 @@
+ 
+ }
+ 
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ 
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/cshift1_16.c
+===================================================================
+--- a/src/libgfortran/generated/cshift1_16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/cshift1_16.c (.../branches/gcc-4_9-branch)
+@@ -80,7 +80,7 @@
+ {
+ int i;
+ 
+- ret->base_addr = xmalloc (size * arraysize);
++ ret->base_addr = xmallocarray (arraysize, size);
+ ret->offset = 0;
+ ret->dtype = array->dtype;
+ for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++)
+Index: libgfortran/generated/maxloc0_4_i1.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_4_i1.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_4_i1.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/minloc0_4_i8.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_4_i8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_4_i8.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/spread_c16.c
+===================================================================
+--- a/src/libgfortran/generated/spread_c16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/spread_c16.c (.../branches/gcc-4_9-branch)
+@@ -101,8 +101,8 @@
+ }
+ ret->offset = 0;
+ 
+- /* xmalloc allocates a single byte for zero size. */
+- ret->base_addr = xmalloc (rs * sizeof(GFC_COMPLEX_16));
++ /* xmallocarray allocates a single byte for zero size. */
++ ret->base_addr = xmallocarray (rs, sizeof(GFC_COMPLEX_16));
+ if (rs <= 0)
+ return;
+ }
+@@ -244,7 +244,7 @@
+ 
+ if (ret->base_addr == NULL)
+ {
+- ret->base_addr = xmalloc (ncopies * sizeof (GFC_COMPLEX_16));
++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_COMPLEX_16));
+ ret->offset = 0;
+ GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1);
+ }
+Index: libgfortran/generated/maxval_r4.c
+===================================================================
+--- a/src/libgfortran/generated/maxval_r4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxval_r4.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+ 
+- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+ 
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. 
*/ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_4)); + } + else + { +Index: libgfortran/generated/minval_r8.c +=================================================================== +--- a/src/libgfortran/generated/minval_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minval_r8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + } + else + { +Index: libgfortran/generated/minloc1_16_r16.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_16_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_16_r16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/unpack_i16.c +=================================================================== +--- a/src/libgfortran/generated/unpack_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_i16.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_16)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_16)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/sum_i8.c +=================================================================== +--- a/src/libgfortran/generated/sum_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_i8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/pack_i1.c +=================================================================== +--- a/src/libgfortran/generated/pack_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_i1.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_1) * total); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_INTEGER_1)); + + if (total == 0) + return; +Index: libgfortran/generated/any_l16.c +=================================================================== +--- a/src/libgfortran/generated/any_l16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/any_l16.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_16)); + } + else + { +Index: libgfortran/generated/spread_i8.c +=================================================================== +--- a/src/libgfortran/generated/spread_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_i8.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_INTEGER_8)); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_INTEGER_8)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_INTEGER_8)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_INTEGER_8)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/maxval_i2.c +=================================================================== +--- a/src/libgfortran/generated/maxval_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxval_i2.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + } + else + { +Index: libgfortran/generated/maxloc1_8_i4.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_8_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_8_i4.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/unpack_r8.c +=================================================================== +--- a/src/libgfortran/generated/unpack_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_r8.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_REAL_8)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_8)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_REAL_8)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_8)); + } + else + { +Index: libgfortran/generated/maxloc0_4_r4.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_4_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_4_r4.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/all_l1.c +=================================================================== +--- a/src/libgfortran/generated/all_l1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/all_l1.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ 
retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_1)); + } + else + { +Index: libgfortran/generated/matmul_r8.c +=================================================================== +--- a/src/libgfortran/generated/matmul_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_r8.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_REAL_8) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_8)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/minloc0_4_r16.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_4_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_4_r16.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/maxloc0_4_i2.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_4_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_4_i2.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc1_8_r16.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_8_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_8_r16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof 
(GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/pack_c10.c +=================================================================== +--- a/src/libgfortran/generated/pack_c10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_c10.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_COMPLEX_10) * total); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_COMPLEX_10)); + + if (total == 0) + return; +Index: libgfortran/generated/pack_r4.c +=================================================================== +--- a/src/libgfortran/generated/pack_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_r4.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_REAL_4) * total); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_REAL_4)); + + if (total == 0) + return; +Index: libgfortran/generated/transpose_c16.c +=================================================================== +--- a/src/libgfortran/generated/transpose_c16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_c16.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_COMPLEX_16) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_COMPLEX_16)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/maxloc0_8_i8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_8_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_8_i8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc1_4_r8.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_4_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_4_r8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/minloc1_16_i4.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_16_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_16_i4.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/minloc0_16_i8.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_16_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_16_i8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = 
xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/pack_i2.c +=================================================================== +--- a/src/libgfortran/generated/pack_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_i2.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_2) * total); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_INTEGER_2)); + + if (total == 0) + return; +Index: libgfortran/generated/transpose_i8.c +=================================================================== +--- a/src/libgfortran/generated/transpose_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/transpose_i8.c (.../branches/gcc-4_9-branch) +@@ -60,7 +60,8 @@ + GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1, + GFC_DESCRIPTOR_EXTENT(source, 1)); + +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * size0 ((array_t *) ret)); ++ ret->base_addr = xmallocarray (size0 ((array_t *) ret), ++ sizeof (GFC_INTEGER_8)); + ret->offset = 0; + } else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/eoshift1_16.c +=================================================================== +--- a/src/libgfortran/generated/eoshift1_16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/eoshift1_16.c (.../branches/gcc-4_9-branch) +@@ -105,8 +105,8 @@ + GFC_DIMENSION_SET(ret->dim[i], 0, ub, str); + + } +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (size * arraysize); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (arraysize, size); + + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/all_l2.c +=================================================================== +--- a/src/libgfortran/generated/all_l2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/all_l2.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_2)); + } + else + { +Index: libgfortran/generated/product_c4.c +=================================================================== +--- a/src/libgfortran/generated/product_c4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_c4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_COMPLEX_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_4)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_4)); + } + else + { +Index: libgfortran/generated/iall_i1.c +=================================================================== +--- a/src/libgfortran/generated/iall_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iall_i1.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + } + else + { +Index: libgfortran/generated/reshape_i4.c +=================================================================== +--- a/src/libgfortran/generated/reshape_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/reshape_i4.c (.../branches/gcc-4_9-branch) +@@ -111,11 +111,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof (GFC_INTEGER_4); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/generated/in_pack_r10.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_r10.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = (GFC_REAL_10 *)xmalloc (ssize * sizeof (GFC_REAL_10)); ++ destptr = xmallocarray (ssize, sizeof (GFC_REAL_10)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/in_pack_c4.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_c4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_c4.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. 
*/ +- destptr = (GFC_COMPLEX_4 *)xmalloc (ssize * sizeof (GFC_COMPLEX_4)); ++ destptr = xmallocarray (ssize, sizeof (GFC_COMPLEX_4)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/all_l16.c +=================================================================== +--- a/src/libgfortran/generated/all_l16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/all_l16.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_16)); + } + else + { +Index: libgfortran/generated/maxloc0_16_i1.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_16_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_16_i1.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/maxloc1_8_r8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_8_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_8_r8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/minval_i16.c +=================================================================== +--- a/src/libgfortran/generated/minval_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minval_i16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/reshape_r10.c +=================================================================== +--- a/src/libgfortran/generated/reshape_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/reshape_r10.c (.../branches/gcc-4_9-branch) +@@ -111,11 +111,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof (GFC_REAL_10); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/generated/unpack_r16.c +=================================================================== +--- a/src/libgfortran/generated/unpack_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_r16.c 
(.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_REAL_16)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_16)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_REAL_16)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_REAL_16)); + } + else + { +Index: libgfortran/generated/maxval_i4.c +=================================================================== +--- a/src/libgfortran/generated/maxval_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxval_i4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/minval_i8.c +=================================================================== +--- a/src/libgfortran/generated/minval_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minval_i8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/maxloc0_16_i16.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_16_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_16_i16.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/shape_i4.c +=================================================================== +--- a/src/libgfortran/generated/shape_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/shape_i4.c (.../branches/gcc-4_9-branch) +@@ -49,7 +49,7 @@ + { + GFC_DIMENSION_SET(ret->dim[0], 0, rank - 1, 1); + ret->offset = 0; +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ ret->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + + stride = GFC_DESCRIPTOR_STRIDE(ret,0); +Index: libgfortran/generated/minloc1_4_i16.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_4_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_4_i16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/maxloc0_4_r10.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_4_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_4_r10.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/maxloc0_8_i16.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_8_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_8_i16.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/iall_i2.c 
+=================================================================== +--- a/src/libgfortran/generated/iall_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/iall_i2.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2)); + } + else + { +Index: libgfortran/generated/maxloc1_8_r10.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_8_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_8_r10.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/maxloc0_16_r4.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_16_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_16_r4.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc0_8_i1.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_8_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_8_i1.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/minloc1_16_r8.c 
+=================================================================== +--- a/src/libgfortran/generated/minloc1_16_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_16_r8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/unpack_i8.c +=================================================================== +--- a/src/libgfortran/generated/unpack_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/unpack_i8.c (.../branches/gcc-4_9-branch) +@@ -99,7 +99,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_8)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -244,7 +244,7 @@ + rs *= extent[n]; + } + ret->offset = 0; +- ret->base_addr = xmalloc (rs * sizeof (GFC_INTEGER_8)); ++ ret->base_addr = xmallocarray (rs, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/maxloc0_4_i4.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_4_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_4_i4.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else 
if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/count_4_l.c +=================================================================== +--- a/src/libgfortran/generated/count_4_l.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/count_4_l.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/sum_r10.c +=================================================================== +--- a/src/libgfortran/generated/sum_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_r10.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + } + else + { +Index: libgfortran/generated/sum_c4.c +=================================================================== +--- a/src/libgfortran/generated/sum_c4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_c4.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_COMPLEX_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_4)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_4)); + } + else + { +Index: libgfortran/generated/maxloc1_16_r10.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_16_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_16_r10.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/pack_i16.c +=================================================================== +--- a/src/libgfortran/generated/pack_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_i16.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * total); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_INTEGER_16)); + + if (total == 0) + return; +Index: libgfortran/generated/matmul_i8.c +=================================================================== +--- a/src/libgfortran/generated/matmul_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_i8.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_INTEGER_8) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/maxloc0_16_i2.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_16_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_16_i2.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/spread_c4.c +=================================================================== +--- a/src/libgfortran/generated/spread_c4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_c4.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_COMPLEX_4)); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_COMPLEX_4)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_COMPLEX_4)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_COMPLEX_4)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/maxval_r10.c +=================================================================== +--- a/src/libgfortran/generated/maxval_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxval_r10.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + } + else + { +Index: libgfortran/generated/pack_i4.c +=================================================================== +--- a/src/libgfortran/generated/pack_i4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_i4.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * total); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_INTEGER_4)); + + if (total == 0) + return; +Index: libgfortran/generated/maxloc1_4_i1.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_4_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_4_i1.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/matmul_r10.c +=================================================================== +--- a/src/libgfortran/generated/matmul_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_r10.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_REAL_10) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_10)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/minloc1_4_i8.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_4_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_4_i8.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4)); + } + else + { +Index: libgfortran/generated/minloc0_8_r4.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_8_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_8_r4.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/matmul_l4.c +=================================================================== +--- a/src/libgfortran/generated/matmul_l4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_l4.c (.../branches/gcc-4_9-branch) +@@ -88,7 +88,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_LOGICAL_4) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_LOGICAL_4)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/reshape_r8.c +=================================================================== +--- a/src/libgfortran/generated/reshape_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/reshape_r8.c (.../branches/gcc-4_9-branch) +@@ -111,11 +111,11 @@ + ret->offset = 0; + + if (unlikely (rs < 1)) +- alloc_size = 1; ++ alloc_size = 0; + else +- alloc_size = rs * sizeof (GFC_REAL_8); ++ alloc_size = rs; + +- ret->base_addr = xmalloc (alloc_size); ++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8)); + ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim; + } + +Index: libgfortran/generated/in_pack_c10.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_c10.c 
++++ b/src/libgfortran/generated/in_pack_c10.c (.../branches/gcc-4_9-branch)
+@@ -76,7 +76,7 @@
+ return source->base_addr;
+
+ /* Allocate storage for the destination. */
+- destptr = (GFC_COMPLEX_10 *)xmalloc (ssize * sizeof (GFC_COMPLEX_10));
++ destptr = xmallocarray (ssize, sizeof (GFC_COMPLEX_10));
+ dest = destptr;
+ src = source->base_addr;
+ stride0 = stride[0];
+Index: libgfortran/generated/all_l4.c
+===================================================================
+--- a/src/libgfortran/generated/all_l4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/all_l4.c (.../branches/gcc-4_9-branch)
+@@ -101,8 +101,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_LOGICAL_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -111,7 +110,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_4));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc0_8_i2.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_8_i2.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_8_i2.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/norm2_r16.c
+===================================================================
+--- a/src/libgfortran/generated/norm2_r16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/norm2_r16.c (.../branches/gcc-4_9-branch)
+@@ -105,10 +105,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+Index: libgfortran/generated/reshape_c10.c
+===================================================================
+--- a/src/libgfortran/generated/reshape_c10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/reshape_c10.c (.../branches/gcc-4_9-branch)
+@@ -111,11 +111,11 @@
+ ret->offset = 0;
+
+ if (unlikely (rs < 1))
+- alloc_size = 1;
++ alloc_size = 0;
+ else
+- alloc_size = rs * sizeof (GFC_COMPLEX_10);
++ alloc_size = rs;
+
+- ret->base_addr = xmalloc (alloc_size);
++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_10));
+ ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim;
+ }
+
+Index: libgfortran/generated/unpack_c16.c
+===================================================================
+--- a/src/libgfortran/generated/unpack_c16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/unpack_c16.c (.../branches/gcc-4_9-branch)
+@@ -99,7 +99,7 @@
+ rs *= extent[n];
+ }
+ ret->offset = 0;
+- ret->base_addr = xmalloc (rs * sizeof (GFC_COMPLEX_16));
++ ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_16));
+ }
+ else
+ {
+@@ -244,7 +244,7 @@
+ rs *= extent[n];
+ }
+ ret->offset = 0;
+- ret->base_addr = xmalloc (rs * sizeof (GFC_COMPLEX_16));
++ ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_16));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc1_4_r4.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc1_4_r4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc1_4_r4.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/maxval_r8.c
+===================================================================
+--- a/src/libgfortran/generated/maxval_r8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxval_r8.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -286,8 +285,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -299,7 +297,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8));
+
+ }
+ else
+@@ -472,8 +470,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_REAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -482,7 +479,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_8));
+ }
+ else
+ {
+Index: libgfortran/generated/transpose_c4.c
+===================================================================
+--- a/src/libgfortran/generated/transpose_c4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/transpose_c4.c (.../branches/gcc-4_9-branch)
+@@ -60,7 +60,8 @@
+ GFC_DIMENSION_SET(ret->dim[1], 0, GFC_DESCRIPTOR_EXTENT(source,0) - 1,
+ GFC_DESCRIPTOR_EXTENT(source, 1));
+
+- ret->base_addr = xmalloc (sizeof (GFC_COMPLEX_4) * size0 ((array_t *) ret));
++ ret->base_addr = xmallocarray (size0 ((array_t *) ret),
++ sizeof (GFC_COMPLEX_4));
+ ret->offset = 0;
+ } else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/eoshift1_4.c
+===================================================================
+--- a/src/libgfortran/generated/eoshift1_4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/eoshift1_4.c (.../branches/gcc-4_9-branch)
+@@ -105,8 +105,8 @@
+ GFC_DIMENSION_SET(ret->dim[i], 0, ub, str);
+
+ }
+- /* xmalloc allocates a single byte for zero size. */
+- ret->base_addr = xmalloc (size * arraysize);
++ /* xmallocarray allocates a single byte for zero size. */
++ ret->base_addr = xmallocarray (arraysize, size);
+
+ }
+ else if (unlikely (compile_options.bounds_check))
+Index: libgfortran/generated/minval_r16.c
+===================================================================
+--- a/src/libgfortran/generated/minval_r16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minval_r16.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -286,8 +285,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -299,7 +297,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16));
+
+ }
+ else
+@@ -472,8 +470,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_REAL_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -482,7 +479,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16));
+ }
+ else
+ {
+Index: libgfortran/generated/iany_i16.c
+===================================================================
+--- a/src/libgfortran/generated/iany_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/iany_i16.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc1_4_i2.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc1_4_i2.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc1_4_i2.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc1_8_i8.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc1_8_i8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc1_8_i8.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc0_4_r8.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_4_r8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_4_r8.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/maxloc0_16_r16.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_16_r16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_16_r16.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/sum_c10.c
+===================================================================
+--- a/src/libgfortran/generated/sum_c10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/sum_c10.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_COMPLEX_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_10));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_COMPLEX_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_10));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_COMPLEX_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_10));
+ }
+ else
+ {
+Index: libgfortran/generated/iall_i4.c
+===================================================================
+--- a/src/libgfortran/generated/iall_i4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/iall_i4.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc1_4_r16.c
+===================================================================
+--- a/src/libgfortran/generated/minloc1_4_r16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc1_4_r16.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc0_8_r16.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_8_r16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_8_r16.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/pack_r8.c
+===================================================================
+--- a/src/libgfortran/generated/pack_r8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/pack_r8.c (.../branches/gcc-4_9-branch)
+@@ -167,8 +167,8 @@
+
+ ret->offset = 0;
+
+- /* xmalloc allocates a single byte for zero size. */
+- ret->base_addr = xmalloc (sizeof (GFC_REAL_8) * total);
++ /* xmallocarray allocates a single byte for zero size. */
++ ret->base_addr = xmallocarray (total, sizeof (GFC_REAL_8));
+
+ if (total == 0)
+ return;
+Index: libgfortran/generated/matmul_c10.c
+===================================================================
+--- a/src/libgfortran/generated/matmul_c10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/matmul_c10.c (.../branches/gcc-4_9-branch)
+@@ -124,7 +124,7 @@
+ }
+
+ retarray->base_addr
+- = xmalloc (sizeof (GFC_COMPLEX_10) * size0 ((array_t *) retarray));
++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_10));
+ retarray->offset = 0;
+ }
+ else if (unlikely (compile_options.bounds_check))
+Index: libgfortran/generated/maxloc0_16_i4.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_16_i4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_16_i4.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/pack_r16.c
+===================================================================
+--- a/src/libgfortran/generated/pack_r16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/pack_r16.c (.../branches/gcc-4_9-branch)
+@@ -167,8 +167,8 @@
+
+ ret->offset = 0;
+
+- /* xmalloc allocates a single byte for zero size. */
+- ret->base_addr = xmalloc (sizeof (GFC_REAL_16) * total);
++ /* xmallocarray allocates a single byte for zero size. */
++ ret->base_addr = xmallocarray (total, sizeof (GFC_REAL_16));
+
+ if (total == 0)
+ return;
+Index: libgfortran/generated/minloc1_16_i8.c
+===================================================================
+--- a/src/libgfortran/generated/minloc1_16_i8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc1_16_i8.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc0_16_r10.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_16_r10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_16_r10.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/unpack_c4.c
+===================================================================
+--- a/src/libgfortran/generated/unpack_c4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/unpack_c4.c (.../branches/gcc-4_9-branch)
+@@ -99,7 +99,7 @@
+ rs *= extent[n];
+ }
+ ret->offset = 0;
+- ret->base_addr = xmalloc (rs * sizeof (GFC_COMPLEX_4));
++ ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_4));
+ }
+ else
+ {
+@@ -244,7 +244,7 @@
+ rs *= extent[n];
+ }
+ ret->offset = 0;
+- ret->base_addr = xmalloc (rs * sizeof (GFC_COMPLEX_4));
++ ret->base_addr = xmallocarray (rs, sizeof (GFC_COMPLEX_4));
+ }
+ else
+ {
+Index: libgfortran/generated/iparity_i1.c
+===================================================================
+--- a/src/libgfortran/generated/iparity_i1.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/iparity_i1.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1));
+ }
+ else
+ {
+Index: libgfortran/generated/product_c8.c
+===================================================================
+--- a/src/libgfortran/generated/product_c8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/product_c8.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_COMPLEX_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_COMPLEX_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_8));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_COMPLEX_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_8));
+ }
+ else
+ {
+Index: libgfortran/generated/in_pack_i16.c
+===================================================================
+--- a/src/libgfortran/generated/in_pack_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/in_pack_i16.c (.../branches/gcc-4_9-branch)
+@@ -76,7 +76,7 @@
+ return source->base_addr;
+
+ /* Allocate storage for the destination. */
+- destptr = (GFC_INTEGER_16 *)xmalloc (ssize * sizeof (GFC_INTEGER_16));
++ destptr = xmallocarray (ssize, sizeof (GFC_INTEGER_16));
+ dest = destptr;
+ src = source->base_addr;
+ stride0 = stride[0];
+Index: libgfortran/generated/minloc0_8_i4.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_8_i4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_8_i4.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/matmul_c4.c
+===================================================================
+--- a/src/libgfortran/generated/matmul_c4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/matmul_c4.c (.../branches/gcc-4_9-branch)
+@@ -124,7 +124,7 @@
+ }
+
+ retarray->base_addr
+- = xmalloc (sizeof (GFC_COMPLEX_4) * size0 ((array_t *) retarray));
++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_4));
+ retarray->offset = 0;
+ }
+ else if (unlikely (compile_options.bounds_check))
+Index: libgfortran/generated/reshape_i8.c
+===================================================================
+--- a/src/libgfortran/generated/reshape_i8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/reshape_i8.c (.../branches/gcc-4_9-branch)
+@@ -111,11 +111,11 @@
+ ret->offset = 0;
+
+ if (unlikely (rs < 1))
+- alloc_size = 1;
++ alloc_size = 0;
+ else
+- alloc_size = rs * sizeof (GFC_INTEGER_8);
++ alloc_size = rs;
+
+- ret->base_addr = xmalloc (alloc_size);
++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim;
+ }
+
+Index: libgfortran/generated/in_pack_c8.c
+===================================================================
+--- a/src/libgfortran/generated/in_pack_c8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/in_pack_c8.c (.../branches/gcc-4_9-branch)
+@@ -76,7 +76,7 @@
+ return source->base_addr;
+
+ /* Allocate storage for the destination. */
+- destptr = (GFC_COMPLEX_8 *)xmalloc (ssize * sizeof (GFC_COMPLEX_8));
++ destptr = xmallocarray (ssize, sizeof (GFC_COMPLEX_8));
+ dest = destptr;
+ src = source->base_addr;
+ stride0 = stride[0];
+Index: libgfortran/generated/bessel_r10.c
+===================================================================
+--- a/src/libgfortran/generated/bessel_r10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/bessel_r10.c (.../branches/gcc-4_9-branch)
+@@ -55,7 +55,7 @@
+ {
+ size_t size = n2 < n1 ? 0 : n2-n1+1;
+ GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1);
+- ret->base_addr = xmalloc (sizeof (GFC_REAL_10) * size);
++ ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_10));
+ ret->offset = 0;
+ }
+
+@@ -122,7 +122,7 @@
+ {
+ size_t size = n2 < n1 ? 0 : n2-n1+1;
+ GFC_DIMENSION_SET(ret->dim[0], 0, size-1, 1);
+- ret->base_addr = xmalloc (sizeof (GFC_REAL_10) * size);
++ ret->base_addr = xmallocarray (size, sizeof (GFC_REAL_10));
+ ret->offset = 0;
+ }
+
+Index: libgfortran/generated/iall_i16.c
+===================================================================
+--- a/src/libgfortran/generated/iall_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/iall_i16.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc1_16_i1.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc1_16_i1.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc1_16_i1.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+Index: libgfortran/generated/reshape_i16.c
+===================================================================
+--- a/src/libgfortran/generated/reshape_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/reshape_i16.c (.../branches/gcc-4_9-branch)
+@@ -111,11 +111,11 @@
+ ret->offset = 0;
+
+ if (unlikely (rs < 1))
+- alloc_size = 1;
++ alloc_size = 0;
+ else
+- alloc_size = rs * sizeof (GFC_INTEGER_16);
++ alloc_size = rs;
+
+- ret->base_addr = xmalloc (alloc_size);
++ ret->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ ret->dtype = (source->dtype & ~GFC_DTYPE_RANK_MASK) | rdim;
+ }
+
+Index: libgfortran/generated/count_16_l.c
+===================================================================
+--- a/src/libgfortran/generated/count_16_l.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/count_16_l.c (.../branches/gcc-4_9-branch)
+@@ -101,8 +101,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -111,7 +110,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc1_8_i1.c
+===================================================================
+--- a/src/libgfortran/generated/minloc1_8_i1.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc1_8_i1.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc1_4_i4.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc1_4_i4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc1_4_i4.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/maxval_i8.c
+===================================================================
+--- a/src/libgfortran/generated/maxval_i8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxval_i8.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -286,8 +285,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -299,7 +297,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+
+ }
+ else
+@@ -472,8 +470,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -482,7 +479,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/eoshift3_16.c
+===================================================================
+--- a/src/libgfortran/generated/eoshift3_16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/eoshift3_16.c (.../branches/gcc-4_9-branch)
+@@ -89,7 +89,7 @@
+ {
+ int i;
+
+- ret->base_addr = xmalloc (size * arraysize);
++ ret->base_addr = xmallocarray (arraysize, size);
+ ret->offset = 0;
+ ret->dtype = array->dtype;
+ for (i = 0; i < GFC_DESCRIPTOR_RANK (array); i++)
+@@ -107,8 +107,8 @@
+ GFC_DIMENSION_SET(ret->dim[i], 0, ub, str);
+
+ }
+- /* xmalloc allocates a single byte for zero size. */
+- ret->base_addr = xmalloc (size * arraysize);
++ /* xmallocarray allocates a single byte for zero size. */
++ ret->base_addr = xmallocarray (arraysize, size);
+
+ }
+ else if (unlikely (compile_options.bounds_check))
+Index: libgfortran/generated/shape_i8.c
+===================================================================
+--- a/src/libgfortran/generated/shape_i8.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/shape_i8.c (.../branches/gcc-4_9-branch)
+@@ -49,7 +49,7 @@
+ {
+ GFC_DIMENSION_SET(ret->dim[0], 0, rank - 1, 1);
+ ret->offset = 0;
+- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ ret->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+
+ stride = GFC_DESCRIPTOR_STRIDE(ret,0);
+Index: libgfortran/generated/maxloc0_4_i16.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc0_4_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc0_4_i16.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/maxloc1_4_r10.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc1_4_r10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc1_4_r10.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_4) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_4));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc1_8_i16.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc1_8_i16.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc1_8_i16.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -294,8 +293,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -307,7 +305,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+
+ }
+ else
+@@ -485,8 +483,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -495,7 +492,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+Index: libgfortran/generated/minloc0_8_r10.c
+===================================================================
+--- a/src/libgfortran/generated/minloc0_8_r10.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/minloc0_8_r10.c (.../branches/gcc-4_9-branch)
+@@ -58,7 +58,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -199,7 +199,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else
+ {
+@@ -367,7 +367,7 @@
+ GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1);
+ retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1;
+ retarray->offset = 0;
+- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank);
++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8));
+ }
+ else if (unlikely (compile_options.bounds_check))
+ {
+Index: libgfortran/generated/iparity_i2.c
+===================================================================
+--- a/src/libgfortran/generated/iparity_i2.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/iparity_i2.c (.../branches/gcc-4_9-branch)
+@@ -97,10 +97,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
+@@ -272,8 +271,7 @@
+
+ }
+
+- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+@@ -285,7 +283,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2));
+
+ }
+ else
+@@ -430,8 +428,7 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_2) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+ if (alloc_size == 0)
+ {
+@@ -440,7 +437,7 @@
+ return;
+ }
+ else
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_2));
+ }
+ else
+ {
+Index: libgfortran/generated/maxloc1_16_r4.c
+===================================================================
+--- a/src/libgfortran/generated/maxloc1_16_r4.c (.../tags/gcc_4_9_1_release)
++++ b/src/libgfortran/generated/maxloc1_16_r4.c (.../branches/gcc-4_9-branch)
+@@ -98,10 +98,9 @@
+ retarray->offset = 0;
+ retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank;
+
+- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1)
+- * extent[rank-1];
++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
+
+- retarray->base_addr = xmalloc (alloc_size);
++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16));
+ if (alloc_size == 0)
+ {
+ /* Make sure we have a zero-sized array. */
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/maxloc0_16_r8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_16_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_16_r8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_16) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_16)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/sum_i16.c +=================================================================== +--- a/src/libgfortran/generated/sum_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_i16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/maxloc0_4_i8.c +=================================================================== +--- a/src/libgfortran/generated/maxloc0_4_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc0_4_i8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_4) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_4)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/pack_c16.c +=================================================================== +--- a/src/libgfortran/generated/pack_c16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_c16.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_COMPLEX_16) * total); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_COMPLEX_16)); + + if (total == 0) + return; +Index: libgfortran/generated/maxloc1_16_i16.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_16_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_16_i16.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/minloc1_8_r4.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_8_r4.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_8_r4.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/sum_c8.c +=================================================================== +--- a/src/libgfortran/generated/sum_c8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/sum_c8.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_COMPLEX_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_8)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_COMPLEX_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_COMPLEX_8)); + } + else + { +Index: libgfortran/generated/maxloc1_16_i2.c +=================================================================== +--- a/src/libgfortran/generated/maxloc1_16_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxloc1_16_i2.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/parity_l1.c +=================================================================== +--- a/src/libgfortran/generated/parity_l1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/parity_l1.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_1)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +Index: libgfortran/generated/maxval_i16.c +=================================================================== +--- a/src/libgfortran/generated/maxval_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/maxval_i16.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -286,8 +285,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -299,7 +297,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + + } + else +@@ -472,8 +470,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_16) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -482,7 +479,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_16)); + } + else + { +Index: libgfortran/generated/spread_c8.c +=================================================================== +--- a/src/libgfortran/generated/spread_c8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/spread_c8.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,8 @@ + } + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (rs * sizeof(GFC_COMPLEX_8)); ++ /* xmallocarray allocates a single byte for zero size. */ ++ ret->base_addr = xmallocarray (rs, sizeof(GFC_COMPLEX_8)); + if (rs <= 0) + return; + } +@@ -244,7 +244,7 @@ + + if (ret->base_addr == NULL) + { +- ret->base_addr = xmalloc (ncopies * sizeof (GFC_COMPLEX_8)); ++ ret->base_addr = xmallocarray (ncopies, sizeof (GFC_COMPLEX_8)); + ret->offset = 0; + GFC_DIMENSION_SET(ret->dim[0], 0, ncopies - 1, 1); + } +Index: libgfortran/generated/matmul_i16.c +=================================================================== +--- a/src/libgfortran/generated/matmul_i16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_i16.c (.../branches/gcc-4_9-branch) +@@ -124,7 +124,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_INTEGER_16) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/pack_i8.c +=================================================================== +--- a/src/libgfortran/generated/pack_i8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/pack_i8.c (.../branches/gcc-4_9-branch) +@@ -167,8 +167,8 @@ + + ret->offset = 0; + +- /* xmalloc allocates a single byte for zero size. */ +- ret->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * total); ++ /* xmallocarray allocates a single byte for zero size. 
*/ ++ ret->base_addr = xmallocarray (total, sizeof (GFC_INTEGER_8)); + + if (total == 0) + return; +Index: libgfortran/generated/any_l1.c +=================================================================== +--- a/src/libgfortran/generated/any_l1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/any_l1.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_1)); + } + else + { +Index: libgfortran/generated/minloc1_8_i2.c +=================================================================== +--- a/src/libgfortran/generated/minloc1_8_i2.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc1_8_i2.c (.../branches/gcc-4_9-branch) +@@ -98,10 +98,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -294,8 +293,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -307,7 +305,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + + } + else +@@ -485,8 +483,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -495,7 +492,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_8)); + } + else + { +Index: libgfortran/generated/minloc0_8_r8.c +=================================================================== +--- a/src/libgfortran/generated/minloc0_8_r8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/minloc0_8_r8.c (.../branches/gcc-4_9-branch) +@@ -58,7 +58,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -199,7 +199,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank - 1, 1); + retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else + { +@@ -367,7 +367,7 @@ + GFC_DIMENSION_SET(retarray->dim[0], 0, rank-1, 1); + 
retarray->dtype = (retarray->dtype & ~GFC_DTYPE_RANK_MASK) | 1; + retarray->offset = 0; +- retarray->base_addr = xmalloc (sizeof (GFC_INTEGER_8) * rank); ++ retarray->base_addr = xmallocarray (rank, sizeof (GFC_INTEGER_8)); + } + else if (unlikely (compile_options.bounds_check)) + { +Index: libgfortran/generated/matmul_l8.c +=================================================================== +--- a/src/libgfortran/generated/matmul_l8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/matmul_l8.c (.../branches/gcc-4_9-branch) +@@ -88,7 +88,7 @@ + } + + retarray->base_addr +- = xmalloc (sizeof (GFC_LOGICAL_8) * size0 ((array_t *) retarray)); ++ = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_LOGICAL_8)); + retarray->offset = 0; + } + else if (unlikely (compile_options.bounds_check)) +Index: libgfortran/generated/product_r10.c +=================================================================== +--- a/src/libgfortran/generated/product_r10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_r10.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. */ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_REAL_10) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_10)); + } + else + { +Index: libgfortran/generated/product_i1.c +=================================================================== +--- a/src/libgfortran/generated/product_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/product_i1.c (.../branches/gcc-4_9-branch) +@@ -97,10 +97,9 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + if (alloc_size == 0) + { + /* Make sure we have a zero-sized array. 
*/ +@@ -272,8 +271,7 @@ + + } + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; +@@ -285,7 +283,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + + } + else +@@ -430,8 +428,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_INTEGER_1) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -440,7 +437,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_INTEGER_1)); + } + else + { +Index: libgfortran/generated/all_l8.c +=================================================================== +--- a/src/libgfortran/generated/all_l8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/all_l8.c (.../branches/gcc-4_9-branch) +@@ -101,8 +101,7 @@ + retarray->offset = 0; + retarray->dtype = (array->dtype & ~GFC_DTYPE_RANK_MASK) | rank; + +- alloc_size = sizeof (GFC_LOGICAL_8) * GFC_DESCRIPTOR_STRIDE(retarray,rank-1) +- * extent[rank-1]; ++ alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1]; + + if (alloc_size == 0) + { +@@ -111,7 +110,7 @@ + return; + } + else +- retarray->base_addr = xmalloc (alloc_size); ++ retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_LOGICAL_8)); + } + else + { +Index: libgfortran/generated/in_pack_r16.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_r16.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_r16.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. */ +- destptr = (GFC_REAL_16 *)xmalloc (ssize * sizeof (GFC_REAL_16)); ++ destptr = xmallocarray (ssize, sizeof (GFC_REAL_16)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/generated/in_pack_i1.c +=================================================================== +--- a/src/libgfortran/generated/in_pack_i1.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/generated/in_pack_i1.c (.../branches/gcc-4_9-branch) +@@ -76,7 +76,7 @@ + return source->base_addr; + + /* Allocate storage for the destination. 
*/ +- destptr = (GFC_INTEGER_1 *)xmalloc (ssize * sizeof (GFC_INTEGER_1)); ++ destptr = xmallocarray (ssize, sizeof (GFC_INTEGER_1)); + dest = destptr; + src = source->base_addr; + stride0 = stride[0]; +Index: libgfortran/libgfortran.h +=================================================================== +--- a/src/libgfortran/libgfortran.h (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/libgfortran.h (.../branches/gcc-4_9-branch) +@@ -768,6 +768,9 @@ + extern void *xmalloc (size_t) __attribute__ ((malloc)); + internal_proto(xmalloc); + ++extern void *xmallocarray (size_t, size_t) __attribute__ ((malloc)); ++internal_proto(xmallocarray); ++ + extern void *xcalloc (size_t, size_t) __attribute__ ((malloc)); + internal_proto(xcalloc); + +Index: libgfortran/io/list_read.c +=================================================================== +--- a/src/libgfortran/io/list_read.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/io/list_read.c (.../branches/gcc-4_9-branch) +@@ -2407,7 +2407,7 @@ + { + index_type len = strlen (nl->var_name) + 1; + int dim; +- char * ext_name = (char*)xmalloc (len + 1); ++ char * ext_name = xmalloc (len + 1); + memcpy (ext_name, nl->var_name, len-1); + memcpy (ext_name + len - 1, "%", 2); + for (nl = nl->next; nl; nl = nl->next) +Index: libgfortran/io/unit.c +=================================================================== +--- a/src/libgfortran/io/unit.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/io/unit.c (.../branches/gcc-4_9-branch) +@@ -454,7 +454,7 @@ + { + iunit->rank = GFC_DESCRIPTOR_RANK (dtp->internal_unit_desc); + iunit->ls = (array_loop_spec *) +- xmalloc (iunit->rank * sizeof (array_loop_spec)); ++ xmallocarray (iunit->rank, sizeof (array_loop_spec)); + dtp->internal_unit_len *= + init_loop_spec (dtp->internal_unit_desc, iunit->ls, &start_record); + +Index: libgfortran/io/transfer.c +=================================================================== +--- a/src/libgfortran/io/transfer.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/io/transfer.c (.../branches/gcc-4_9-branch) +@@ -3786,9 +3786,9 @@ + if (nml->var_rank > 0) + { + nml->dim = (descriptor_dimension*) +- xmalloc (nml->var_rank * sizeof (descriptor_dimension)); ++ xmallocarray (nml->var_rank, sizeof (descriptor_dimension)); + nml->ls = (array_loop_spec*) +- xmalloc (nml->var_rank * sizeof (array_loop_spec)); ++ xmallocarray (nml->var_rank, sizeof (array_loop_spec)); + } + else + { +Index: libgfortran/io/write.c +=================================================================== +--- a/src/libgfortran/io/write.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgfortran/io/write.c (.../branches/gcc-4_9-branch) +@@ -1864,7 +1864,7 @@ + base_var_name_len = base ? strlen (base->var_name) : 0; + ext_name_len = base_name_len + base_var_name_len + + strlen (obj->var_name) + obj->var_rank * NML_DIGITS + 1; +- ext_name = (char*)xmalloc (ext_name_len); ++ ext_name = xmalloc (ext_name_len); + + memcpy (ext_name, base_name, base_name_len); + clen = strlen (obj->var_name + base_var_name_len); +@@ -1893,7 +1893,7 @@ + /* Now obj_name. */ + + obj_name_len = strlen (obj->var_name) + 1; +- obj_name = xmalloc (obj_name_len+1); ++ obj_name = xmalloc (obj_name_len + 1); + memcpy (obj_name, obj->var_name, obj_name_len-1); + memcpy (obj_name + obj_name_len-1, "%", 2); + Index: . =================================================================== --- a/src/. 
(.../tags/gcc_4_9_1_release) -- cgit v1.2.3 From d0dca95161aaf4d288832a86fd0f8cb5ba002829 Mon Sep 17 00:00:00 2001 From: doko Date: Thu, 31 Jul 2014 08:12:44 +0000 Subject: * Fix java.security symlink. Closes: #756484. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7543 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 1 + debian/rules.d/binary-java.mk | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index b40373b..7b05249 100644 --- a/debian/changelog +++ b/debian/changelog @@ -4,6 +4,7 @@ gcc-4.9 (4.9.1-4) UNRELEASED; urgency=medium - CVE-2014-5044, fix integer overflows in array allocation in libgfortran. Closes: #756325. * Build libphobos on armel and armhf. Closes: #755390. + * Fix java.security symlink. Closes: #756484. -- Matthias Klose Mon, 28 Jul 2014 18:26:32 +0200 diff --git a/debian/rules.d/binary-java.mk b/debian/rules.d/binary-java.mk index 8ca4a9f..c0722d4 100644 --- a/debian/rules.d/binary-java.mk +++ b/debian/rules.d/binary-java.mk @@ -453,7 +453,7 @@ endif /$(gcj_vlibdir)/lib$(i).so /$(jvm_dir)/lib/lib$(i).so) dh_link -p$(p_jlib) \ - /etc/security/classpath.security \ + /etc/java/security/classpath.security \ /$(jvm_dir)/jre/lib/security/java.security dh_link -p$(p_jlibx) \ -- cgit v1.2.3 From 525a0893a750b6a99050c77e26612cd6ebd014b5 Mon Sep 17 00:00:00 2001 From: doko Date: Thu, 31 Jul 2014 14:44:34 +0000 Subject: - prepare for 4.9.1-4 git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7544 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index 7b05249..d8b8241 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,4 +1,4 @@ -gcc-4.9 (4.9.1-4) UNRELEASED; urgency=medium +gcc-4.9 (4.9.1-4) unstable; urgency=high * Update to SVN 20140731 (r213317) from the gcc-4_9-branch. - CVE-2014-5044, fix integer overflows in array allocation in libgfortran. @@ -6,7 +6,7 @@ gcc-4.9 (4.9.1-4) UNRELEASED; urgency=medium * Build libphobos on armel and armhf. Closes: #755390. * Fix java.security symlink. Closes: #756484. - -- Matthias Klose Mon, 28 Jul 2014 18:26:32 +0200 + -- Matthias Klose Thu, 31 Jul 2014 10:15:27 +0200 gcc-4.9 (4.9.1-3) unstable; urgency=medium -- cgit v1.2.3 From 58c6d0e2da5be692c258997797c212f0d82cde62 Mon Sep 17 00:00:00 2001 From: doko Date: Fri, 1 Aug 2014 15:22:15 +0000 Subject: * Update to SVN 20140731 (r213317) from the gcc-4_9-branch. - Fix PR tree-optimization/61964. LP: #1347147. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7550 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 7 + debian/patches/svn-updates.diff | 970 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 962 insertions(+), 15 deletions(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index d8b8241..87686eb 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +gcc-4.9 (4.9.1-5) UNRELEASED; urgency=medium + + * Update to SVN 20140731 (r213317) from the gcc-4_9-branch. + - Fix PR tree-optimization/61964. LP: #1347147. + + -- Matthias Klose Fri, 01 Aug 2014 17:20:58 +0200 + gcc-4.9 (4.9.1-4) unstable; urgency=high * Update to SVN 20140731 (r213317) from the gcc-4_9-branch. 
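The hunks above (and the CVE-2014-5044 note in the changelog) replace unchecked sizeof (T) * n multiplications fed to xmalloc with calls to the new two-argument xmallocarray, whose declaration appears in the libgfortran.h hunk earlier in this patch. Its definition is not part of this excerpt; a minimal sketch of such an overflow-checked allocator, with hypothetical names and error handling, might look like this:

#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

/* Illustrative sketch only -- not the libgfortran implementation.
   The point of the interface is to reject nmemb * size products that
   wrap around SIZE_MAX instead of silently allocating a short buffer
   that is later overrun (CVE-2014-5044).  */
static void *
xmallocarray_sketch (size_t nmemb, size_t size)
{
  void *p;

  if (size != 0 && nmemb > SIZE_MAX / size)
    {
      errno = ENOMEM;
      abort ();  /* The runtime library would raise a proper error here.  */
    }
  /* Keep the "allocates a single byte for zero size" behavior that the
     comments in the hunks above rely on.  */
  p = malloc (nmemb * size != 0 ? nmemb * size : 1);
  if (p == NULL)
    abort ();
  return p;
}

Call sites then pass the element count and element size separately, as in xmallocarray (rank, sizeof (GFC_INTEGER_8)) above, so the multiplication only ever happens behind the check.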
diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index f096fdf..dd917a4 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140731 (r213317). +# DP: updates from the 4.9 branch upto 20140801 (r213487). last_update() { cat > ${dir}LAST_UPDATED depend_hash != NULL, 0)) + free (task->depend_hash); +- gomp_sem_destroy (&task->taskwait_sem); + } + + /* team.c */ +Index: libgomp/task.c +=================================================================== +--- a/src/libgomp/task.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgomp/task.c (.../branches/gcc-4_9-branch) +@@ -66,16 +66,16 @@ + task->parent = parent_task; + task->icv = *prev_icv; + task->kind = GOMP_TASK_IMPLICIT; +- task->in_taskwait = false; ++ task->taskwait = NULL; + task->in_tied_task = false; + task->final_task = false; + task->copy_ctors_done = false; ++ task->parent_depends_on = false; + task->children = NULL; + task->taskgroup = NULL; + task->dependers = NULL; + task->depend_hash = NULL; + task->depend_count = 0; +- gomp_sem_init (&task->taskwait_sem, 0); + } + + /* Clean up a task, after completing it. */ +@@ -104,6 +104,8 @@ + while (task != children); + } + ++static void gomp_task_maybe_wait_for_dependencies (void **depend); ++ + /* Called when encountering an explicit task directive. If IF_CLAUSE is + false, then we must not delay in executing the task. If UNTIED is true, + then the task may be executed by any member of the team. */ +@@ -141,35 +143,12 @@ + + /* If there are depend clauses and earlier deferred sibling tasks + with depend clauses, check if there isn't a dependency. If there +- is, fall through to the deferred task handling, as we can't +- schedule such tasks right away. There is no need to handle ++ is, we need to wait for them. There is no need to handle + depend clauses for non-deferred tasks other than this, because + the parent task is suspended until the child task finishes and thus + it can't start further child tasks. */ + if ((flags & 8) && thr->task && thr->task->depend_hash) +- { +- struct gomp_task *parent = thr->task; +- struct gomp_task_depend_entry elem, *ent = NULL; +- size_t ndepend = (uintptr_t) depend[0]; +- size_t nout = (uintptr_t) depend[1]; +- size_t i; +- gomp_mutex_lock (&team->task_lock); +- for (i = 0; i < ndepend; i++) +- { +- elem.addr = depend[i + 2]; +- ent = htab_find (parent->depend_hash, &elem); +- for (; ent; ent = ent->next) +- if (i >= nout && ent->is_in) +- continue; +- else +- break; +- if (ent) +- break; +- } +- gomp_mutex_unlock (&team->task_lock); +- if (ent) +- goto defer; +- } ++ gomp_task_maybe_wait_for_dependencies (depend); + + gomp_init_task (&task, thr->task, gomp_icv (false)); + task.kind = GOMP_TASK_IFFALSE; +@@ -209,7 +188,6 @@ + } + else + { +- defer:; + struct gomp_task *task; + struct gomp_task *parent = thr->task; + struct gomp_taskgroup *taskgroup = parent->taskgroup; +@@ -275,11 +253,12 @@ + task->depend[i].task = task; + task->depend[i].is_in = i >= nout; + task->depend[i].redundant = false; ++ task->depend[i].redundant_out = false; + + hash_entry_type *slot + = htab_find_slot (&parent->depend_hash, &task->depend[i], + INSERT); +- hash_entry_type out = NULL; ++ hash_entry_type out = NULL, last = NULL; + if (*slot) + { + /* If multiple depends on the same task are the +@@ -294,6 +273,11 @@ + } + for (ent = *slot; ent; ent = ent->next) + { ++ if (ent->redundant_out) ++ break; ++ ++ last = ent; ++ + /* depend(in:...) 
doesn't depend on earlier + depend(in:...). */ + if (i >= nout && ent->is_in) +@@ -341,7 +325,8 @@ + *slot = &task->depend[i]; + + /* There is no need to store more than one depend({,in}out:) +- task per address in the hash table chain, because each out ++ task per address in the hash table chain for the purpose ++ of creation of deferred tasks, because each out + depends on all earlier outs, thus it is enough to record + just the last depend({,in}out:). For depend(in:), we need + to keep all of the previous ones not terminated yet, because +@@ -348,14 +333,23 @@ + a later depend({,in}out:) might need to depend on all of + them. So, if the new task's clause is depend({,in}out:), + we know there is at most one other depend({,in}out:) clause +- in the list (out) and to maintain the invariant we now +- need to remove it from the list. */ ++ in the list (out). For non-deferred tasks we want to see ++ all outs, so they are moved to the end of the chain, ++ after first redundant_out entry all following entries ++ should be redundant_out. */ + if (!task->depend[i].is_in && out) + { +- if (out->next) +- out->next->prev = out->prev; +- out->prev->next = out->next; +- out->redundant = true; ++ if (out != last) ++ { ++ out->next->prev = out->prev; ++ out->prev->next = out->next; ++ out->next = last->next; ++ out->prev = last; ++ last->next = out; ++ if (out->next) ++ out->next->prev = out; ++ } ++ out->redundant_out = true; + } + } + if (task->num_dependees) +@@ -421,8 +415,20 @@ + gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, + struct gomp_taskgroup *taskgroup, struct gomp_team *team) + { +- if (parent && parent->children == child_task) +- parent->children = child_task->next_child; ++ if (parent) ++ { ++ if (parent->children == child_task) ++ parent->children = child_task->next_child; ++ if (__builtin_expect (child_task->parent_depends_on, 0) ++ && parent->taskwait->last_parent_depends_on == child_task) ++ { ++ if (child_task->prev_child->kind == GOMP_TASK_WAITING ++ && child_task->prev_child->parent_depends_on) ++ parent->taskwait->last_parent_depends_on = child_task->prev_child; ++ else ++ parent->taskwait->last_parent_depends_on = NULL; ++ } ++ } + if (taskgroup && taskgroup->children == child_task) + taskgroup->children = child_task->next_taskgroup; + child_task->prev_queue->next_queue = child_task->next_queue; +@@ -489,8 +495,23 @@ + { + if (parent->children) + { +- task->next_child = parent->children; +- task->prev_child = parent->children->prev_child; ++ /* If parent is in gomp_task_maybe_wait_for_dependencies ++ and it doesn't need to wait for this task, put it after ++ all ready to run tasks it needs to wait for. 
*/ ++ if (parent->taskwait && parent->taskwait->last_parent_depends_on ++ && !task->parent_depends_on) ++ { ++ struct gomp_task *last_parent_depends_on ++ = parent->taskwait->last_parent_depends_on; ++ task->next_child = last_parent_depends_on->next_child; ++ task->prev_child = last_parent_depends_on; ++ } ++ else ++ { ++ task->next_child = parent->children; ++ task->prev_child = parent->children->prev_child; ++ parent->children = task; ++ } + task->next_child->prev_child = task; + task->prev_child->next_child = task; + } +@@ -498,12 +519,23 @@ + { + task->next_child = task; + task->prev_child = task; ++ parent->children = task; + } +- parent->children = task; +- if (parent->in_taskwait) ++ if (parent->taskwait) + { +- parent->in_taskwait = false; +- gomp_sem_post (&parent->taskwait_sem); ++ if (parent->taskwait->in_taskwait) ++ { ++ parent->taskwait->in_taskwait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); ++ } ++ else if (parent->taskwait->in_depend_wait) ++ { ++ parent->taskwait->in_depend_wait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); ++ } ++ if (parent->taskwait->last_parent_depends_on == NULL ++ && task->parent_depends_on) ++ parent->taskwait->last_parent_depends_on = task; + } + } + if (taskgroup) +@@ -575,6 +607,13 @@ + struct gomp_task *parent = child_task->parent; + if (parent == NULL) + return; ++ if (__builtin_expect (child_task->parent_depends_on, 0) ++ && --parent->taskwait->n_depend == 0 ++ && parent->taskwait->in_depend_wait) ++ { ++ parent->taskwait->in_depend_wait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); ++ } + child_task->prev_child->next_child = child_task->next_child; + child_task->next_child->prev_child = child_task->prev_child; + if (parent->children != child_task) +@@ -589,10 +628,10 @@ + written by child_task->fn above is flushed + before the NULL is written. */ + __atomic_store_n (&parent->children, NULL, MEMMODEL_RELEASE); +- if (parent->in_taskwait) ++ if (parent->taskwait && parent->taskwait->in_taskwait) + { +- parent->in_taskwait = false; +- gomp_sem_post (&parent->taskwait_sem); ++ parent->taskwait->in_taskwait = false; ++ gomp_sem_post (&parent->taskwait->taskwait_sem); + } + } + } +@@ -736,6 +775,7 @@ + struct gomp_task *task = thr->task; + struct gomp_task *child_task = NULL; + struct gomp_task *to_free = NULL; ++ struct gomp_taskwait taskwait; + int do_wake = 0; + + /* The acquire barrier on load of task->children here synchronizes +@@ -748,6 +788,7 @@ + || __atomic_load_n (&task->children, MEMMODEL_ACQUIRE) == NULL) + return; + ++ memset (&taskwait, 0, sizeof (taskwait)); + gomp_mutex_lock (&team->task_lock); + while (1) + { +@@ -754,6 +795,8 @@ + bool cancelled = false; + if (task->children == NULL) + { ++ bool destroy_taskwait = task->taskwait != NULL; ++ task->taskwait = NULL; + gomp_mutex_unlock (&team->task_lock); + if (to_free) + { +@@ -760,6 +803,8 @@ + gomp_finish_task (to_free); + free (to_free); + } ++ if (destroy_taskwait) ++ gomp_sem_destroy (&taskwait.taskwait_sem); + return; + } + if (task->children->kind == GOMP_TASK_WAITING) +@@ -780,9 +825,180 @@ + } + } + else ++ { ++ /* All tasks we are waiting for are already running ++ in other threads. Wait for them. 
*/ ++ if (task->taskwait == NULL) ++ { ++ taskwait.in_depend_wait = false; ++ gomp_sem_init (&taskwait.taskwait_sem, 0); ++ task->taskwait = &taskwait; ++ } ++ taskwait.in_taskwait = true; ++ } ++ gomp_mutex_unlock (&team->task_lock); ++ if (do_wake) ++ { ++ gomp_team_barrier_wake (&team->barrier, do_wake); ++ do_wake = 0; ++ } ++ if (to_free) ++ { ++ gomp_finish_task (to_free); ++ free (to_free); ++ to_free = NULL; ++ } ++ if (child_task) ++ { ++ thr->task = child_task; ++ child_task->fn (child_task->fn_data); ++ thr->task = task; ++ } ++ else ++ gomp_sem_wait (&taskwait.taskwait_sem); ++ gomp_mutex_lock (&team->task_lock); ++ if (child_task) ++ { ++ finish_cancelled:; ++ size_t new_tasks ++ = gomp_task_run_post_handle_depend (child_task, team); ++ child_task->prev_child->next_child = child_task->next_child; ++ child_task->next_child->prev_child = child_task->prev_child; ++ if (task->children == child_task) ++ { ++ if (child_task->next_child != child_task) ++ task->children = child_task->next_child; ++ else ++ task->children = NULL; ++ } ++ gomp_clear_parent (child_task->children); ++ gomp_task_run_post_remove_taskgroup (child_task); ++ to_free = child_task; ++ child_task = NULL; ++ team->task_count--; ++ if (new_tasks > 1) ++ { ++ do_wake = team->nthreads - team->task_running_count ++ - !task->in_tied_task; ++ if (do_wake > new_tasks) ++ do_wake = new_tasks; ++ } ++ } ++ } ++} ++ ++/* This is like GOMP_taskwait, but we only wait for tasks that the ++ upcoming task depends on. */ ++ ++static void ++gomp_task_maybe_wait_for_dependencies (void **depend) ++{ ++ struct gomp_thread *thr = gomp_thread (); ++ struct gomp_task *task = thr->task; ++ struct gomp_team *team = thr->ts.team; ++ struct gomp_task_depend_entry elem, *ent = NULL; ++ struct gomp_taskwait taskwait; ++ struct gomp_task *last_parent_depends_on = NULL; ++ size_t ndepend = (uintptr_t) depend[0]; ++ size_t nout = (uintptr_t) depend[1]; ++ size_t i; ++ size_t num_awaited = 0; ++ struct gomp_task *child_task = NULL; ++ struct gomp_task *to_free = NULL; ++ int do_wake = 0; ++ ++ gomp_mutex_lock (&team->task_lock); ++ for (i = 0; i < ndepend; i++) ++ { ++ elem.addr = depend[i + 2]; ++ ent = htab_find (task->depend_hash, &elem); ++ for (; ent; ent = ent->next) ++ if (i >= nout && ent->is_in) ++ continue; ++ else ++ { ++ struct gomp_task *tsk = ent->task; ++ if (!tsk->parent_depends_on) ++ { ++ tsk->parent_depends_on = true; ++ ++num_awaited; ++ if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING) ++ { ++ /* If a task we need to wait for is not already ++ running and is ready to be scheduled, move it ++ to front, so that we run it as soon as possible. 
*/ ++ if (last_parent_depends_on) ++ { ++ tsk->prev_child->next_child = tsk->next_child; ++ tsk->next_child->prev_child = tsk->prev_child; ++ tsk->prev_child = last_parent_depends_on; ++ tsk->next_child = last_parent_depends_on->next_child; ++ tsk->prev_child->next_child = tsk; ++ tsk->next_child->prev_child = tsk; ++ } ++ else if (tsk != task->children) ++ { ++ tsk->prev_child->next_child = tsk->next_child; ++ tsk->next_child->prev_child = tsk->prev_child; ++ tsk->prev_child = task->children; ++ tsk->next_child = task->children->next_child; ++ task->children = tsk; ++ tsk->prev_child->next_child = tsk; ++ tsk->next_child->prev_child = tsk; ++ } ++ last_parent_depends_on = tsk; ++ } ++ } ++ } ++ } ++ if (num_awaited == 0) ++ { ++ gomp_mutex_unlock (&team->task_lock); ++ return; ++ } ++ ++ memset (&taskwait, 0, sizeof (taskwait)); ++ taskwait.n_depend = num_awaited; ++ taskwait.last_parent_depends_on = last_parent_depends_on; ++ gomp_sem_init (&taskwait.taskwait_sem, 0); ++ task->taskwait = &taskwait; ++ ++ while (1) ++ { ++ bool cancelled = false; ++ if (taskwait.n_depend == 0) ++ { ++ task->taskwait = NULL; ++ gomp_mutex_unlock (&team->task_lock); ++ if (to_free) ++ { ++ gomp_finish_task (to_free); ++ free (to_free); ++ } ++ gomp_sem_destroy (&taskwait.taskwait_sem); ++ return; ++ } ++ if (task->children->kind == GOMP_TASK_WAITING) ++ { ++ child_task = task->children; ++ cancelled ++ = gomp_task_run_pre (child_task, task, child_task->taskgroup, ++ team); ++ if (__builtin_expect (cancelled, 0)) ++ { ++ if (to_free) ++ { ++ gomp_finish_task (to_free); ++ free (to_free); ++ to_free = NULL; ++ } ++ goto finish_cancelled; ++ } ++ } ++ else + /* All tasks we are waiting for are already running + in other threads. Wait for them. */ +- task->in_taskwait = true; ++ taskwait.in_depend_wait = true; + gomp_mutex_unlock (&team->task_lock); + if (do_wake) + { +@@ -802,7 +1018,7 @@ + thr->task = task; + } + else +- gomp_sem_wait (&task->taskwait_sem); ++ gomp_sem_wait (&taskwait.taskwait_sem); + gomp_mutex_lock (&team->task_lock); + if (child_task) + { +@@ -809,6 +1025,8 @@ + finish_cancelled:; + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); ++ if (child_task->parent_depends_on) ++ --taskwait.n_depend; + child_task->prev_child->next_child = child_task->next_child; + child_task->next_child->prev_child = child_task->prev_child; + if (task->children == child_task) +Index: libgomp/ChangeLog +=================================================================== +--- a/src/libgomp/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/libgomp/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,33 @@ ++2014-08-01 Jakub Jelinek ++ ++ * libgomp.h (struct gomp_task_depend_entry): Add redundant_out field. ++ (struct gomp_taskwait): New type. ++ (struct gomp_task): Add taskwait and parent_depends_on, remove ++ in_taskwait and taskwait_sem fields. ++ (gomp_finish_task): Don't destroy taskwait_sem. ++ * task.c (gomp_init_task): Don't init in_taskwait, instead init ++ taskwait and parent_depends_on. ++ (GOMP_task): For if (0) tasks with depend clause that depend on ++ earlier tasks don't defer them, instead call ++ gomp_task_maybe_wait_for_dependencies to wait for the dependencies. ++ Initialize redundant_out field, for redundant out entries just ++ move them at the end of linked list instead of removing them ++ completely, and set redundant_out flag instead of redundant. ++ (gomp_task_run_pre): Update last_parent_depends_on if scheduling ++ that task. 
++ (gomp_task_run_post_handle_dependers): If parent is in ++ gomp_task_maybe_wait_for_dependencies and newly runnable task ++ is not parent_depends_on, queue it in parent->children linked ++ list after all runnable tasks with parent_depends_on set. ++ Adjust for addition of taskwait indirection. ++ (gomp_task_run_post_remove_parent): If parent is in ++ gomp_task_maybe_wait_for_dependencies and task to be removed ++ is parent_depends_on, decrement n_depend and if needed awake ++ parent. Adjust for addition of taskwait indirection. ++ (GOMP_taskwait): Adjust for addition of taskwait indirection. ++ (gomp_task_maybe_wait_for_dependencies): New function. ++ * testsuite/libgomp.c/depend-5.c: New test. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: libgomp/testsuite/libgomp.c/depend-5.c +=================================================================== +--- a/src/libgomp/testsuite/libgomp.c/depend-5.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgomp/testsuite/libgomp.c/depend-5.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,98 @@ ++#include ++ ++__attribute__((noinline, noclone)) void ++f1 (int ifval) ++{ ++ int x = 1, y = 2, z = 3; ++ #pragma omp parallel ++ #pragma omp single ++ { ++ #pragma omp task shared (x) depend(out: x) ++ x = 2; ++ #pragma omp task shared (x) depend(inout: x) ++ { ++ if (x != 2) ++ abort (); ++ x = 3; ++ } ++ #pragma omp task shared (x) depend(inout: x) ++ { ++ if (x != 3) ++ abort (); ++ x = 4; ++ } ++ #pragma omp task shared (z) depend(in: z) ++ if (z != 3) ++ abort (); ++ #pragma omp task shared (z) depend(in: z) ++ if (z != 3) ++ abort (); ++ #pragma omp task shared (z) depend(in: z) ++ if (z != 3) ++ abort (); ++ #pragma omp task shared (z) depend(in: z) ++ if (z != 3) ++ abort (); ++ #pragma omp task shared (z) depend(in: z) ++ if (z != 3) ++ abort (); ++ #pragma omp task shared (z) depend(in: z) ++ if (z != 3) ++ abort (); ++ #pragma omp task shared (y) depend(in: y) ++ if (y != 2) ++ abort (); ++ #pragma omp task shared (y) depend(in: y) ++ if (y != 2) ++ abort (); ++ #pragma omp task shared (y) depend(in: y) ++ if (y != 2) ++ abort (); ++ #pragma omp task shared (y) depend(in: y) ++ if (y != 2) ++ abort (); ++ #pragma omp task if (ifval) shared (x, y) depend(in: x) depend(inout: y) ++ { ++ if (x != 4 || y != 2) ++ abort (); ++ y = 3; ++ } ++ if (ifval == 0) ++ { ++ /* The above if (0) task should have waited till all ++ the tasks with x and y dependencies finish. */ ++ if (x != 4 || y != 3) ++ abort (); ++ x = 5; ++ y = 4; ++ } ++ #pragma omp task shared (z) depend(inout: z) ++ { ++ if (z != 3) ++ abort (); ++ z = 4; ++ } ++ #pragma omp task shared (z) depend(inout: z) ++ { ++ if (z != 4) ++ abort (); ++ z = 5; ++ } ++ #pragma omp taskwait ++ if (x != (ifval ? 4 : 5) || y != (ifval ? 3 : 4) || z != 5) ++ abort (); ++ #pragma omp task if (ifval) shared (x, y) depend(in: x) depend(inout: y) ++ { ++ if (x != (ifval ? 4 : 5) || y != (ifval ? 
3 : 4)) ++ abort (); ++ } ++ } ++} ++ ++int ++main () ++{ ++ f1 (0); ++ f1 (1); ++ return 0; ++} Index: libstdc++-v3/include/ext/random.tcc =================================================================== --- a/src/libstdc++-v3/include/ext/random.tcc (.../tags/gcc_4_9_1_release) @@ -374,6 +1057,31 @@ Index: configure hppa*-hp-hpux10*) host_makefile_frag="config/mh-pa-hpux10" ;; +Index: gcc/tree-ssa-tail-merge.c +=================================================================== +--- a/src/gcc/tree-ssa-tail-merge.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/tree-ssa-tail-merge.c (.../branches/gcc-4_9-branch) +@@ -1159,17 +1159,9 @@ + lhs2 = gimple_get_lhs (s2); + if (TREE_CODE (lhs1) != SSA_NAME + && TREE_CODE (lhs2) != SSA_NAME) +- { +- /* If the vdef is the same, it's the same statement. */ +- if (vn_valueize (gimple_vdef (s1)) +- == vn_valueize (gimple_vdef (s2))) +- return true; +- +- /* Test for structural equality. */ +- return (operand_equal_p (lhs1, lhs2, 0) +- && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1), +- gimple_assign_rhs1 (s2))); +- } ++ return (operand_equal_p (lhs1, lhs2, 0) ++ && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1), ++ gimple_assign_rhs1 (s2))); + else if (TREE_CODE (lhs1) == SSA_NAME + && TREE_CODE (lhs2) == SSA_NAME) + return vn_valueize (lhs1) == vn_valueize (lhs2); Index: gcc/c-family/c-gimplify.c =================================================================== --- a/src/gcc/c-family/c-gimplify.c (.../tags/gcc_4_9_1_release) @@ -413,7 +1121,7 @@ Index: gcc/DATESTAMP +++ b/src/gcc/DATESTAMP (.../branches/gcc-4_9-branch) @@ -1 +1 @@ -20140716 -+20140731 ++20140801 Index: gcc/omp-low.c =================================================================== --- a/src/gcc/omp-low.c (.../tags/gcc_4_9_1_release) @@ -459,7 +1167,35 @@ Index: gcc/ChangeLog =================================================================== --- a/src/gcc/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/gcc/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,179 @@ +@@ -1,3 +1,207 @@ ++2014-08-01 Thomas Preud'homme ++ ++ Backport from mainline ++ 2014-06-13 Thomas Preud'homme ++ ++ PR tree-optimization/61375 ++ * tree-ssa-math-opts.c (find_bswap_or_nop_1): Cancel optimization if ++ symbolic number cannot be represented in an unsigned HOST_WIDE_INT. ++ (execute_optimize_bswap): Cancel optimization if CHAR_BIT != 8. ++ ++2014-08-01 Richard Biener ++ ++ PR tree-optimization/61964 ++ * tree-ssa-tail-merge.c (gimple_equal_p): Handle non-SSA LHS solely ++ by structural equality. ++ ++2014-07-31 Oleg Endo ++ ++ Backport from mainline ++ 2014-07-31 Oleg Endo ++ ++ PR target/61844 ++ * config/sh/sh.c (sh_legitimate_address_p, ++ sh_legitimize_reload_address): Handle reg+reg address modes when ++ ALLOW_INDEXED_ADDRESS is false. ++ * config/sh/predicates.md (general_movsrc_operand, ++ general_movdst_operand): Likewise. ++ +2014-07-25 Uros Bizjak + + Backport from mainline @@ -639,7 +1375,7 @@ Index: gcc/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. -@@ -4,14 +183,14 @@ +@@ -4,14 +211,14 @@ 2014-07-10 Cary Coutant @@ -658,7 +1394,7 @@ Index: gcc/ChangeLog 2014-07-10 Tom G. 
Christensen -@@ -33,13 +212,13 @@ +@@ -33,13 +240,13 @@ PR target/61062 * config/arm/arm_neon.h (vtrn_s8, vtrn_s16, vtrn_u8, vtrn_u16, vtrn_p8, vtrn_p16, vtrn_s32, vtrn_f32, vtrn_u32, vtrnq_s8, vtrnq_s16, vtrnq_s32, @@ -679,7 +1415,7 @@ Index: gcc/ChangeLog 2014-07-09 Alan Lawrence -@@ -157,11 +336,9 @@ +@@ -157,11 +364,9 @@ 2014-06-24 Jakub Jelinek * gimplify.c (gimplify_scan_omp_clauses) @@ -705,7 +1441,7 @@ Index: gcc/ChangeLog (struct gimplify_adjust_omp_clauses_data): New type. (gimplify_adjust_omp_clauses_1): Adjust for data being a struct gimplify_adjust_omp_clauses_data pointer instead -@@ -196,14 +372,12 @@ +@@ -196,14 +400,12 @@ gimple_seq * argument to omp_finish_clause hook. * omp-low.c (scan_sharing_clauses): Call scan_omp_op on non-DECL_P OMP_CLAUSE_DECL if ctx->outer. @@ -723,7 +1459,7 @@ Index: gcc/ChangeLog 2014-06-10 Jakub Jelinek -@@ -227,8 +401,7 @@ +@@ -227,8 +429,7 @@ OMP_CLAUSE_LINEAR_STMT. * omp-low.c (lower_rec_input_clauses): Fix typo. (maybe_add_implicit_barrier_cancel, lower_omp_1): Add @@ -733,7 +1469,7 @@ Index: gcc/ChangeLog 2014-06-30 Jason Merrill -@@ -279,8 +452,7 @@ +@@ -279,8 +480,7 @@ (aarch64_sqdmlsl_lane): Likewise. (aarch64_sqdmull_lane): Likewise. (aarch64_sqdmull2_lane): Likewise. @@ -907,6 +1643,46 @@ Index: gcc/testsuite/gcc.c-torture/execute/20050604-1.x +set additional_flags "-Wno-psabi" return 0 +Index: gcc/testsuite/gcc.c-torture/execute/pr61375.c +=================================================================== +--- a/src/gcc/testsuite/gcc.c-torture/execute/pr61375.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.c-torture/execute/pr61375.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,35 @@ ++#ifdef __UINT64_TYPE__ ++typedef __UINT64_TYPE__ uint64_t; ++#else ++typedef unsigned long long uint64_t; ++#endif ++ ++#ifndef __SIZEOF_INT128__ ++#define __int128 long long ++#endif ++ ++/* Some version of bswap optimization would ICE when analyzing a mask constant ++ too big for an HOST_WIDE_INT (PR61375). 
*/ ++ ++__attribute__ ((noinline, noclone)) uint64_t ++uint128_central_bitsi_ior (unsigned __int128 in1, uint64_t in2) ++{ ++ __int128 mask = (__int128)0xffff << 56; ++ return ((in1 & mask) >> 56) | in2; ++} ++ ++int ++main (int argc) ++{ ++ __int128 in = 1; ++#ifdef __SIZEOF_INT128__ ++ in <<= 64; ++#endif ++ if (sizeof (uint64_t) * __CHAR_BIT__ != 64) ++ return 0; ++ if (sizeof (unsigned __int128) * __CHAR_BIT__ != 128) ++ return 0; ++ if (uint128_central_bitsi_ior (in, 2) != 0x102) ++ __builtin_abort (); ++ return 0; ++} Index: gcc/testsuite/gcc.c-torture/execute/20050316-1.x =================================================================== --- a/src/gcc/testsuite/gcc.c-torture/execute/20050316-1.x (.../tags/gcc_4_9_1_release) @@ -970,6 +1746,55 @@ Index: gcc/testsuite/gnat.dg/pack20_pkg.ads + procedure Modify (Fixed : in out String_Ptr); + +end Pack20_Pkg; +Index: gcc/testsuite/gcc.dg/pr51879-18.c +=================================================================== +--- a/src/gcc/testsuite/gcc.dg/pr51879-18.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.dg/pr51879-18.c (.../branches/gcc-4_9-branch) +@@ -13,5 +13,5 @@ + *q = foo (); + } + +-/* { dg-final { scan-tree-dump-times "foo \\(" 1 "pre"} } */ ++/* { dg-final { scan-tree-dump-times "foo \\(" 1 "pre" { xfail *-*-* } } } */ + /* { dg-final { cleanup-tree-dump "pre" } } */ +Index: gcc/testsuite/gcc.dg/torture/pr61964.c +=================================================================== +--- a/src/gcc/testsuite/gcc.dg/torture/pr61964.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.dg/torture/pr61964.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,33 @@ ++/* { dg-do run } */ ++ ++extern void abort (void); ++ ++struct node { struct node *next, *prev; } node; ++struct head { struct node *first; } heads[5]; ++int k = 2; ++struct head *head = &heads[2]; ++ ++static int __attribute__((noinline)) ++foo() ++{ ++ node.prev = (void *)head; ++ head->first = &node; ++ ++ struct node *n = head->first; ++ struct head *h = &heads[k]; ++ ++ if (n->prev == (void *)h) ++ h->first = n->next; ++ else ++ n->prev->next = n->next; ++ ++ n->next = h->first; ++ return n->next == &node; ++} ++ ++int main() ++{ ++ if (foo ()) ++ abort (); ++ return 0; ++} Index: gcc/testsuite/gcc.dg/tree-ssa/ssa-copyprop-2.c =================================================================== --- a/src/gcc/testsuite/gcc.dg/tree-ssa/ssa-copyprop-2.c (.../tags/gcc_4_9_1_release) @@ -1039,7 +1864,21 @@ Index: gcc/testsuite/ChangeLog =================================================================== --- a/src/gcc/testsuite/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/gcc/testsuite/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,104 @@ +@@ -1,3 +1,118 @@ ++2014-08-01 Thomas Preud'homme ++ ++ Backport from mainline ++ 2014-06-13 Thomas Preud'homme ++ ++ PR tree-optimization/61375 ++ * gcc.c-torture/execute/pr61375-1.c: New test. ++ ++2014-08-01 Richard Biener ++ ++ PR tree-optimization/61964 ++ * gcc.dg/torture/pr61964.c: New testcase. ++ * gcc.dg/pr51879-18.c: XFAIL. ++ +2014-07-28 Richard Biener + + PR rtl-optimization/61801 @@ -1144,7 +1983,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. 
-@@ -17,7 +118,8 @@ +@@ -17,7 +132,8 @@ 2014-06-09 Alan Lawrence PR target/61062 @@ -1154,7 +1993,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-08 Jakub Jelinek -@@ -34,8 +136,8 @@ +@@ -34,8 +150,8 @@ 2014-07-08 Alan Lawrence @@ -1230,6 +2069,37 @@ Index: gcc/testsuite/c-c++-common/pr61741.c + __builtin_abort (); + return 0; +} +Index: gcc/tree-ssa-math-opts.c +=================================================================== +--- a/src/gcc/tree-ssa-math-opts.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/tree-ssa-math-opts.c (.../branches/gcc-4_9-branch) +@@ -1749,6 +1749,8 @@ + size = TYPE_PRECISION (n->type); + if (size % BITS_PER_UNIT != 0) + return NULL_TREE; ++ if (size > HOST_BITS_PER_WIDEST_INT) ++ return NULL_TREE; + size /= BITS_PER_UNIT; + n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 : + (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201); +@@ -1792,6 +1794,8 @@ + type_size = TYPE_PRECISION (type); + if (type_size % BITS_PER_UNIT != 0) + return NULL_TREE; ++ if (type_size > (int) HOST_BITS_PER_WIDEST_INT) ++ return NULL_TREE; + + /* Sign extension: result is dependent on the value. */ + old_type_size = TYPE_PRECISION (n->type); +@@ -1932,7 +1936,7 @@ + bool changed = false; + tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; + +- if (BITS_PER_UNIT != 8) ++ if (BITS_PER_UNIT != 8 || CHAR_BIT != 8) + return 0; + + if (sizeof (HOST_WIDEST_INT) < 8) Index: gcc/expr.c =================================================================== --- a/src/gcc/expr.c (.../tags/gcc_4_9_1_release) @@ -1613,6 +2483,76 @@ Index: gcc/config/i386/i386.c classes[i] = merge_classes (subclasses[i], classes[i]); } } +Index: gcc/config/sh/predicates.md +=================================================================== +--- a/src/gcc/config/sh/predicates.md (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/sh/predicates.md (.../branches/gcc-4_9-branch) +@@ -489,6 +489,10 @@ + rtx mem_rtx = MEM_P (op) ? op : SUBREG_REG (op); + rtx x = XEXP (mem_rtx, 0); + ++ if (! ALLOW_INDEXED_ADDRESS ++ && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))) ++ return false; ++ + if ((mode == QImode || mode == HImode) + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) +@@ -567,6 +571,10 @@ + rtx mem_rtx = MEM_P (op) ? op : SUBREG_REG (op); + rtx x = XEXP (mem_rtx, 0); + ++ if (! ALLOW_INDEXED_ADDRESS ++ && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))) ++ return false; ++ + if ((mode == QImode || mode == HImode) + && GET_CODE (x) == PLUS + && REG_P (XEXP (x, 0)) +Index: gcc/config/sh/sh.c +=================================================================== +--- a/src/gcc/config/sh/sh.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/sh/sh.c (.../branches/gcc-4_9-branch) +@@ -10207,6 +10207,10 @@ + static bool + sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict) + { ++ if (! ALLOW_INDEXED_ADDRESS ++ && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))) ++ return false; ++ + if (REG_P (x) && REGNO (x) == GBR_REG) + return true; + +@@ -10436,6 +10440,28 @@ + enum reload_type type = (enum reload_type) itype; + const int mode_sz = GET_MODE_SIZE (mode); + ++ if (! ALLOW_INDEXED_ADDRESS ++ && GET_CODE (*p) == PLUS ++ && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1))) ++ { ++ *p = copy_rtx (*p); ++ push_reload (*p, NULL_RTX, p, NULL, ++ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); ++ return true; ++ } ++ ++ if (! 
ALLOW_INDEXED_ADDRESS ++ && GET_CODE (*p) == PLUS ++ && GET_CODE (XEXP (*p, 0)) == PLUS) ++ { ++ rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0), ++ XEXP (XEXP (*p, 0), 1)); ++ *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1)); ++ push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL, ++ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); ++ return true; ++ } ++ + if (TARGET_SHMEDIA) + return false; + Index: gcc/config/nios2/rtems.h =================================================================== --- a/src/gcc/config/nios2/rtems.h (.../tags/gcc_4_9_1_release) -- cgit v1.2.3 From 17c92e0dee47d7e6bd3097ed719b994bf014e995 Mon Sep 17 00:00:00 2001 From: doko Date: Sat, 2 Aug 2014 00:05:39 +0000 Subject: * Fix libphobos cross build. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7551 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 1 + debian/patches/gdc-cross-install-location.diff | 11 +++++++++++ debian/rules.conf | 6 +++--- debian/rules.d/binary-d.mk | 27 +++++++++++++++----------- debian/rules.patch | 3 +++ 5 files changed, 34 insertions(+), 14 deletions(-) create mode 100644 debian/patches/gdc-cross-install-location.diff (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index 87686eb..65a92e4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,6 +2,7 @@ gcc-4.9 (4.9.1-5) UNRELEASED; urgency=medium * Update to SVN 20140731 (r213317) from the gcc-4_9-branch. - Fix PR tree-optimization/61964. LP: #1347147. + * Fix libphobos cross build. -- Matthias Klose Fri, 01 Aug 2014 17:20:58 +0200 diff --git a/debian/patches/gdc-cross-install-location.diff b/debian/patches/gdc-cross-install-location.diff new file mode 100644 index 0000000..9dae9ed --- /dev/null +++ b/debian/patches/gdc-cross-install-location.diff @@ -0,0 +1,11 @@ +--- a/src/libphobos/configure.ac ++++ b/src/libphobos/configure.ac +@@ -239,7 +239,7 @@ + fi + + if test "${gdc_host}" != "${gdc_target}"; then +- gdc_include_dir='${libdir}/gcc/${host_alias}'/${d_gcc_ver}/include/d ++ gdc_include_dir='${libdir}/gcc-cross/${host_alias}'/${d_gcc_ver}/include/d + else + gdc_include_dir='${prefix}'/include/d/${d_gcc_ver} + fi diff --git a/debian/rules.conf b/debian/rules.conf index 1962574..5cb6f9d 100644 --- a/debian/rules.conf +++ b/debian/rules.conf @@ -1123,12 +1123,12 @@ ifeq ($(DEB_CROSS),yes) >> debian/substvars.local.tmp endif ifeq ($(with_libphobos),yes) + echo 'dep:phobosdev=libphobos$(pkg_ver)-dev$(LS)$(AQ) (>= $(DEB_GCC_SOFT_VERSION))' \ + >> debian/substvars.local.tmp ifeq ($(DEB_CROSS),yes) + : # FIXME: make the cross gdc aware of both include paths echo 'dep:gdccross=gdc$(pkg_ver) (>= $(DEB_GCC_SOFT_VERSION))' \ >> debian/substvars.local.tmp - else - echo 'dep:phobosdev=libphobos$(pkg_ver)-dev (>= $(DEB_GCC_SOFT_VERSION))' \ - >> debian/substvars.local.tmp endif endif #ifneq (,$(findstring gtk, $(java_awt_peers))) diff --git a/debian/rules.d/binary-d.mk b/debian/rules.d/binary-d.mk index 656c69c..4b54f6f 100644 --- a/debian/rules.d/binary-d.mk +++ b/debian/rules.d/binary-d.mk @@ -10,7 +10,11 @@ p_libphobos = libphobos$(pkg_ver)-dev d_gdc = debian/$(p_gdc) d_libphobos = debian/$(p_libphobos) -gdc_include_dir := $(PF)/include/d +ifeq ($(DEB_CROSS),yes) + gdc_include_dir := $(gcc_lib_dir)/include/d +else + gdc_include_dir := $(PF)/include/d/$(BASE_VERSION) +endif dirs_gdc = \ $(PF)/bin \ @@ -18,7 +22,7 @@ dirs_gdc = \ $(gcc_lexec_dir) ifneq ($(DEB_CROSS),yes) dirs_gdc += \ - $(gdc_include_dir)/$(BASE_VERSION) + $(gdc_include_dir) endif files_gdc = \ @@ -32,12 +36,12 @@ endif 
dirs_libphobos = \ $(PF)/lib \ - $(gdc_include_dir)/$(BASE_VERSION) \ + $(gdc_include_dir) \ $(gcc_lib_dir) files_libphobos = \ - $(PF)/$(libdir)/libgphobos2.a \ - $(gdc_include_dir)/$(BASE_VERSION) + $(usr_lib$(2))/libgphobos2.a \ + $(gdc_include_dir) $(binary_stamp)-gdc: $(install_stamp) @@ -67,13 +71,14 @@ ifneq ($(DEB_CROSS),yes) endif # FIXME: object.di needs to go into a libgdc-dev Multi-Arch: same package -ifneq ($(DEB_CROSS),yes) # Always needed by gdc. + mkdir -p $(d_gdc)/$(gdc_include_dir) cp $(srcdir)/libphobos/libdruntime/object.di \ - $(d_gdc)/$(gdc_include_dir)/$(BASE_VERSION)/. + $(d_gdc)/$(gdc_include_dir)/. +ifneq ($(DEB_CROSS),yes) dh_link -p$(p_gdc) \ - /$(gdc_include_dir)/$(BASE_VERSION) \ - /$(gdc_include_dir)/$(GCC_VERSION) + /$(gdc_include_dir) \ + /$(dir $(gdc_include_dir))/$(GCC_VERSION) endif dh_link -p$(p_gdc) \ @@ -108,7 +113,7 @@ $(binary_stamp)-libphobos: $(install_stamp) $(d_libphobos)/$(gcc_lib_dir) # included in gdc package - rm -f $(d_libphobos)/$(gdc_include_dir)/$(BASE_VERSION)/object.di + rm -f $(d_libphobos)/$(gdc_include_dir)/object.di ifeq ($(with_separate_gdc),yes) debian/dh_doclink -p$(p_libphobos) $(p_gdc) @@ -145,7 +150,7 @@ define __do_libphobos_dev $(d_l)/$(gcc_lib_dir) : # included in gdc package - rm -f $(d_l)/$(gdc_include_dir)/$(BASE_VERSION)/object.di + rm -f $(d_l)/$(gdc_include_dir)/object.di debian/dh_doclink -p$(p_l) \ $(if $(filter yes,$(with_separate_gdc)),$(p_gdc),$(p_base)) diff --git a/debian/rules.patch b/debian/rules.patch index 688506b..0f0cbaa 100644 --- a/debian/rules.patch +++ b/debian/rules.patch @@ -239,6 +239,9 @@ ifeq ($(DEB_CROSS),yes) debian_patches += cross-ma-install-location else debian_patches += cross-install-location + ifeq ($(with_d),yes) + debian_patches += gdc-cross-install-location + endif endif endif -- cgit v1.2.3 From 067b83433eb81cc248bbb8d1902add2659ec24b5 Mon Sep 17 00:00:00 2001 From: doko Date: Sat, 2 Aug 2014 00:38:15 +0000 Subject: * Update to SVN 20140802 (r213510) from the gcc-4_9-branch. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7552 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 4 +- debian/patches/svn-updates.diff | 284 +++++++++++++++++++++++++++++++++++++--- 2 files changed, 270 insertions(+), 18 deletions(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index 65a92e4..d5b8d9f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,10 @@ gcc-4.9 (4.9.1-5) UNRELEASED; urgency=medium - * Update to SVN 20140731 (r213317) from the gcc-4_9-branch. + * Update to SVN 20140802 (r213510) from the gcc-4_9-branch. - Fix PR tree-optimization/61964. LP: #1347147. * Fix libphobos cross build. - -- Matthias Klose Fri, 01 Aug 2014 17:20:58 +0200 + -- Matthias Klose Sat, 02 Aug 2014 02:36:26 +0200 gcc-4.9 (4.9.1-4) unstable; urgency=high diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index dd917a4..e0bbe86 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140801 (r213487). +# DP: updates from the 4.9 branch upto 20140802 (r213510). last_update() { cat > ${dir}LAST_UPDATED ++ ++ PR middle-end/61455 ++ * array-notation-common.c (extract_array_notation_exprs): Handling ++ of DECL_EXPR added. ++ +2014-07-17 Richard Biener + + Backport from mainline @@ -1115,13 +1121,76 @@ Index: gcc/c-family/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. 
+Index: gcc/c-family/array-notation-common.c +=================================================================== +--- a/src/gcc/c-family/array-notation-common.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/c-family/array-notation-common.c (.../branches/gcc-4_9-branch) +@@ -329,6 +329,14 @@ + vec_safe_push (*array_list, node); + return; + } ++ if (TREE_CODE (node) == DECL_EXPR) ++ { ++ tree x = DECL_EXPR_DECL (node); ++ if (DECL_INITIAL (x)) ++ extract_array_notation_exprs (DECL_INITIAL (x), ++ ignore_builtin_fn, ++ array_list); ++ } + else if (TREE_CODE (node) == STATEMENT_LIST) + { + tree_stmt_iterator ii_tsi; +Index: gcc/c/ChangeLog +=================================================================== +--- a/src/gcc/c/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/c/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,9 @@ ++2014-08-01 Igor Zamyatin ++ ++ PR middle-end/61455 ++ * c-array-notation.c (expand_array_notations): Handling ++ of DECL_EXPR added. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: gcc/c/c-array-notation.c +=================================================================== +--- a/src/gcc/c/c-array-notation.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/c/c-array-notation.c (.../branches/gcc-4_9-branch) +@@ -1265,6 +1265,25 @@ + rhs_loc, rhs, TREE_TYPE (rhs)); + } + break; ++ case DECL_EXPR: ++ { ++ tree x = DECL_EXPR_DECL (*tp); ++ if (DECL_INITIAL (x)) ++ { ++ location_t loc = DECL_SOURCE_LOCATION (x); ++ tree lhs = x; ++ tree rhs = DECL_INITIAL (x); ++ DECL_INITIAL (x) = NULL; ++ tree new_modify_expr = build_modify_expr (loc, lhs, ++ TREE_TYPE (lhs), ++ NOP_EXPR, ++ loc, rhs, ++ TREE_TYPE(rhs)); ++ expand_array_notations (&new_modify_expr, walk_subtrees, NULL); ++ *tp = new_modify_expr; ++ } ++ } ++ break; + case CALL_EXPR: + *tp = fix_array_notation_call_expr (*tp); + break; Index: gcc/DATESTAMP =================================================================== --- a/src/gcc/DATESTAMP (.../tags/gcc_4_9_1_release) +++ b/src/gcc/DATESTAMP (.../branches/gcc-4_9-branch) @@ -1 +1 @@ -20140716 -+20140801 ++20140802 Index: gcc/omp-low.c =================================================================== --- a/src/gcc/omp-low.c (.../tags/gcc_4_9_1_release) @@ -1167,7 +1236,12 @@ Index: gcc/ChangeLog =================================================================== --- a/src/gcc/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/gcc/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,207 @@ +@@ -1,3 +1,212 @@ ++2014-08-01 Vladimir Makarov ++ ++ * lra-constraints.c (remove_inheritance_pseudos): Process ++ destination pseudo too. ++ +2014-08-01 Thomas Preud'homme + + Backport from mainline @@ -1375,7 +1449,7 @@ Index: gcc/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. -@@ -4,14 +211,14 @@ +@@ -4,14 +216,14 @@ 2014-07-10 Cary Coutant @@ -1394,7 +1468,7 @@ Index: gcc/ChangeLog 2014-07-10 Tom G. Christensen -@@ -33,13 +240,13 @@ +@@ -33,13 +245,13 @@ PR target/61062 * config/arm/arm_neon.h (vtrn_s8, vtrn_s16, vtrn_u8, vtrn_u16, vtrn_p8, vtrn_p16, vtrn_s32, vtrn_f32, vtrn_u32, vtrnq_s8, vtrnq_s16, vtrnq_s32, @@ -1415,7 +1489,7 @@ Index: gcc/ChangeLog 2014-07-09 Alan Lawrence -@@ -157,11 +364,9 @@ +@@ -157,11 +369,9 @@ 2014-06-24 Jakub Jelinek * gimplify.c (gimplify_scan_omp_clauses) @@ -1441,7 +1515,7 @@ Index: gcc/ChangeLog (struct gimplify_adjust_omp_clauses_data): New type. 
(gimplify_adjust_omp_clauses_1): Adjust for data being a struct gimplify_adjust_omp_clauses_data pointer instead -@@ -196,14 +400,12 @@ +@@ -196,14 +405,12 @@ gimple_seq * argument to omp_finish_clause hook. * omp-low.c (scan_sharing_clauses): Call scan_omp_op on non-DECL_P OMP_CLAUSE_DECL if ctx->outer. @@ -1459,7 +1533,7 @@ Index: gcc/ChangeLog 2014-06-10 Jakub Jelinek -@@ -227,8 +429,7 @@ +@@ -227,8 +434,7 @@ OMP_CLAUSE_LINEAR_STMT. * omp-low.c (lower_rec_input_clauses): Fix typo. (maybe_add_implicit_barrier_cancel, lower_omp_1): Add @@ -1469,7 +1543,7 @@ Index: gcc/ChangeLog 2014-06-30 Jason Merrill -@@ -279,8 +480,7 @@ +@@ -279,8 +485,7 @@ (aarch64_sqdmlsl_lane): Likewise. (aarch64_sqdmull_lane): Likewise. (aarch64_sqdmull2_lane): Likewise. @@ -1864,7 +1938,18 @@ Index: gcc/testsuite/ChangeLog =================================================================== --- a/src/gcc/testsuite/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/gcc/testsuite/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,118 @@ +@@ -1,3 +1,129 @@ ++2014-08-01 Igor Zamyatin ++ ++ PR other/61963 ++ * c-c++-common/cilk-plus/AN/pr61963.c: New test. ++ ++2014-08-01 Igor Zamyatin ++ ++ PR middle-end/61455 ++ * c-c++-common/cilk-plus/AN/pr61455.c: New test. ++ * c-c++-common/cilk-plus/AN/pr61455-2.c: Likewise. ++ +2014-08-01 Thomas Preud'homme + + Backport from mainline @@ -1983,7 +2068,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. -@@ -17,7 +132,8 @@ +@@ -17,7 +143,8 @@ 2014-06-09 Alan Lawrence PR target/61062 @@ -1993,7 +2078,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-08 Jakub Jelinek -@@ -34,8 +150,8 @@ +@@ -34,8 +161,8 @@ 2014-07-08 Alan Lawrence @@ -2069,6 +2154,148 @@ Index: gcc/testsuite/c-c++-common/pr61741.c + __builtin_abort (); + return 0; +} +Index: gcc/testsuite/c-c++-common/cilk-plus/AN/pr61963.c +=================================================================== +--- a/src/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61963.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61963.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,9 @@ ++/* PR other/61963 */ ++/* { dg-do compile } */ ++/* { dg-options "-fcilkplus" } */ ++ ++void f (int * int *a) /* { dg-error "expected" } */ ++{ ++ a[0:64] = 0; /* { dg-error "was not declared" "" { target c++ } 7 } */ ++ a[0:64] = 0; ++} +Index: gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455-2.c +=================================================================== +--- a/src/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455-2.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455-2.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,13 @@ ++/* PR c++/61455 */ ++/* { dg-options "-fcilkplus" } */ ++ ++int a[3] = {2, 3, 4}; ++ ++int main () ++{ ++ int c = 10; ++ int b = __sec_reduce_add(a[:]); ++ if (b+c != 19) ++ __builtin_abort(); ++ return 0; ++} +Index: gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455.c +=================================================================== +--- a/src/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,9 @@ ++/* PR c++/61455 */ ++/* { dg-do compile } */ ++/* { dg-options "-fcilkplus" } */ ++ ++void foo () ++{ ++ int a[2]; ++ int b = a[:]; /* { dg-error "cannot be scalar" } */ ++} +Index: gcc/cp/ChangeLog +=================================================================== +--- a/src/gcc/cp/ChangeLog 
(.../tags/gcc_4_9_1_release) ++++ b/src/gcc/cp/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,19 @@ ++2014-08-01 Igor Zamyatin ++ ++ * cp-array-notation.c (expand_an_in_modify_expr): Fix the misprint ++ in error output. ++ ++2014-08-01 Igor Zamyatin ++ ++ PR other/61963 ++ * parser.c (cp_parser_array_notation): Added check for array_type. ++ ++2014-08-01 Igor Zamyatin ++ ++ PR middle-end/61455 ++ * cp-array-notation.c (expand_array_notation_exprs): Handling of ++ DECL_EXPR improved. Changed handling for INIT_EXPR. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: gcc/cp/cp-array-notation.c +=================================================================== +--- a/src/gcc/cp/cp-array-notation.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/cp/cp-array-notation.c (.../branches/gcc-4_9-branch) +@@ -607,7 +607,7 @@ + + if (lhs_rank == 0 && rhs_rank != 0) + { +- error_at (location, "%qD cannot be scalar when %qD is not", lhs, rhs); ++ error_at (location, "%qE cannot be scalar when %qE is not", lhs, rhs); + return error_mark_node; + } + if (lhs_rank != 0 && rhs_rank != 0 && lhs_rank != rhs_rank) +@@ -1147,7 +1147,6 @@ + case PARM_DECL: + case NON_LVALUE_EXPR: + case NOP_EXPR: +- case INIT_EXPR: + case ADDR_EXPR: + case ARRAY_REF: + case BIT_FIELD_REF: +@@ -1154,6 +1153,7 @@ + case VECTOR_CST: + case COMPLEX_CST: + return t; ++ case INIT_EXPR: + case MODIFY_EXPR: + if (contains_array_notation_expr (t)) + t = expand_an_in_modify_expr (loc, TREE_OPERAND (t, 0), NOP_EXPR, +@@ -1175,13 +1175,24 @@ + return t; + } + case DECL_EXPR: +- { +- tree x = DECL_EXPR_DECL (t); +- if (t && TREE_CODE (x) != FUNCTION_DECL) ++ if (contains_array_notation_expr (t)) ++ { ++ tree x = DECL_EXPR_DECL (t); + if (DECL_INITIAL (x)) +- t = expand_unary_array_notation_exprs (t); ++ { ++ location_t loc = DECL_SOURCE_LOCATION (x); ++ tree lhs = x; ++ tree rhs = DECL_INITIAL (x); ++ DECL_INITIAL (x) = NULL; ++ tree new_modify_expr = build_modify_expr (loc, lhs, ++ TREE_TYPE (lhs), ++ NOP_EXPR, ++ loc, rhs, ++ TREE_TYPE(rhs)); ++ t = expand_array_notation_exprs (new_modify_expr); ++ } ++ } + return t; +- } + case STATEMENT_LIST: + { + tree_stmt_iterator i; +Index: gcc/cp/parser.c +=================================================================== +--- a/src/gcc/cp/parser.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/cp/parser.c (.../branches/gcc-4_9-branch) +@@ -6306,7 +6306,7 @@ + parser->colon_corrects_to_scope_p = saved_colon_corrects; + + if (*init_index == error_mark_node || length_index == error_mark_node +- || stride == error_mark_node) ++ || stride == error_mark_node || array_type == error_mark_node) + { + if (cp_lexer_peek_token (parser->lexer)->type == CPP_CLOSE_SQUARE) + cp_lexer_consume_token (parser->lexer); Index: gcc/tree-ssa-math-opts.c =================================================================== --- a/src/gcc/tree-ssa-math-opts.c (.../tags/gcc_4_9_1_release) @@ -2207,6 +2434,31 @@ Index: gcc/fortran/dependency.c this_dep = GFC_DEP_OVERLAP; } +Index: gcc/lra-constraints.c +=================================================================== +--- a/src/gcc/lra-constraints.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/lra-constraints.c (.../branches/gcc-4_9-branch) +@@ -5752,6 +5752,20 @@ + SUBREG_REG (SET_SRC (set)) = SET_SRC (prev_set); + else + SET_SRC (set) = SET_SRC (prev_set); ++ /* As we are finishing with processing the insn ++ here, check the destination too as it might ++ inheritance pseudo for another pseudo. 
*/ ++ if (bitmap_bit_p (remove_pseudos, dregno) ++ && bitmap_bit_p (&lra_inheritance_pseudos, dregno) ++ && (restore_regno ++ = lra_reg_info[dregno].restore_regno) >= 0) ++ { ++ if (GET_CODE (SET_DEST (set)) == SUBREG) ++ SUBREG_REG (SET_DEST (set)) ++ = regno_reg_rtx[restore_regno]; ++ else ++ SET_DEST (set) = regno_reg_rtx[restore_regno]; ++ } + lra_push_insn_and_update_insn_regno_info (curr_insn); + lra_set_used_insn_alternative_by_uid + (INSN_UID (curr_insn), -1); Index: gcc/tree-ssa-copy.c =================================================================== --- a/src/gcc/tree-ssa-copy.c (.../tags/gcc_4_9_1_release) -- cgit v1.2.3 From faa71488a96f443eddacad85309ea275629a9805 Mon Sep 17 00:00:00 2001 From: doko Date: Sat, 2 Aug 2014 09:31:09 +0000 Subject: * Fix libphobos cross build. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7553 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/rules.d/binary-d.mk | 4 ---- 1 file changed, 4 deletions(-) (limited to 'debian') diff --git a/debian/rules.d/binary-d.mk b/debian/rules.d/binary-d.mk index 4b54f6f..686e007 100644 --- a/debian/rules.d/binary-d.mk +++ b/debian/rules.d/binary-d.mk @@ -108,10 +108,6 @@ $(binary_stamp)-libphobos: $(install_stamp) DH_COMPAT=2 dh_movefiles -p$(p_libphobos) $(files_libphobos) - # better to have it there, avoid conflicts - mv $(d_libphobos)/$(PF)/$(libdir)/libgphobos2.a \ - $(d_libphobos)/$(gcc_lib_dir) - # included in gdc package rm -f $(d_libphobos)/$(gdc_include_dir)/object.di -- cgit v1.2.3 From 474b6bcaefb317d1da0ff9296612da2b154685fc Mon Sep 17 00:00:00 2001 From: doko Date: Sat, 2 Aug 2014 11:11:25 +0000 Subject: * Fix libphobos cross build. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7554 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/rules.d/binary-d.mk | 4 ---- 1 file changed, 4 deletions(-) (limited to 'debian') diff --git a/debian/rules.d/binary-d.mk b/debian/rules.d/binary-d.mk index 686e007..4647c42 100644 --- a/debian/rules.d/binary-d.mk +++ b/debian/rules.d/binary-d.mk @@ -141,10 +141,6 @@ define __do_libphobos_dev DH_COMPAT=2 dh_movefiles -p$(p_l) \ $(files_libphobos) - : # better to have it there, avoid conflicts - mv $(d_l)/$(PF)/$(libdir)/libgphobos2.a \ - $(d_l)/$(gcc_lib_dir) - : # included in gdc package rm -f $(d_l)/$(gdc_include_dir)/object.di -- cgit v1.2.3 From d0b776bb912b59d6e9e220768e460317b562ce34 Mon Sep 17 00:00:00 2001 From: doko Date: Sat, 2 Aug 2014 18:35:04 +0000 Subject: * Update to SVN 20140802 (r213518) from the gcc-4_9-branch. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7555 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 4 ++-- debian/patches/svn-updates.diff | 43 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 5 deletions(-) (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index d5b8d9f..c8888ef 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,10 @@ gcc-4.9 (4.9.1-5) UNRELEASED; urgency=medium - * Update to SVN 20140802 (r213510) from the gcc-4_9-branch. + * Update to SVN 20140802 (r213518) from the gcc-4_9-branch. - Fix PR tree-optimization/61964. LP: #1347147. * Fix libphobos cross build. 
- -- Matthias Klose Sat, 02 Aug 2014 02:36:26 +0200 + -- Matthias Klose Sat, 02 Aug 2014 20:34:30 +0200 gcc-4.9 (4.9.1-4) unstable; urgency=high diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index e0bbe86..7a41a72 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140802 (r213510). +# DP: updates from the 4.9 branch upto 20140802 (r213518). last_update() { cat > ${dir}LAST_UPDATED ++#include ++#include ++#include + + #include "backtrace.h" + +@@ -99,6 +102,7 @@ + if (back_state == NULL) + { + const char *filename; ++ struct stat s; + + filename = (const char *) runtime_progname (); + +@@ -108,6 +112,14 @@ + if (__builtin_strchr (filename, '/') == NULL) + filename = NULL; + ++ /* If the file is small, then it's not the real executable. ++ This is specifically to deal with Docker, which uses a bogus ++ argv[0] (http://gcc.gnu.org/PR61895). It would be nice to ++ have a better check for whether this file is the real ++ executable. */ ++ if (stat (filename, &s) < 0 || s.st_size < 1024) ++ filename = NULL; ++ + back_state = backtrace_create_state (filename, 1, error_callback, NULL); + } + runtime_unlock (&back_state_lock); Index: libobjc/encoding.c =================================================================== --- a/src/libobjc/encoding.c (.../tags/gcc_4_9_1_release) -- cgit v1.2.3 From a3585719bfb77e462fec86cea3374a6d939d3cf4 Mon Sep 17 00:00:00 2001 From: doko Date: Wed, 6 Aug 2014 14:37:45 +0000 Subject: - make systemtap-sdt-dev build dependency conditional on the release. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7556 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/control | 4 ++-- debian/control.m4 | 6 +++--- debian/rules.conf | 5 +++++ 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'debian') diff --git a/debian/control b/debian/control index 87222a1..4b97936 100644 --- a/debian/control +++ b/debian/control @@ -9,8 +9,8 @@ Build-Depends: debhelper (>= 5.0.62), g++-multilib [amd64 i386 kfreebsd-amd64 mi kfreebsd-kernel-headers (>= 0.84) [kfreebsd-any], m4, libtool, autoconf2.64, libunwind7-dev (>= 0.98.5-6) [ia64], libatomic-ops-dev [ia64], - systemtap-sdt-dev [i386 amd64 x32 ia64 s390 powerpc powerpcspe ppc64 armel armhf arm64], - autogen, zlib1g-dev, gawk, lzma, xz-utils, patchutils, + autogen, gawk, lzma, xz-utils, patchutils, + zlib1g-dev, systemtap-sdt-dev [linux-any kfreebsd-any hurd-any], binutils (>= 2.23.52) | binutils-multiarch (>= 2.23.52), binutils-hppa64 (>= 2.23.52) [hppa], gperf (>= 3.0.1), bison (>= 1:2.3), flex, gettext, gdb, diff --git a/debian/control.m4 b/debian/control.m4 index 68ea5b5..5618ca0 100644 --- a/debian/control.m4 +++ b/debian/control.m4 @@ -58,10 +58,10 @@ Build-Depends: debhelper (>= 5.0.62), LIBC_BUILD_DEP, LIBC_BIARCH_BUILD_DEP kfreebsd-kernel-headers (>= 0.84) [kfreebsd-any], LIBUNWIND_BUILD_DEP LIBATOMIC_OPS_BUILD_DEP AUTO_BUILD_DEP - systemtap-sdt-dev [i386 amd64 x32 ia64 s390 powerpc powerpcspe ppc64 ppc64el armel armhf arm64], SOURCE_BUILD_DEP CROSS_BUILD_DEP CLOOG_BUILD_DEP MPC_BUILD_DEP MPFR_BUILD_DEP GMP_BUILD_DEP, autogen, zlib1g-dev, gawk, lzma, xz-utils, patchutils, + zlib1g-dev, SDT_BUILD_DEP bison (>= 1:2.3), flex, realpath (>= 1.9.12), lsb-release, quilt ',`dnl native Build-Depends: debhelper (>= 5.0.62), GCC_MULTILIB_BUILD_DEP @@ -69,8 +69,8 @@ Build-Depends: debhelper (>= 5.0.62), GCC_MULTILIB_BUILD_DEP kfreebsd-kernel-headers (>= 0.84) [kfreebsd-any], AUTO_BUILD_DEP 
BASE_BUILD_DEP libunwind7-dev (>= 0.98.5-6) [ia64], libatomic-ops-dev [ia64], - systemtap-sdt-dev [i386 amd64 x32 ia64 s390 powerpc powerpcspe ppc64 armel armhf arm64], - autogen, zlib1g-dev, gawk, lzma, xz-utils, patchutils, + autogen, gawk, lzma, xz-utils, patchutils, + zlib1g-dev, SDT_BUILD_DEP BINUTILS_BUILD_DEP, binutils-hppa64 (>= BINUTILSBDV) [hppa], gperf (>= 3.0.1), bison (>= 1:2.3), flex, gettext, gdb, diff --git a/debian/rules.conf b/debian/rules.conf index 5cb6f9d..60a9add 100644 --- a/debian/rules.conf +++ b/debian/rules.conf @@ -382,6 +382,10 @@ CHECK_BUILD_DEP := dejagnu [$(check_no_archs)], AUTO_BUILD_DEP := m4, libtool, AUTO_BUILD_DEP += autoconf2.64, +ifeq (,$(filter $(distrelease),lenny etch squeeze wheezy dapper hardy jaunty karmic lucid maverick natty oneiric precise quantal raring saucy trusty)) + SDT_BUILD_DEP = systemtap-sdt-dev [linux-any kfreebsd-any hurd-any], +endif + # ensure that the common libs, built from the next GCC version are available ifeq ($(PKGSOURCE),gcc-$(BASE_VERSION)) ifneq ($(with_common_libs),yes) @@ -595,6 +599,7 @@ ctrl_flags += \ -DLIBC_BUILD_DEP="$(LIBC_BUILD_DEP)" \ -DCHECK_BUILD_DEP="$(CHECK_BUILD_DEP)" \ -DAUTO_BUILD_DEP="$(AUTO_BUILD_DEP)" \ + -DSDT_BUILD_DEP="$(SDT_BUILD_DEP)" \ -DCLOOG_BUILD_DEP="$(CLOOG_BUILD_DEP)" \ -DGMP_BUILD_DEP="$(GMP_BUILD_DEP)" \ -DMPFR_BUILD_DEP="$(MPFR_BUILD_DEP)" \ -- cgit v1.2.3 From 1bf573a334526491ec222f814d0972e4a2458548 Mon Sep 17 00:00:00 2001 From: doko Date: Thu, 7 Aug 2014 13:20:35 +0000 Subject: * Update to SVN 20140807 (r213709) from the gcc-4_9-branch. git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7559 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca --- debian/changelog | 4 +- debian/patches/libstdc++-python3.diff | 196 --- debian/patches/svn-updates.diff | 2278 ++++++++++++++++++++++++++++++++- debian/rules.patch | 4 - 4 files changed, 2245 insertions(+), 237 deletions(-) delete mode 100644 debian/patches/libstdc++-python3.diff (limited to 'debian') diff --git a/debian/changelog b/debian/changelog index c8888ef..4b087d4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,10 +1,10 @@ gcc-4.9 (4.9.1-5) UNRELEASED; urgency=medium - * Update to SVN 20140802 (r213518) from the gcc-4_9-branch. + * Update to SVN 20140807 (r213709) from the gcc-4_9-branch. - Fix PR tree-optimization/61964. LP: #1347147. * Fix libphobos cross build. - -- Matthias Klose Sat, 02 Aug 2014 20:34:30 +0200 + -- Matthias Klose Thu, 07 Aug 2014 15:19:59 +0200 gcc-4.9 (4.9.1-4) unstable; urgency=high diff --git a/debian/patches/libstdc++-python3.diff b/debian/patches/libstdc++-python3.diff deleted file mode 100644 index 2a9fb12..0000000 --- a/debian/patches/libstdc++-python3.diff +++ /dev/null @@ -1,196 +0,0 @@ -# DP: Make the libstdc++-v3 pretty printer compatible with Python3. - -Index: b/src/libstdc++-v3/python/libstdcxx/v6/printers.py -=================================================================== ---- a/src/libstdc++-v3/python/libstdcxx/v6/printers.py -+++ b/src/libstdc++-v3/python/libstdcxx/v6/printers.py -@@ -51,7 +51,7 @@ def find_type(orig, name): - # anything fancier here. 
- field = typ.fields()[0] - if not field.is_base_class: -- raise ValueError, "Cannot find type %s::%s" % (str(orig), name) -+ raise ValueError("Cannot find type %s::%s" % (str(orig), name)) - typ = field.type - - class SharedPointerPrinter: -@@ -97,7 +97,7 @@ class StdListPrinter: - def __iter__(self): - return self - -- def next(self): -+ def __next__(self): - if self.base == self.head: - raise StopIteration - elt = self.base.cast(self.nodetype).dereference() -@@ -144,7 +144,7 @@ class StdSlistPrinter: - def __iter__(self): - return self - -- def next(self): -+ def __next__(self): - if self.base == 0: - raise StopIteration - elt = self.base.cast(self.nodetype).dereference() -@@ -198,7 +198,7 @@ class StdVectorPrinter: - def __iter__(self): - return self - -- def next(self): -+ def __next__(self): - count = self.count - self.count = self.count + 1 - if self.bitvec: -@@ -276,20 +276,20 @@ class StdTuplePrinter: - # Set the actual head to the first pair. - self.head = self.head.cast (nodes[0].type) - elif len (nodes) != 0: -- raise ValueError, "Top of tuple tree does not consist of a single node." -+ raise ValueError("Top of tuple tree does not consist of a single node.") - self.count = 0 - - def __iter__ (self): - return self - -- def next (self): -+ def __next__ (self): - nodes = self.head.type.fields () - # Check for further recursions in the inheritance tree. - if len (nodes) == 0: - raise StopIteration - # Check that this iteration has an expected structure. - if len (nodes) != 2: -- raise ValueError, "Cannot parse more than 2 nodes in a tuple tree." -+ raise ValueError("Cannot parse more than 2 nodes in a tuple tree.") - - # - Left node is the next recursion parent. - # - Right node is the actual class contained in the tuple. -@@ -353,7 +353,7 @@ class RbtreeIterator: - def __len__(self): - return int (self.size) - -- def next(self): -+ def __next__(self): - if self.count == self.size: - raise StopIteration - result = self.node -@@ -389,7 +389,7 @@ def get_value_from_Rb_tree_node(node): - return p.dereference() - except: - pass -- raise ValueError, "Unsupported implementation for %s" % str(node.type) -+ raise ValueError("Unsupported implementation for %s" % str(node.type)) - - # This is a pretty printer for std::_Rb_tree_iterator (which is - # std::map::iterator), and has nothing to do with the RbtreeIterator -@@ -431,9 +431,9 @@ class StdMapPrinter: - def __iter__(self): - return self - -- def next(self): -+ def __next__(self): - if self.count % 2 == 0: -- n = self.rbiter.next() -+ n = next(self.rbiter) - n = n.cast(self.type).dereference() - n = get_value_from_Rb_tree_node(n) - self.pair = n -@@ -474,8 +474,8 @@ class StdSetPrinter: - def __iter__(self): - return self - -- def next(self): -- item = self.rbiter.next() -+ def __next__(self): -+ item = next(self.rbiter) - item = item.cast(self.type).dereference() - item = get_value_from_Rb_tree_node(item) - # FIXME: this is weird ... what to do? 
-@@ -553,7 +553,7 @@ class StdDequePrinter: - def __iter__(self): - return self - -- def next(self): -+ def __next__(self): - if self.p == self.last: - raise StopIteration - -@@ -591,7 +591,7 @@ class StdDequePrinter: - - size = self.buffer_size * delta_n + delta_s + delta_e - -- return '%s with %d elements' % (self.typename, long (size)) -+ return '%s with %d elements' % (self.typename, int (size)) - - def children(self): - start = self.val['_M_impl']['_M_start'] -@@ -654,7 +654,7 @@ class Tr1HashtableIterator: - def __iter__ (self): - return self - -- def next (self): -+ def __next__ (self): - if self.node == 0: - raise StopIteration - node = self.node.cast(self.node_type) -@@ -677,7 +677,7 @@ class StdHashtableIterator: - def __iter__(self): - return self - -- def next(self): -+ def __next__(self): - if self.node == 0: - raise StopIteration - elt = self.node.cast(self.node_type).dereference() -@@ -706,10 +706,10 @@ class Tr1UnorderedSetPrinter: - return '[%d]' % i - - def children (self): -- counter = itertools.imap (self.format_count, itertools.count()) -+ counter = list(map (self.format_count, itertools.count())) - if self.typename.startswith('std::tr1'): -- return itertools.izip (counter, Tr1HashtableIterator (self.hashtable())) -- return itertools.izip (counter, StdHashtableIterator (self.hashtable())) -+ return list(zip (counter, Tr1HashtableIterator (self.hashtable()))) -+ return list(zip (counter, StdHashtableIterator (self.hashtable()))) - - class Tr1UnorderedMapPrinter: - "Print a tr1::unordered_map" -@@ -741,15 +741,15 @@ class Tr1UnorderedMapPrinter: - return '[%d]' % i - - def children (self): -- counter = itertools.imap (self.format_count, itertools.count()) -+ counter = list(map (self.format_count, itertools.count())) - # Map over the hash table and flatten the result. - if self.typename.startswith('std::tr1'): -- data = self.flatten (itertools.imap (self.format_one, Tr1HashtableIterator (self.hashtable()))) -+ data = self.flatten (list(map (self.format_one, Tr1HashtableIterator (self.hashtable())))) - # Zip the two iterators together. -- return itertools.izip (counter, data) -- data = self.flatten (itertools.imap (self.format_one, StdHashtableIterator (self.hashtable()))) -+ return list(zip (counter, data)) -+ data = self.flatten (list(map (self.format_one, StdHashtableIterator (self.hashtable())))) - # Zip the two iterators together. -- return itertools.izip (counter, data) -+ return list(zip (counter, data)) - - - def display_hint (self): -@@ -767,7 +767,7 @@ class StdForwardListPrinter: - def __iter__(self): - return self - -- def next(self): -+ def __next__(self): - if self.base == 0: - raise StopIteration - elt = self.base.cast(self.nodetype).dereference() -@@ -827,7 +827,7 @@ class Printer(object): - # A small sanity check. - # FIXME - if not self.compiled_rx.match(name + '<>'): -- raise ValueError, 'libstdc++ programming error: "%s" does not match' % name -+ raise ValueError('libstdc++ programming error: "%s" does not match' % name) - printer = RxPrinter(name, function) - self.subprinters.append(printer) - self.lookup[name] = printer diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff index 7a41a72..67a29a1 100644 --- a/debian/patches/svn-updates.diff +++ b/debian/patches/svn-updates.diff @@ -1,10 +1,10 @@ -# DP: updates from the 4.9 branch upto 20140802 (r213518). +# DP: updates from the 4.9 branch upto 20140807 (r213709). 
last_update() { cat > ${dir}LAST_UPDATED prev_child->next_child = child_task->next_child; child_task->next_child->prev_child = child_task->prev_child; if (task->children == child_task) +@@ -897,18 +1115,26 @@ + if (taskgroup->children == NULL) + { + if (taskgroup->num_children) +- goto do_wait; +- gomp_mutex_unlock (&team->task_lock); +- if (to_free) + { +- gomp_finish_task (to_free); +- free (to_free); ++ if (task->children == NULL) ++ goto do_wait; ++ child_task = task->children; ++ } ++ else ++ { ++ gomp_mutex_unlock (&team->task_lock); ++ if (to_free) ++ { ++ gomp_finish_task (to_free); ++ free (to_free); ++ } ++ goto finish; + } +- goto finish; + } +- if (taskgroup->children->kind == GOMP_TASK_WAITING) ++ else ++ child_task = taskgroup->children; ++ if (child_task->kind == GOMP_TASK_WAITING) + { +- child_task = taskgroup->children; + cancelled + = gomp_task_run_pre (child_task, child_task->parent, taskgroup, + team); +@@ -925,6 +1151,7 @@ + } + else + { ++ child_task = NULL; + do_wait: + /* All tasks we are waiting for are already running + in other threads. Wait for them. */ +@@ -956,20 +1183,9 @@ + finish_cancelled:; + size_t new_tasks + = gomp_task_run_post_handle_depend (child_task, team); +- child_task->prev_taskgroup->next_taskgroup +- = child_task->next_taskgroup; +- child_task->next_taskgroup->prev_taskgroup +- = child_task->prev_taskgroup; +- --taskgroup->num_children; +- if (taskgroup->children == child_task) +- { +- if (child_task->next_taskgroup != child_task) +- taskgroup->children = child_task->next_taskgroup; +- else +- taskgroup->children = NULL; +- } + gomp_task_run_post_remove_parent (child_task); + gomp_clear_parent (child_task->children); ++ gomp_task_run_post_remove_taskgroup (child_task); + to_free = child_task; + child_task = NULL; + team->task_count--; Index: libgomp/ChangeLog =================================================================== --- a/src/libgomp/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/libgomp/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,33 @@ +@@ -1,3 +1,44 @@ ++2014-08-04 Jakub Jelinek ++ ++ * task.c (GOMP_taskgroup_end): If taskgroup->num_children ++ is not zero, but taskgroup->children is NULL and there are ++ any task->children, schedule those instead of waiting. ++ * testsuite/libgomp.c/depend-6.c: New test. ++ * testsuite/libgomp.c/depend-7.c: New test. ++ * testsuite/libgomp.c/depend-8.c: New test. ++ * testsuite/libgomp.c/depend-9.c: New test. ++ * testsuite/libgomp.c/depend-10.c: New test. ++ +2014-08-01 Jakub Jelinek + + * libgomp.h (struct gomp_task_depend_entry): Add redundant_out field. @@ -765,6 +841,22 @@ Index: libgomp/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. 
+Index: libgomp/testsuite/libgomp.c/depend-8.c +=================================================================== +--- a/src/libgomp/testsuite/libgomp.c/depend-8.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgomp/testsuite/libgomp.c/depend-8.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,3 @@ ++/* { dg-set-target-env-var OMP_NUM_THREADS "1" } */ ++ ++#include "depend-3.c" +Index: libgomp/testsuite/libgomp.c/depend-10.c +=================================================================== +--- a/src/libgomp/testsuite/libgomp.c/depend-10.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgomp/testsuite/libgomp.c/depend-10.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,3 @@ ++/* { dg-set-target-env-var OMP_NUM_THREADS "1" } */ ++ ++#include "depend-5.c" Index: libgomp/testsuite/libgomp.c/depend-5.c =================================================================== --- a/src/libgomp/testsuite/libgomp.c/depend-5.c (.../tags/gcc_4_9_1_release) @@ -868,6 +960,796 @@ Index: libgomp/testsuite/libgomp.c/depend-5.c + f1 (1); + return 0; +} +Index: libgomp/testsuite/libgomp.c/depend-9.c +=================================================================== +--- a/src/libgomp/testsuite/libgomp.c/depend-9.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgomp/testsuite/libgomp.c/depend-9.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,3 @@ ++/* { dg-set-target-env-var OMP_NUM_THREADS "1" } */ ++ ++#include "depend-4.c" +Index: libgomp/testsuite/libgomp.c/depend-6.c +=================================================================== +--- a/src/libgomp/testsuite/libgomp.c/depend-6.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgomp/testsuite/libgomp.c/depend-6.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,3 @@ ++/* { dg-set-target-env-var OMP_NUM_THREADS "1" } */ ++ ++#include "depend-1.c" +Index: libgomp/testsuite/libgomp.c/depend-7.c +=================================================================== +--- a/src/libgomp/testsuite/libgomp.c/depend-7.c (.../tags/gcc_4_9_1_release) ++++ b/src/libgomp/testsuite/libgomp.c/depend-7.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,3 @@ ++/* { dg-set-target-env-var OMP_NUM_THREADS "1" } */ ++ ++#include "depend-2.c" +Index: libstdc++-v3/python/libstdcxx/v6/printers.py +=================================================================== +--- a/src/libstdc++-v3/python/libstdcxx/v6/printers.py (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/python/libstdcxx/v6/printers.py (.../branches/gcc-4_9-branch) +@@ -1,4 +1,4 @@ +-# Pretty-printers for libstc++. ++# Pretty-printers for libstdc++. + + # Copyright (C) 2008-2014 Free Software Foundation, Inc. + +@@ -18,7 +18,51 @@ + import gdb + import itertools + import re ++import sys + ++### Python 2 + Python 3 compatibility code ++ ++# Resources about compatibility: ++# ++# * : Documentation of the "six" module ++ ++# FIXME: The handling of e.g. std::basic_string (at least on char) ++# probably needs updating to work with Python 3's new string rules. ++# ++# In particular, Python 3 has a separate type (called byte) for ++# bytestrings, and a special b"" syntax for the byte literals; the old ++# str() type has been redefined to always store Unicode text. ++# ++# We probably can't do much about this until this GDB PR is addressed: ++# ++ ++if sys.version_info[0] > 2: ++ ### Python 3 stuff ++ Iterator = object ++ # Python 3 folds these into the normal functions. 
++ imap = map ++ izip = zip ++ # Also, int subsumes long ++ long = int ++else: ++ ### Python 2 stuff ++ class Iterator: ++ """Compatibility mixin for iterators ++ ++ Instead of writing next() methods for iterators, write ++ __next__() methods and use this mixin to make them work in ++ Python 2 as well as Python 3. ++ ++ Idea stolen from the "six" documentation: ++ ++ """ ++ ++ def next(self): ++ return self.__next__() ++ ++ # In Python 2, we still need these from itertools ++ from itertools import imap, izip ++ + # Try to use the new-style pretty-printing if available. + _use_gdb_pp = True + try: +@@ -51,7 +95,7 @@ + # anything fancier here. + field = typ.fields()[0] + if not field.is_base_class: +- raise ValueError, "Cannot find type %s::%s" % (str(orig), name) ++ raise ValueError("Cannot find type %s::%s" % (str(orig), name)) + typ = field.type + + class SharedPointerPrinter: +@@ -87,7 +131,7 @@ + class StdListPrinter: + "Print a std::list" + +- class _iterator: ++ class _iterator(Iterator): + def __init__(self, nodetype, head): + self.nodetype = nodetype + self.base = head['_M_next'] +@@ -97,7 +141,7 @@ + def __iter__(self): + return self + +- def next(self): ++ def __next__(self): + if self.base == self.head: + raise StopIteration + elt = self.base.cast(self.nodetype).dereference() +@@ -135,7 +179,7 @@ + class StdSlistPrinter: + "Print a __gnu_cxx::slist" + +- class _iterator: ++ class _iterator(Iterator): + def __init__(self, nodetype, head): + self.nodetype = nodetype + self.base = head['_M_head']['_M_next'] +@@ -144,7 +188,7 @@ + def __iter__(self): + return self + +- def next(self): ++ def __next__(self): + if self.base == 0: + raise StopIteration + elt = self.base.cast(self.nodetype).dereference() +@@ -180,7 +224,7 @@ + class StdVectorPrinter: + "Print a std::vector" + +- class _iterator: ++ class _iterator(Iterator): + def __init__ (self, start, finish, bitvec): + self.bitvec = bitvec + if bitvec: +@@ -198,7 +242,7 @@ + def __iter__(self): + return self + +- def next(self): ++ def __next__(self): + count = self.count + self.count = self.count + 1 + if self.bitvec: +@@ -265,7 +309,7 @@ + class StdTuplePrinter: + "Print a std::tuple" + +- class _iterator: ++ class _iterator(Iterator): + def __init__ (self, head): + self.head = head + +@@ -276,13 +320,13 @@ + # Set the actual head to the first pair. + self.head = self.head.cast (nodes[0].type) + elif len (nodes) != 0: +- raise ValueError, "Top of tuple tree does not consist of a single node." ++ raise ValueError("Top of tuple tree does not consist of a single node.") + self.count = 0 + + def __iter__ (self): + return self + +- def next (self): ++ def __next__ (self): + nodes = self.head.type.fields () + # Check for further recursions in the inheritance tree. + if len (nodes) == 0: +@@ -289,7 +333,7 @@ + raise StopIteration + # Check that this iteration has an expected structure. + if len (nodes) != 2: +- raise ValueError, "Cannot parse more than 2 nodes in a tuple tree." ++ raise ValueError("Cannot parse more than 2 nodes in a tuple tree.") + + # - Left node is the next recursion parent. + # - Right node is the actual class contained in the tuple. 
+@@ -341,7 +385,7 @@ + return self.visualizer.display_hint () + return None + +-class RbtreeIterator: ++class RbtreeIterator(Iterator): + def __init__(self, rbtree): + self.size = rbtree['_M_t']['_M_impl']['_M_node_count'] + self.node = rbtree['_M_t']['_M_impl']['_M_header']['_M_left'] +@@ -353,7 +397,7 @@ + def __len__(self): + return int (self.size) + +- def next(self): ++ def __next__(self): + if self.count == self.size: + raise StopIteration + result = self.node +@@ -389,7 +433,7 @@ + return p.dereference() + except: + pass +- raise ValueError, "Unsupported implementation for %s" % str(node.type) ++ raise ValueError("Unsupported implementation for %s" % str(node.type)) + + # This is a pretty printer for std::_Rb_tree_iterator (which is + # std::map::iterator), and has nothing to do with the RbtreeIterator +@@ -422,7 +466,7 @@ + "Print a std::map or std::multimap" + + # Turn an RbtreeIterator into a pretty-print iterator. +- class _iter: ++ class _iter(Iterator): + def __init__(self, rbiter, type): + self.rbiter = rbiter + self.count = 0 +@@ -431,9 +475,9 @@ + def __iter__(self): + return self + +- def next(self): ++ def __next__(self): + if self.count % 2 == 0: +- n = self.rbiter.next() ++ n = next(self.rbiter) + n = n.cast(self.type).dereference() + n = get_value_from_Rb_tree_node(n) + self.pair = n +@@ -465,7 +509,7 @@ + "Print a std::set or std::multiset" + + # Turn an RbtreeIterator into a pretty-print iterator. +- class _iter: ++ class _iter(Iterator): + def __init__(self, rbiter, type): + self.rbiter = rbiter + self.count = 0 +@@ -474,8 +518,8 @@ + def __iter__(self): + return self + +- def next(self): +- item = self.rbiter.next() ++ def __next__(self): ++ item = next(self.rbiter) + item = item.cast(self.type).dereference() + item = get_value_from_Rb_tree_node(item) + # FIXME: this is weird ... what to do? 
+@@ -541,7 +585,7 @@ + class StdDequePrinter: + "Print a std::deque" + +- class _iter: ++ class _iter(Iterator): + def __init__(self, node, start, end, last, buffer_size): + self.node = node + self.p = start +@@ -553,7 +597,7 @@ + def __iter__(self): + return self + +- def next(self): ++ def __next__(self): + if self.p == self.last: + raise StopIteration + +@@ -638,7 +682,7 @@ + def display_hint (self): + return 'string' + +-class Tr1HashtableIterator: ++class Tr1HashtableIterator(Iterator): + def __init__ (self, hash): + self.buckets = hash['_M_buckets'] + self.bucket = 0 +@@ -654,7 +698,7 @@ + def __iter__ (self): + return self + +- def next (self): ++ def __next__ (self): + if self.node == 0: + raise StopIteration + node = self.node.cast(self.node_type) +@@ -669,7 +713,7 @@ + self.bucket = self.bucket + 1 + return result + +-class StdHashtableIterator: ++class StdHashtableIterator(Iterator): + def __init__(self, hash): + self.node = hash['_M_before_begin']['_M_nxt'] + self.node_type = find_type(hash.type, '__node_type').pointer() +@@ -677,7 +721,7 @@ + def __iter__(self): + return self + +- def next(self): ++ def __next__(self): + if self.node == 0: + raise StopIteration + elt = self.node.cast(self.node_type).dereference() +@@ -706,10 +750,10 @@ + return '[%d]' % i + + def children (self): +- counter = itertools.imap (self.format_count, itertools.count()) ++ counter = imap (self.format_count, itertools.count()) + if self.typename.startswith('std::tr1'): +- return itertools.izip (counter, Tr1HashtableIterator (self.hashtable())) +- return itertools.izip (counter, StdHashtableIterator (self.hashtable())) ++ return izip (counter, Tr1HashtableIterator (self.hashtable())) ++ return izip (counter, StdHashtableIterator (self.hashtable())) + + class Tr1UnorderedMapPrinter: + "Print a tr1::unordered_map" +@@ -741,15 +785,15 @@ + return '[%d]' % i + + def children (self): +- counter = itertools.imap (self.format_count, itertools.count()) ++ counter = imap (self.format_count, itertools.count()) + # Map over the hash table and flatten the result. + if self.typename.startswith('std::tr1'): +- data = self.flatten (itertools.imap (self.format_one, Tr1HashtableIterator (self.hashtable()))) ++ data = self.flatten (imap (self.format_one, Tr1HashtableIterator (self.hashtable()))) + # Zip the two iterators together. +- return itertools.izip (counter, data) +- data = self.flatten (itertools.imap (self.format_one, StdHashtableIterator (self.hashtable()))) ++ return izip (counter, data) ++ data = self.flatten (imap (self.format_one, StdHashtableIterator (self.hashtable()))) + # Zip the two iterators together. +- return itertools.izip (counter, data) ++ return izip (counter, data) + + + def display_hint (self): +@@ -758,7 +802,7 @@ + class StdForwardListPrinter: + "Print a std::forward_list" + +- class _iterator: ++ class _iterator(Iterator): + def __init__(self, nodetype, head): + self.nodetype = nodetype + self.base = head['_M_next'] +@@ -767,7 +811,7 @@ + def __iter__(self): + return self + +- def next(self): ++ def __next__(self): + if self.base == 0: + raise StopIteration + elt = self.base.cast(self.nodetype).dereference() +@@ -827,7 +871,7 @@ + # A small sanity check. 
+ # FIXME + if not self.compiled_rx.match(name + '<>'): +- raise ValueError, 'libstdc++ programming error: "%s" does not match' % name ++ raise ValueError('libstdc++ programming error: "%s" does not match' % name) + printer = RxPrinter(name, function) + self.subprinters.append(printer) + self.lookup[name] = printer +Index: libstdc++-v3/include/std/future +=================================================================== +--- a/src/libstdc++-v3/include/std/future (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/std/future (.../branches/gcc-4_9-branch) +@@ -1240,6 +1240,10 @@ + { + _M_result->_M_set(_M_fn()); + } ++ __catch(const __cxxabiv1::__forced_unwind&) ++ { ++ __throw_exception_again; // will cause broken_promise ++ } + __catch(...) + { + _M_result->_M_error = current_exception(); +@@ -1259,6 +1263,10 @@ + { + _M_fn(); + } ++ __catch(const __cxxabiv1::__forced_unwind&) ++ { ++ __throw_exception_again; // will cause broken_promise ++ } + __catch(...) + { + _M_result->_M_error = current_exception(); +@@ -1519,7 +1527,17 @@ + : _M_result(new _Result<_Res>()), _M_fn(std::move(__fn)) + { + _M_thread = std::thread{ [this] { +- _M_set_result(_S_task_setter(_M_result, _M_fn)); ++ __try ++ { ++ _M_set_result(_S_task_setter(_M_result, _M_fn)); ++ } ++ __catch (const __cxxabiv1::__forced_unwind&) ++ { ++ // make the shared state ready on thread cancellation ++ if (static_cast(_M_result)) ++ this->_M_break_promise(std::move(_M_result)); ++ __throw_exception_again; ++ } + } }; + } + +Index: libstdc++-v3/include/std/condition_variable +=================================================================== +--- a/src/libstdc++-v3/include/std/condition_variable (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/std/condition_variable (.../branches/gcc-4_9-branch) +@@ -189,7 +189,14 @@ + ~_Unlock() noexcept(false) + { + if (uncaught_exception()) +- __try { _M_lock.lock(); } __catch(...) { } ++ { ++ __try ++ { _M_lock.lock(); } ++ __catch(const __cxxabiv1::__forced_unwind&) ++ { __throw_exception_again; } ++ __catch(...) ++ { } ++ } + else + _M_lock.lock(); + } +Index: libstdc++-v3/include/std/mutex +=================================================================== +--- a/src/libstdc++-v3/include/std/mutex (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/std/mutex (.../branches/gcc-4_9-branch) +@@ -44,6 +44,7 @@ + #include + #include + #include // for std::swap ++#include + + #ifdef _GLIBCXX_USE_C99_STDINT_TR1 + +@@ -649,6 +650,8 @@ + auto __locks = std::tie(__l1, __l2, __l3...); + __try + { __try_lock_impl<0>::__do_try_lock(__locks, __idx); } ++ __catch(const __cxxabiv1::__forced_unwind&) ++ { __throw_exception_again; } + __catch(...) 
+ { } + return __idx; +Index: libstdc++-v3/include/experimental/string_view +=================================================================== +--- a/src/libstdc++-v3/include/experimental/string_view (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/experimental/string_view (.../branches/gcc-4_9-branch) +@@ -39,7 +39,6 @@ + # include + #else + +-#include + #include + #include + +@@ -66,18 +65,10 @@ + * _CharT* _M_str + * size_t _M_len + * @endcode +- * +- * A basic_string_view represents an empty string with a static constexpr +- * length one string: +- * +- * @code +- * static constexpr value_type _S_empty_str[1]{0}; +- * @endcode + */ +- template> ++ template> + class basic_string_view + { +- + public: + + // types +@@ -99,7 +90,7 @@ + + constexpr + basic_string_view() noexcept +- : _M_len{0}, _M_str{_S_empty_str} ++ : _M_len{0}, _M_str{nullptr} + { } + + constexpr basic_string_view(const basic_string_view&) noexcept = default; +@@ -112,12 +103,12 @@ + + constexpr basic_string_view(const _CharT* __str) + : _M_len{__str == nullptr ? 0 : traits_type::length(__str)}, +- _M_str{__str == nullptr ? _S_empty_str : __str} ++ _M_str{__str} + { } + + constexpr basic_string_view(const _CharT* __str, size_type __len) +- : _M_len{__str == nullptr ? 0 :__len}, +- _M_str{__str == nullptr ? _S_empty_str : __str} ++ : _M_len{__len}, ++ _M_str{__str} + { } + + basic_string_view& +@@ -143,19 +134,19 @@ + + const_reverse_iterator + rbegin() const noexcept +- { return std::reverse_iterator(this->end()); } ++ { return const_reverse_iterator(this->end()); } + + const_reverse_iterator + rend() const noexcept +- { return std::reverse_iterator(this->begin()); } ++ { return const_reverse_iterator(this->begin()); } + + const_reverse_iterator + crbegin() const noexcept +- { return std::reverse_iterator(this->end()); } ++ { return const_reverse_iterator(this->end()); } + + const_reverse_iterator + crend() const noexcept +- { return std::reverse_iterator(this->begin()); } ++ { return const_reverse_iterator(this->begin()); } + + // [string.view.capacity], capacity + +@@ -169,8 +160,10 @@ + + constexpr size_type + max_size() const noexcept +- { return ((npos - sizeof(size_type) - sizeof(void*)) +- / sizeof(value_type) / 4); } ++ { ++ return (npos - sizeof(size_type) - sizeof(void*)) ++ / sizeof(value_type) / 4; ++ } + + constexpr bool + empty() const noexcept +@@ -195,7 +188,7 @@ + "(which is %zu) >= this->size() " + "(which is %zu)"), + __pos, this->size()), +- _S_empty_str[0]); ++ *this->_M_str); + } + + constexpr const _CharT& +@@ -219,11 +212,12 @@ + { return this->_M_str; } + + // [string.view.modifiers], modifiers: ++ + void + clear() noexcept + { + this->_M_len = 0; +- this->_M_str = _S_empty_str; ++ this->_M_str = nullptr; + } + + void +@@ -251,10 +245,16 @@ + template + explicit operator basic_string<_CharT, _Traits, _Allocator>() const + { +- return basic_string<_CharT, _Traits, _Allocator> +- (this->_M_len, this->_M_str); ++ return { this->_M_str, this->_M_len }; + } + ++ template> ++ basic_string<_CharT, _Traits, _Allocator> ++ to_string(const _Allocator& __alloc = _Allocator()) const ++ { ++ return { this->_M_str, this->_M_len, __alloc }; ++ } ++ + size_type + copy(_CharT* __str, size_type __n, size_type __pos = 0) const + { +@@ -431,8 +431,6 @@ + : static_cast(difference_type{__n1 - __n2}); + } + +- static constexpr value_type _S_empty_str[1]{}; +- + size_t _M_len; + const _CharT* _M_str; + }; +@@ -456,131 +454,119 @@ + } + + template +- bool ++ inline bool + 
operator==(basic_string_view<_CharT, _Traits> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) == 0; } + + template +- bool ++ inline bool + operator==(basic_string_view<_CharT, _Traits> __x, + __detail::__idt> __y) noexcept + { return __x.compare(__y) == 0; } + + template +- bool ++ inline bool + operator==(__detail::__idt> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) == 0; } + + template +- bool ++ inline bool + operator!=(basic_string_view<_CharT, _Traits> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return !(__x == __y); } + + template +- bool ++ inline bool + operator!=(basic_string_view<_CharT, _Traits> __x, + __detail::__idt> __y) noexcept + { return !(__x == __y); } + + template +- bool ++ inline bool + operator!=(__detail::__idt> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return !(__x == __y); } + + template +- bool ++ inline bool + operator< (basic_string_view<_CharT, _Traits> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) < 0; } + + template +- bool ++ inline bool + operator< (basic_string_view<_CharT, _Traits> __x, + __detail::__idt> __y) noexcept + { return __x.compare(__y) < 0; } + + template +- bool ++ inline bool + operator< (__detail::__idt> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) < 0; } + + template +- bool ++ inline bool + operator> (basic_string_view<_CharT, _Traits> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) > 0; } + + template +- bool ++ inline bool + operator> (basic_string_view<_CharT, _Traits> __x, + __detail::__idt> __y) noexcept + { return __x.compare(__y) > 0; } + + template +- bool ++ inline bool + operator> (__detail::__idt> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) > 0; } + + template +- bool ++ inline bool + operator<=(basic_string_view<_CharT, _Traits> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) <= 0; } + + template +- bool ++ inline bool + operator<=(basic_string_view<_CharT, _Traits> __x, + __detail::__idt> __y) noexcept + { return __x.compare(__y) <= 0; } + + template +- bool ++ inline bool + operator<=(__detail::__idt> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) <= 0; } + + template +- bool ++ inline bool + operator>=(basic_string_view<_CharT, _Traits> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) >= 0; } + + template +- bool ++ inline bool + operator>=(basic_string_view<_CharT, _Traits> __x, + __detail::__idt> __y) noexcept + { return __x.compare(__y) >= 0; } + + template +- bool ++ inline bool + operator>=(__detail::__idt> __x, + basic_string_view<_CharT, _Traits> __y) noexcept + { return __x.compare(__y) >= 0; } + +- // [string.view.comparison], sufficient additional overloads of comparison functions +- +- // [string.view.nonmem], other non-member basic_string_view functions +- template, +- typename _Allocator = allocator<_CharT>> +- basic_string<_CharT, _Traits, _Allocator> +- to_string(basic_string_view<_CharT, _Traits> __str, +- const _Allocator& __alloc = _Allocator()) +- { +- return basic_string<_CharT, _Traits, _Allocator> +- (__str.begin(), __str.end(), __alloc); +- } +- ++ // [string.view.io], Inserters and extractors + template +- basic_ostream<_CharT, _Traits>& +- operator<<(basic_ostream<_CharT, _Traits>& __os, +- basic_string_view<_CharT,_Traits> __str) +- { 
return __ostream_insert(__os, __str.data(), __str.size()); } ++ inline basic_ostream<_CharT, _Traits>& ++ operator<<(basic_ostream<_CharT, _Traits>& __os, ++ basic_string_view<_CharT,_Traits> __str) ++ { return __ostream_insert(__os, __str.data(), __str.size()); } + + + // basic_string_view typedef names +Index: libstdc++-v3/include/experimental/string_view.tcc +=================================================================== +--- a/src/libstdc++-v3/include/experimental/string_view.tcc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/experimental/string_view.tcc (.../branches/gcc-4_9-branch) +@@ -47,10 +47,6 @@ + _GLIBCXX_BEGIN_NAMESPACE_VERSION + + template +- constexpr _CharT +- basic_string_view<_CharT, _Traits>::_S_empty_str[1]; +- +- template + typename basic_string_view<_CharT, _Traits>::size_type + basic_string_view<_CharT, _Traits>:: + find(const _CharT* __str, size_type __pos, size_type __n) const noexcept +Index: libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/traits.hpp +=================================================================== +--- a/src/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/traits.hpp (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/ext/pb_ds/detail/bin_search_tree_/traits.hpp (.../branches/gcc-4_9-branch) +@@ -55,7 +55,7 @@ + class Cmp_Fn, + template + class Node_Update, + class Node, +@@ -161,7 +161,7 @@ + class Cmp_Fn, + template + class Node_Update, + class Node, Index: libstdc++-v3/include/ext/random.tcc =================================================================== --- a/src/libstdc++-v3/include/ext/random.tcc (.../tags/gcc_4_9_1_release) @@ -881,6 +1763,48 @@ Index: libstdc++-v3/include/ext/random.tcc __aurng(__urng); result_type __a = __param.successful_size(); +Index: libstdc++-v3/include/ext/rope +=================================================================== +--- a/src/libstdc++-v3/include/ext/rope (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/ext/rope (.../branches/gcc-4_9-branch) +@@ -1544,7 +1544,7 @@ + typedef typename _Base::allocator_type allocator_type; + using _Base::_M_tree_ptr; + using _Base::get_allocator; +- using _Base::_M_get_allocator; ++ using _Base::_M_get_allocator; + typedef __GC_CONST _CharT* _Cstrptr; + + static _CharT _S_empty_c_str[1]; +@@ -1876,8 +1876,9 @@ + const allocator_type& __a = allocator_type()) + : _Base(__a) + { +- this->_M_tree_ptr = (0 == __len) ? +- 0 : _S_new_RopeFunction(__fn, __len, __delete_fn, __a); ++ this->_M_tree_ptr = (0 == __len) ++ ? 0 ++ : _S_new_RopeFunction(__fn, __len, __delete_fn, _M_get_allocator()); + } + + rope(const rope& __x, const allocator_type& __a = allocator_type()) +Index: libstdc++-v3/include/bits/atomic_base.h +=================================================================== +--- a/src/libstdc++-v3/include/bits/atomic_base.h (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/include/bits/atomic_base.h (.../branches/gcc-4_9-branch) +@@ -675,10 +675,10 @@ + + // Factored out to facilitate explicit specialization. 
+ constexpr ptrdiff_t +- _M_type_size(ptrdiff_t __d) { return __d * sizeof(_PTp); } ++ _M_type_size(ptrdiff_t __d) const { return __d * sizeof(_PTp); } + + constexpr ptrdiff_t +- _M_type_size(ptrdiff_t __d) volatile { return __d * sizeof(_PTp); } ++ _M_type_size(ptrdiff_t __d) const volatile { return __d * sizeof(_PTp); } + + public: + __atomic_base() noexcept = default; Index: libstdc++-v3/include/bits/random.tcc =================================================================== --- a/src/libstdc++-v3/include/bits/random.tcc (.../tags/gcc_4_9_1_release) @@ -912,7 +1836,93 @@ Index: libstdc++-v3/ChangeLog =================================================================== --- a/src/libstdc++-v3/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/libstdc++-v3/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,14 @@ +@@ -1,3 +1,100 @@ ++2014-08-04 Jonathan Wakely ++ ++ Backported from mainline ++ 2014-07-29 Jonathan Wakely ++ ++ PR libstdc++/61946 ++ * include/ext/rope (rope::rope(char_producer<_CharT>*, size_t, bool, ++ const allocator_type&)): Pass non-const allocator to ++ _S_new_RopeFunction. ++ * testsuite/ext/rope/61946.cc: New. ++ ++2014-08-04 Zifei Tong ++ ++ * libsupc++/atexit_thread.cc (HAVE___CXA_THREAD_ATEXIT_IMPL): Add ++ _GLIBCXX_ prefix to macro. ++ ++2014-08-04 Samuel Bronson ++ ++ Backport r212453 from trunk ++ 2014-07-11 Samuel Bronson ++ Matthias Klose ++ ++ PR libstdc++/58962 ++ * python/libstdcxx/v6/printers.py: Port to Python 2+3 ++ (imap): New compat function. ++ (izip): Likewise. ++ (Iterator): New mixin to allow writing iterators in Python 3 style ++ regardless of which version we're running on. ++ [Python3] (long) New compat alias for "int". ++ * testsuite/lib/gdb-test.exp: Port to Python 2+3 (print syntax) ++ ++ Backport r210625 from trunk ++ 2014-05-19 Jonathan Wakely ++ ++ * python/libstdcxx/v6/printers.py: Use Python3 raise syntax. ++ ++2014-08-04 Jonathan Wakely ++ ++ Backported from mainline ++ 2014-06-10 Jonathan Wakely ++ ++ PR libstdc++/61390 ++ * include/ext/pb_ds/detail/bin_search_tree_/traits.hpp ++ (bin_search_tree_traits): Do not redeclare template-parameters. ++ * testsuite/util/testsuite_iterators.h (test_container): Likewise. ++ ++ Backported from mainline ++ 2014-06-02 Jonathan Wakely ++ ++ * include/std/condition_variable (condition_variable_any::_Unlock): Do ++ not swallow __forced_unwind. ++ * include/std/future (__future_base::_Task_setter): Likewise. ++ (__future_base::_Async_state_impl): Turn __forced_unwind into broken ++ promise and rethrow. ++ * include/std/mutex (try_lock): Likewise. ++ * testsuite/30_threads/async/forced_unwind.cc: New. ++ * testsuite/30_threads/packaged_task/forced_unwind.cc: New. ++ ++ Backported from mainline ++ 2014-06-01 Jonathan Wakely ++ ++ PR libstdc++/61374 ++ * include/experimental/string_view (operator basic_string): Correct ++ order of arguments. ++ (to_string): Replace with member function. ++ Add inline specifiers. Remove unused header. Remove _S_empty_rep and ++ allow _M_str to be null. ++ * testsuite/experimental/string_view/cons/char/1.cc: Adjust to new ++ default constructor semantics. ++ * testsuite/experimental/string_view/cons/wchar_t/1.cc: Likewise. ++ * testsuite/experimental/string_view/operations/copy/char/1.cc: Fix ++ copyright dates. Remove unused header. ++ * testsuite/experimental/string_view/operations/copy/wchar_t/1.cc: ++ Likewise. ++ * testsuite/experimental/string_view/operations/data/char/1.cc: ++ Fix copyright dates. Adjust to new default constructor semantics. 
++ * testsuite/experimental/string_view/operations/data/wchar_t/1.cc: ++ Likewise. ++ * testsuite/experimental/string_view/operations/to_string/1.cc: New. ++ ++ Backported from mainline ++ 2014-04-15 Jonathan Wakely ++ ++ * include/bits/atomic_base.h (__atomic_base<_PTp*>::_M_type_size): Add ++ const to constexpr member functions. ++ +2014-07-29 Ed Smith-Rowland <3dw4rd@verizon.net> + + PR libstdc++/60037 - SIGFPE in std::generate_canonical @@ -927,6 +1937,34 @@ Index: libstdc++-v3/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. +Index: libstdc++-v3/libsupc++/atexit_thread.cc +=================================================================== +--- a/src/libstdc++-v3/libsupc++/atexit_thread.cc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/libsupc++/atexit_thread.cc (.../branches/gcc-4_9-branch) +@@ -26,7 +26,7 @@ + #include + #include "bits/gthr.h" + +-#if HAVE___CXA_THREAD_ATEXIT_IMPL ++#if _GLIBCXX_HAVE___CXA_THREAD_ATEXIT_IMPL + + extern "C" int __cxa_thread_atexit_impl (void (*func) (void *), + void *arg, void *d); +@@ -38,7 +38,7 @@ + return __cxa_thread_atexit_impl (dtor, obj, dso_handle); + } + +-#else /* HAVE___CXA_THREAD_ATEXIT_IMPL */ ++#else /* _GLIBCXX_HAVE___CXA_THREAD_ATEXIT_IMPL */ + + namespace { + // One element in a singly-linked stack of cleanups. +@@ -142,4 +142,4 @@ + return 0; + } + +-#endif /* HAVE___CXA_THREAD_ATEXIT_IMPL */ ++#endif /* _GLIBCXX_HAVE___CXA_THREAD_ATEXIT_IMPL */ Index: libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc =================================================================== --- a/src/libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc (.../tags/gcc_4_9_1_release) @@ -947,6 +1985,305 @@ Index: libstdc++-v3/testsuite/26_numerics/random/pr60037-neg.cc +// { dg-error "static assertion failed: template argument not a floating point type" "" { target *-*-* } 167 } + +// { dg-error "static assertion failed: template argument not a floating point type" "" { target *-*-* } 3466 } +Index: libstdc++-v3/testsuite/30_threads/packaged_task/forced_unwind.cc +=================================================================== +--- a/src/libstdc++-v3/testsuite/30_threads/packaged_task/forced_unwind.cc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/testsuite/30_threads/packaged_task/forced_unwind.cc (.../branches/gcc-4_9-branch) +@@ -0,0 +1,48 @@ ++// { dg-do run { target *-*-linux* *-*-gnu* } } ++// { dg-options " -std=gnu++11 -pthread" { target *-*-linux* *-*-gnu* } } ++// { dg-require-cstdint "" } ++// { dg-require-gthreads "" } ++// { dg-require-atomic-builtins "" } ++ ++// Copyright (C) 2014 Free Software Foundation, Inc. ++// ++// This file is part of the GNU ISO C++ Library. This library is free ++// software; you can redistribute it and/or modify it under the ++// terms of the GNU General Public License as published by the ++// Free Software Foundation; either version 3, or (at your option) ++// any later version. ++ ++// This library is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. ++ ++// You should have received a copy of the GNU General Public License along ++// with this library; see the file COPYING3. If not see ++// . ++ ++// Test (non-standard) handling of __forced_unwind exception. 
++
++#include
++#include
++#include
++#include
++
++void f() { pthread_exit(nullptr); }
++
++int main()
++{
++ std::packaged_task p(f);
++ auto fut = p.get_future();
++ std::thread t(std::move(p));
++ try
++ {
++ fut.get();
++ throw std::logic_error("Unreachable");
++ }
++ catch (const std::future_error& e)
++ {
++ VERIFY( e.code() == std::future_errc::broken_promise );
++ }
++ t.join();
++}
Index: libstdc++-v3/testsuite/30_threads/async/forced_unwind.cc
===================================================================
--- a/src/libstdc++-v3/testsuite/30_threads/async/forced_unwind.cc (.../tags/gcc_4_9_1_release)
+++ b/src/libstdc++-v3/testsuite/30_threads/async/forced_unwind.cc (.../branches/gcc-4_9-branch)
@@ -0,0 +1,45 @@
++// { dg-do run { target *-*-linux* *-*-gnu* } }
++// { dg-options " -std=gnu++11 -pthread" { target *-*-linux* *-*-gnu* } }
++// { dg-require-cstdint "" }
++// { dg-require-gthreads "" }
++// { dg-require-atomic-builtins "" }
++
++// Copyright (C) 2014 Free Software Foundation, Inc.
++//
++// This file is part of the GNU ISO C++ Library. This library is free
++// software; you can redistribute it and/or modify it under the
++// terms of the GNU General Public License as published by the
++// Free Software Foundation; either version 3, or (at your option)
++// any later version.
++
++// This library is distributed in the hope that it will be useful,
++// but WITHOUT ANY WARRANTY; without even the implied warranty of
++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++// GNU General Public License for more details.
++
++// You should have received a copy of the GNU General Public License along
++// with this library; see the file COPYING3. If not see
++// .
++
++// Test (non-standard) handling of __forced_unwind exception.
++ ++#include ++#include ++#include ++#include ++ ++void f() { pthread_exit(nullptr); } ++ ++int main() ++{ ++ auto fut = std::async(std::launch::async, f); ++ try ++ { ++ fut.get(); ++ throw std::logic_error("Unreachable"); ++ } ++ catch (const std::future_error& e) ++ { ++ VERIFY( e.code() == std::future_errc::broken_promise ); ++ } ++} +Index: libstdc++-v3/testsuite/experimental/string_view/cons/wchar_t/1.cc +=================================================================== +--- a/src/libstdc++-v3/testsuite/experimental/string_view/cons/wchar_t/1.cc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/testsuite/experimental/string_view/cons/wchar_t/1.cc (.../branches/gcc-4_9-branch) +@@ -33,7 +33,7 @@ + // basic_string_view() + const std::experimental::wstring_view str00{}; + VERIFY( str00.length() == 0 ); +- VERIFY( str00.data() != nullptr ); ++ VERIFY( str00.data() == nullptr ); + + // basic_string_view(const char*) + const wchar_t str_lit01[] = L"rodeo beach, marin"; +@@ -54,11 +54,6 @@ + VERIFY( str05.length() == len_lit01 ); + VERIFY( str05.data() == str_lit01 ); + +- // basic_string_view(const wchar_t* s, std::size_t l) +- std::experimental::wstring_view str06{nullptr, len_lit01}; +- VERIFY( str06.length() == 0 ); +- VERIFY( str06.data() != nullptr ); +- + // basic_string_view(basic_string& s) + std::wstring istr07(10, L'z'); + std::experimental::wstring_view str07{istr07}; +Index: libstdc++-v3/testsuite/experimental/string_view/cons/char/1.cc +=================================================================== +--- a/src/libstdc++-v3/testsuite/experimental/string_view/cons/char/1.cc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/testsuite/experimental/string_view/cons/char/1.cc (.../branches/gcc-4_9-branch) +@@ -33,7 +33,7 @@ + // basic_string_view() + const std::experimental::string_view str00{}; + VERIFY( str00.length() == 0 ); +- VERIFY( str00.data() != nullptr ); ++ VERIFY( str00.data() == nullptr ); + + // basic_string_view(const char*) + const char str_lit01[] = "rodeo beach, marin"; +@@ -54,11 +54,6 @@ + VERIFY( str05.length() == len_lit01 ); + VERIFY( str05.data() == str_lit01 ); + +- // basic_string_view(const char* s, std::size_t l) +- std::experimental::string_view str06{nullptr, len_lit01}; +- VERIFY( str06.length() == 0 ); +- VERIFY( str06.data() != nullptr ); +- + // basic_string_view(basic_string& s) + std::string istr07(10, 'z'); + std::experimental::string_view str07{istr07}; +Index: libstdc++-v3/testsuite/experimental/string_view/operations/to_string/1.cc +=================================================================== +--- a/src/libstdc++-v3/testsuite/experimental/string_view/operations/to_string/1.cc (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/testsuite/experimental/string_view/operations/to_string/1.cc (.../branches/gcc-4_9-branch) +@@ -0,0 +1,53 @@ ++// { dg-options "-std=gnu++1y" } ++ ++// Copyright (C) 2014 Free Software Foundation, Inc. ++// ++// This file is part of the GNU ISO C++ Library. This library is free ++// software; you can redistribute it and/or modify it under the ++// terms of the GNU General Public License as published by the ++// Free Software Foundation; either version 3, or (at your option) ++// any later version. ++ ++// This library is distributed in the hope that it will be useful, ++// but WITHOUT ANY WARRANTY; without even the implied warranty of ++// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++// GNU General Public License for more details. 
++
++// You should have received a copy of the GNU General Public License along
++// with this library; see the file COPYING3. If not see
++// .
++
++// basic_string_view::to_string
++
++#include
++#include
++#include
++#include
++
++bool
++test01()
++{
++ bool test [[gnu::unused]] = true;
++
++ const char str_lit[] = "123456789A";
++ const std::experimental::string_view sv(str_lit);
++ char buffer[4] = { 0 };
++
++ auto s1 = sv.to_string();
++ VERIFY( s1 == str_lit );
++ using test_alloc = __gnu_test::tracker_allocator;
++ auto s2 = sv.to_string( test_alloc{} );
++ static_assert( std::is_same::value,
++ "to_string() uses custom allocator" );
++ VERIFY( std::equal(s1.begin(), s1.end(), s2.begin(), s2.end()) );
++ auto s3 = static_cast(sv);
++ VERIFY( s3 == s1 );
++
++ return test;
++}
++
++int
++main()
++{
++ test01();
++}
Index: libstdc++-v3/testsuite/experimental/string_view/operations/data/wchar_t/1.cc
===================================================================
--- a/src/libstdc++-v3/testsuite/experimental/string_view/operations/data/wchar_t/1.cc (.../tags/gcc_4_9_1_release)
+++ b/src/libstdc++-v3/testsuite/experimental/string_view/operations/data/wchar_t/1.cc (.../branches/gcc-4_9-branch)
@@ -1,6 +1,6 @@
 // { dg-options "-std=gnu++1y" }
 
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2014 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library. This library is free
 // software; you can redistribute it and/or modify it under the
@@ -29,10 +29,9 @@
 
 std::experimental::wstring_view empty;
 
- // data() for size == 0 is non-NULL.
 VERIFY( empty.size() == 0 );
 const std::experimental::wstring_view::value_type* p = empty.data();
- VERIFY( p );
+ VERIFY( p == nullptr );
 
 return 0;
 }
Index: libstdc++-v3/testsuite/experimental/string_view/operations/data/char/1.cc
===================================================================
--- a/src/libstdc++-v3/testsuite/experimental/string_view/operations/data/char/1.cc (.../tags/gcc_4_9_1_release)
+++ b/src/libstdc++-v3/testsuite/experimental/string_view/operations/data/char/1.cc (.../branches/gcc-4_9-branch)
@@ -1,6 +1,6 @@
 // { dg-options "-std=gnu++1y" }
 
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2014 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library. This library is free
 // software; you can redistribute it and/or modify it under the
@@ -29,10 +29,9 @@
 
 std::experimental::string_view empty;
 
- // data() for size == 0 is non-NULL.
 VERIFY( empty.size() == 0 );
 const std::experimental::string_view::value_type* p = empty.data();
- VERIFY( p );
+ VERIFY( p == nullptr );
 
 return 0;
 }
Index: libstdc++-v3/testsuite/experimental/string_view/operations/copy/wchar_t/1.cc
===================================================================
--- a/src/libstdc++-v3/testsuite/experimental/string_view/operations/copy/wchar_t/1.cc (.../tags/gcc_4_9_1_release)
+++ b/src/libstdc++-v3/testsuite/experimental/string_view/operations/copy/wchar_t/1.cc (.../branches/gcc-4_9-branch)
@@ -1,6 +1,6 @@
 // { dg-options "-std=gnu++1y" }
 
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2014 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
@@ -20,7 +20,6 @@
 // basic_string_view::copy
 
 #include
-#include
 #include
 
 bool
Index: libstdc++-v3/testsuite/experimental/string_view/operations/copy/char/1.cc
===================================================================
--- a/src/libstdc++-v3/testsuite/experimental/string_view/operations/copy/char/1.cc (.../tags/gcc_4_9_1_release)
+++ b/src/libstdc++-v3/testsuite/experimental/string_view/operations/copy/char/1.cc (.../branches/gcc-4_9-branch)
@@ -1,6 +1,6 @@
 // { dg-options "-std=gnu++1y" }
 
-// Copyright (C) 2013 Free Software Foundation, Inc.
+// Copyright (C) 2013-2014 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library. This library is free
 // software; you can redistribute it and/or modify it under the
@@ -20,7 +20,6 @@
 // basic_string_view::copy
 
 #include
-#include
 #include
 
 bool
Index: libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/pr60037.cc
===================================================================
--- a/src/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/pr60037.cc (.../tags/gcc_4_9_1_release)
+++ b/src/libstdc++-v3/testsuite/ext/random/hypergeometric_distribution/pr60037.cc (.../branches/gcc-4_9-branch)
@@ -0,0 +1,22 @@
+// { dg-options "-std=gnu++11" }
+// { dg-require-cstdint "" }
+
+#include
+#include
+#include
+
+void
+hyperplot(unsigned int N, unsigned int K, unsigned int n)
+{
+ std::mt19937 re; // engine
+ __gnu_cxx::hypergeometric_distribution<> hd(N, K, n);
+ auto gen = std::bind(hd, re);
+ gen();
+}
+
+int
+main()
+{
+ hyperplot(15, 3, 2);
+ hyperplot(500, 50, 30);
+ hyperplot(100, 20, 5);
+}
Index: libstdc++-v3/testsuite/ext/rope/61946.cc
===================================================================
--- a/src/libstdc++-v3/testsuite/ext/rope/61946.cc (.../tags/gcc_4_9_1_release)
+++ b/src/libstdc++-v3/testsuite/ext/rope/61946.cc (.../branches/gcc-4_9-branch)
@@ -0,0 +1,31 @@
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3. If not see
+// .
+
+// { dg-do compile }
+
+#include
+
+struct empty_char_prod : __gnu_cxx::char_producer
+{
+ virtual void operator()(size_t, size_t, char*) {}
+};
+
+int main ()
+{
+ empty_char_prod* ecp = new empty_char_prod;
+ __gnu_cxx::crope excrope( ecp, 10L, true );
+}
Index: libstdc++-v3/testsuite/lib/gdb-test.exp
===================================================================
--- a/src/libstdc++-v3/testsuite/lib/gdb-test.exp (.../tags/gcc_4_9_1_release)
+++ b/src/libstdc++-v3/testsuite/lib/gdb-test.exp (.../branches/gcc-4_9-branch)
@@ -91,7 +91,7 @@
 }
 }
 
- set do_whatis_tests [gdb_batch_check "python print gdb.type_printers" \
+ set do_whatis_tests [gdb_batch_check "python print(gdb.type_printers)" \
 "\\\[\\\]"]
 if {!$do_whatis_tests} {
 send_log "skipping 'whatis' tests - gdb too old"
@@ -252,6 +252,6 @@
 # but not earlier versions.
 # Return 1 if the version is ok, 0 otherwise.
+ proc gdb_version_check {} { +- return [gdb_batch_check "python print gdb.lookup_global_symbol" \ ++ return [gdb_batch_check "python print(gdb.lookup_global_symbol)" \ + ""] + } +Index: libstdc++-v3/testsuite/util/testsuite_iterators.h +=================================================================== +--- a/src/libstdc++-v3/testsuite/util/testsuite_iterators.h (.../tags/gcc_4_9_1_release) ++++ b/src/libstdc++-v3/testsuite/util/testsuite_iterators.h (.../branches/gcc-4_9-branch) +@@ -518,7 +518,7 @@ + * It takes two pointers representing a range and presents them as + * a container of iterators. + */ +- template class ItType> ++ template class ItType> + struct test_container + { + typename ItType::ContainerType bounds; Index: configure.ac =================================================================== --- a/src/configure.ac (.../tags/gcc_4_9_1_release) @@ -1057,6 +2464,35 @@ Index: configure hppa*-hp-hpux10*) host_makefile_frag="config/mh-pa-hpux10" ;; +Index: libgcc/ChangeLog +=================================================================== +--- a/src/libgcc/ChangeLog (.../tags/gcc_4_9_1_release) ++++ b/src/libgcc/ChangeLog (.../branches/gcc-4_9-branch) +@@ -1,3 +1,9 @@ ++2014-08-04 Rohit ++ ++ PR target/60102 ++ * config/rs6000/linux-unwind.h (ppc_fallback_frame_state): Update ++ based on change in SPE high register numbers and 3 HTM registers. ++ + 2014-07-16 Release Manager + + * GCC 4.9.1 released. +Index: libgcc/config/rs6000/linux-unwind.h +=================================================================== +--- a/src/libgcc/config/rs6000/linux-unwind.h (.../tags/gcc_4_9_1_release) ++++ b/src/libgcc/config/rs6000/linux-unwind.h (.../branches/gcc-4_9-branch) +@@ -274,8 +274,8 @@ + #ifdef __SPE__ + for (i = 14; i < 32; i++) + { +- fs->regs.reg[i + FIRST_PSEUDO_REGISTER - 1].how = REG_SAVED_OFFSET; +- fs->regs.reg[i + FIRST_PSEUDO_REGISTER - 1].loc.offset ++ fs->regs.reg[i + FIRST_SPE_HIGH_REGNO - 4].how = REG_SAVED_OFFSET; ++ fs->regs.reg[i + FIRST_SPE_HIGH_REGNO - 4].loc.offset + = (long) ®s->vregs - new_cfa + 4 * i; + } + #endif Index: gcc/tree-ssa-tail-merge.c =================================================================== --- a/src/gcc/tree-ssa-tail-merge.c (.../tags/gcc_4_9_1_release) @@ -1190,7 +2626,7 @@ Index: gcc/DATESTAMP +++ b/src/gcc/DATESTAMP (.../branches/gcc-4_9-branch) @@ -1 +1 @@ -20140716 -+20140802 ++20140807 Index: gcc/omp-low.c =================================================================== --- a/src/gcc/omp-low.c (.../tags/gcc_4_9_1_release) @@ -1236,7 +2672,51 @@ Index: gcc/ChangeLog =================================================================== --- a/src/gcc/ChangeLog (.../tags/gcc_4_9_1_release) +++ b/src/gcc/ChangeLog (.../branches/gcc-4_9-branch) -@@ -1,3 +1,212 @@ +@@ -1,3 +1,256 @@ ++2014-08-07 Ilya Tocar ++ ++ * config/i386/sse.md (vec_extract_lo_): Fix ++ constraint. ++ ++2014-08-06 Vladimir Makarov ++ ++ PR debug/61923 ++ * haifa-sched.c (advance_one_cycle): Fix dump. ++ (schedule_block): Don't advance cycle if we are already at the ++ beginning of the cycle. ++ ++2014-08-06 Richard Biener ++ ++ PR tree-optimization/61320 ++ * tree-ssa-loop-ivopts.c (may_be_unaligned_p): Properly ++ handle misaligned loads. ++ ++2014-08-04 Rohit ++ ++ PR target/60102 ++ * config/rs6000/rs6000.c ++ (rs6000_reg_names): Add SPE high register names. ++ (alt_reg_names): Likewise. ++ (rs6000_dwarf_register_span): For SPE high registers, replace ++ dwarf register numbers with GCC hard register numbers. ++ (rs6000_init_dwarf_reg_sizes_extra): Likewise. 
++ (rs6000_dbx_register_number): For SPE high registers, return dwarf ++ register number for the corresponding GCC hard register number. ++ * config/rs6000/rs6000.h ++ (FIRST_PSEUDO_REGISTER): Update based on 32 newly added GCC hard ++ register numbers for SPE high registers. ++ (DWARF_FRAME_REGISTERS): Likewise. ++ (DWARF_REG_TO_UNWIND_COLUMN): Likewise. ++ (DWARF_FRAME_REGNUM): Likewise. ++ (FIXED_REGISTERS): Likewise. ++ (CALL_USED_REGISTERS): Likewise. ++ (CALL_REALLY_USED_REGISTERS): Likewise. ++ (REG_ALLOC_ORDER): Likewise. ++ (enum reg_class): Likewise. ++ (REG_CLASS_NAMES): Likewise. ++ (REG_CLASS_CONTENTS): Likewise. ++ (SPE_HIGH_REGNO_P): New macro to identify SPE high registers. ++ +2014-08-01 Vladimir Makarov + + * lra-constraints.c (remove_inheritance_pseudos): Process @@ -1449,7 +2929,7 @@ Index: gcc/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. -@@ -4,14 +216,14 @@ +@@ -4,14 +260,14 @@ 2014-07-10 Cary Coutant @@ -1468,7 +2948,7 @@ Index: gcc/ChangeLog 2014-07-10 Tom G. Christensen -@@ -33,13 +245,13 @@ +@@ -33,13 +289,13 @@ PR target/61062 * config/arm/arm_neon.h (vtrn_s8, vtrn_s16, vtrn_u8, vtrn_u16, vtrn_p8, vtrn_p16, vtrn_s32, vtrn_f32, vtrn_u32, vtrnq_s8, vtrnq_s16, vtrnq_s32, @@ -1489,7 +2969,7 @@ Index: gcc/ChangeLog 2014-07-09 Alan Lawrence -@@ -157,11 +369,9 @@ +@@ -157,11 +413,9 @@ 2014-06-24 Jakub Jelinek * gimplify.c (gimplify_scan_omp_clauses) @@ -1515,7 +2995,7 @@ Index: gcc/ChangeLog (struct gimplify_adjust_omp_clauses_data): New type. (gimplify_adjust_omp_clauses_1): Adjust for data being a struct gimplify_adjust_omp_clauses_data pointer instead -@@ -196,14 +405,12 @@ +@@ -196,14 +449,12 @@ gimple_seq * argument to omp_finish_clause hook. * omp-low.c (scan_sharing_clauses): Call scan_omp_op on non-DECL_P OMP_CLAUSE_DECL if ctx->outer. @@ -1533,7 +3013,7 @@ Index: gcc/ChangeLog 2014-06-10 Jakub Jelinek -@@ -227,8 +434,7 @@ +@@ -227,8 +478,7 @@ OMP_CLAUSE_LINEAR_STMT. * omp-low.c (lower_rec_input_clauses): Fix typo. (maybe_add_implicit_barrier_cancel, lower_omp_1): Add @@ -1543,7 +3023,7 @@ Index: gcc/ChangeLog 2014-06-30 Jason Merrill -@@ -279,8 +485,7 @@ +@@ -279,8 +529,7 @@ (aarch64_sqdmlsl_lane): Likewise. (aarch64_sqdmull_lane): Likewise. (aarch64_sqdmull2_lane): Likewise. @@ -1586,6 +3066,22 @@ Index: gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-2.c +{ +} + +Index: gcc/testsuite/gcc.target/powerpc/pr60102.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/powerpc/pr60102.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/powerpc/pr60102.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-skip-if "not an SPE target" { ! powerpc_spe_nocache } { "*" } { "" } } */ ++/* { dg-options "-mcpu=8548 -mspe -mabi=spe -g -mfloat-gprs=double" } */ ++ ++double ++pr60102 (double x, int m) ++{ ++ double y; ++ y = m % 2 ? 
x : 1; ++ return y; ++} Index: gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-3.c =================================================================== --- a/src/gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-3.c (.../tags/gcc_4_9_1_release) @@ -1600,6 +3096,95 @@ Index: gcc/testsuite/gcc.target/powerpc/ppc64-abi-warn-3.c + int a __attribute__((vector_size (8))); + }; /* { dg-message "note: the layout of aggregates containing vectors with 8-byte alignment will change" } */ + +Index: gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-2.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-2.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vpermt2pd-2.c (.../branches/gcc-4_9-branch) +@@ -9,7 +9,6 @@ + #define SIZE (AVX512F_LEN / 64) + #include "avx512f-mask-type.h" + #include "math.h" +-#include "values.h" + + static void + CALC (double *dst, double *src1, long long *ind, double *src2) +Index: gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-2.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-2.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vpermt2d-2.c (.../branches/gcc-4_9-branch) +@@ -9,7 +9,6 @@ + #define SIZE (AVX512F_LEN / 32) + #include "avx512f-mask-type.h" + #include "math.h" +-#include "values.h" + + static void + CALC (int *dst, int *src1, int *ind, int *src2) +Index: gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-2.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-2.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vpermi2q-2.c (.../branches/gcc-4_9-branch) +@@ -9,7 +9,6 @@ + #define SIZE (AVX512F_LEN / 64) + #include "avx512f-mask-type.h" + #include "math.h" +-#include "values.h" + + static void + CALC (long long *dst, long long *src1, long long *ind, long long *src2) +Index: gcc/testsuite/gcc.target/i386/pr61923.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/pr61923.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/i386/pr61923.c (.../branches/gcc-4_9-branch) +@@ -0,0 +1,36 @@ ++/* PR debug/61923 */ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fcompare-debug" } */ ++ ++typedef struct ++{ ++ struct ++ { ++ struct ++ { ++ char head; ++ } tickets; ++ }; ++} arch_spinlock_t; ++struct ext4_map_blocks ++{ ++ int m_lblk; ++ int m_len; ++ int m_flags; ++}; ++int ext4_da_map_blocks_ei_0; ++void fn1 (int p1, struct ext4_map_blocks *p2) ++{ ++ int ret; ++ if (p2->m_flags) ++ { ++ ext4_da_map_blocks_ei_0++; ++ arch_spinlock_t *lock; ++ switch (sizeof *&lock->tickets.head) ++ case 1: ++ asm("" : "+m"(*&lock->tickets.head) : ""(0)); ++ __asm__(""); ++ ret = 0; ++ } ++ fn2 (p2->m_lblk, p2->m_len); ++} +Index: gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-2.c +=================================================================== +--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-2.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vpermi2d-2.c (.../branches/gcc-4_9-branch) +@@ -9,7 +9,6 @@ + #define SIZE (AVX512F_LEN / 32) + #include "avx512f-mask-type.h" + #include "math.h" +-#include "values.h" + + static void + CALC (int *dst, int *src1, int *ind, int *src2) Index: gcc/testsuite/gcc.target/i386/pr61855.c 
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/pr61855.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/pr61855.c (.../branches/gcc-4_9-branch)
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include
+
+__m512 test (__m512 x)
+{
+ return _mm512_getmant_ps(x, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_zero);
+}
+
Index: gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-2.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-2.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmss-2.c (.../branches/gcc-4_9-branch)
@@ -6,7 +6,7 @@
 #include "avx512f-check.h"
 #include "avx512f-helper.h"
 #include
-#include
+#include
 #include "avx512f-mask-type.h"
 
 void
@@ -57,10 +57,10 @@
 *r = M_PI_2;
 break;
 case 14:
- *r = MAXFLOAT;
+ *r = FLT_MAX;
 break;
 case 15:
- *r = -MAXFLOAT;
+ *r = -FLT_MAX;
 break;
 default:
 abort ();
Index: gcc/testsuite/gcc.target/i386/pr61801.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/pr61801.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/pr61801.c (.../branches/gcc-4_9-branch)
@@ -0,0 +1,21 @@
+/* PR rtl-optimization/61801 */
+/* { dg-do compile } */
+/* { dg-options "-Os -fcompare-debug" } */
+
+int a, c;
+int bar (void);
+void baz (void);
+
+void
+foo (void)
+{
+ int d;
+ if (bar ())
+ {
+ int e;
+ baz ();
+ asm volatile ("" : "=a" (e) : "0" (a), "i" (0));
+ d = e;
+ }
+ c = d;
+}
Index: gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-2.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-2.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmsd-2.c (.../branches/gcc-4_9-branch)
@@ -6,7 +6,7 @@
 #include "avx512f-check.h"
 #include "avx512f-helper.h"
 #include
-#include
+#include
 #include "avx512f-mask-type.h"
 
 void
@@ -57,10 +57,10 @@
 *r = M_PI_2;
 break;
 case 14:
- *r = MAXDOUBLE;
+ *r = DBL_MAX;
 break;
 case 15:
- *r = -MAXDOUBLE;
+ *r = -DBL_MAX;
 break;
 default:
 abort ();
Index: gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-2.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-2.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vpermi2ps-2.c (.../branches/gcc-4_9-branch)
@@ -9,7 +9,6 @@
 #define SIZE (AVX512F_LEN / 32)
 #include "avx512f-mask-type.h"
 #include "math.h"
-#include "values.h"
 
 static void
 CALC (float *dst, float *src1, int *ind, float *src2)
Index: gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-2.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-2.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vpermi2pd-2.c (.../branches/gcc-4_9-branch)
@@ -9,7 +9,6 @@
 #define SIZE (AVX512F_LEN / 64)
 #include "avx512f-mask-type.h"
 #include "math.h"
-#include "values.h"
 
 static void
 CALC (double *dst, double *src1, long long *ind, double *src2)
Index: gcc/testsuite/gcc.target/i386/pr61794.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/pr61794.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/pr61794.c (.../branches/gcc-4_9-branch)
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include
+
+__m512i zmm;
+__m128i xmm;
+
+void foo ()
+{
+ xmm = _mm512_extracti32x4_epi32 (zmm, 0);
+}
Index: gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-2.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-2.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmps-2.c (.../branches/gcc-4_9-branch)
@@ -10,7 +10,7 @@
 #define SIZE (AVX512F_LEN / 32)
 #include "avx512f-mask-type.h"
 #include "math.h"
-#include "values.h"
+#include "float.h"
 
 static void
 CALC (float *r, float src, int tbl)
@@ -60,10 +60,10 @@
 *r = M_PI_2;
 break;
 case 14:
- *r = MAXFLOAT;
+ *r = FLT_MAX;
 break;
 case 15:
- *r = -MAXFLOAT;
+ *r = -FLT_MAX;
 break;
 default:
 abort ();
Index: gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-2.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-2.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vpermt2q-2.c (.../branches/gcc-4_9-branch)
@@ -9,7 +9,6 @@
 #define SIZE (AVX512F_LEN / 64)
 #include "avx512f-mask-type.h"
 #include "math.h"
-#include "values.h"
 
 static void
 CALC (long long *dst, long long *src1, long long *ind, long long *src2)
Index: gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-2.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-2.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vfixupimmpd-2.c (.../branches/gcc-4_9-branch)
@@ -10,8 +10,9 @@
 #define SIZE (AVX512F_LEN / 64)
 #include "avx512f-mask-type.h"
 #include "math.h"
-#include "values.h"
+#include "float.h"
 
+
 static void
 CALC (double *r, double src, long long tbl)
 {
@@ -60,10 +61,10 @@
 *r = M_PI_2;
 break;
 case 14:
- *r = MAXDOUBLE;
+ *r = DBL_MAX;
 break;
 case 15:
- *r = -MAXDOUBLE;
+ *r = -DBL_MAX;
 break;
 default:
 abort ();
Index: gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-2.c
===================================================================
--- a/src/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-2.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gcc.target/i386/avx512f-vpermt2ps-2.c (.../branches/gcc-4_9-branch)
@@ -9,7 +9,6 @@
 #define SIZE (AVX512F_LEN / 32)
 #include "avx512f-mask-type.h"
 #include "math.h"
-#include "values.h"
 
 static void
 CALC (float *dst, float *src1, int *ind, float *src2)
Index: gcc/testsuite/gfortran.dg/dependency_44.f90
===================================================================
--- a/src/gcc/testsuite/gfortran.dg/dependency_44.f90 (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/gfortran.dg/dependency_44.f90 (.../branches/gcc-4_9-branch)
Index: gcc/testsuite/ChangeLog
===================================================================
--- a/src/gcc/testsuite/ChangeLog (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/ChangeLog (.../branches/gcc-4_9-branch)
@@ -1,3 +1,161 @@
++2014-08-07 Petr Murzin
++
++ * gcc.target/i386/avx512f-vfixupimmpd-2.c: Include float.h instead of
++ values.h, change MAXDOUBLE for DBL_MAX.
++ * gcc.target/i386/avx512f-vfixupimmsd-2.c: Ditto.
++ * gcc.target/i386/avx512f-vfixupimmps-2.c: Include float.h instead of
++ values.h, change MAXFLOAT for FLT_MAX.
++ * gcc.target/i386/avx512f-vfixupimmss-2.c: Ditto.
++ * gcc.target/i386/avx512f-vpermi2d-2.c: Do not include values.h. ++ * gcc.target/i386/avx512f-vpermi2pd-2.c: Ditto. ++ * gcc.target/i386/avx512f-vpermi2ps-2.c: Ditto. ++ * gcc.target/i386/avx512f-vpermi2q-2.c: Ditto. ++ * gcc.target/i386/avx512f-vpermt2d-2.c: Ditto. ++ * gcc.target/i386/avx512f-vpermt2pd-2.c: Ditto. ++ * gcc.target/i386/avx512f-vpermt2ps-2.c: Ditto. ++ * gcc.target/i386/avx512f-vpermt2q-2.c: Ditto. ++ ++2014-08-06 Vladimir Makarov ++ ++ PR debug/61923 ++ * gcc.target/i386/pr61923.c: New test. ++ ++2014-08-06 Jakub Jelinek ++ ++ PR rtl-optimization/61801 ++ * gcc.target/i386/pr61801.c: Rewritten. ++ ++2014-08-04 Rohit ++ ++ PR target/60102 ++ * gcc.target/powerpc/pr60102.c: New testcase. ++ +2014-08-01 Igor Zamyatin + + PR other/61963 @@ -2068,7 +3838,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-16 Release Manager * GCC 4.9.1 released. -@@ -17,7 +143,8 @@ +@@ -17,7 +175,8 @@ 2014-06-09 Alan Lawrence PR target/61062 @@ -2078,7 +3848,7 @@ Index: gcc/testsuite/ChangeLog 2014-07-08 Jakub Jelinek -@@ -34,8 +161,8 @@ +@@ -34,8 +193,8 @@ 2014-07-08 Alan Lawrence @@ -2296,6 +4066,63 @@ Index: gcc/cp/parser.c { if (cp_lexer_peek_token (parser->lexer)->type == CPP_CLOSE_SQUARE) cp_lexer_consume_token (parser->lexer); +Index: gcc/haifa-sched.c +=================================================================== +--- a/src/gcc/haifa-sched.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/haifa-sched.c (.../branches/gcc-4_9-branch) +@@ -2972,7 +2972,7 @@ + { + advance_state (curr_state); + if (sched_verbose >= 6) +- fprintf (sched_dump, ";;\tAdvanced a state.\n"); ++ fprintf (sched_dump, ";;\tAdvance the current state.\n"); + } + + /* Update register pressure after scheduling INSN. */ +@@ -6007,6 +6007,7 @@ + modulo_insns_scheduled = 0; + + ls.modulo_epilogue = false; ++ ls.first_cycle_insn_p = true; + + /* Loop until all the insns in BB are scheduled. 
*/ + while ((*current_sched_info->schedule_more_p) ()) +@@ -6077,7 +6078,6 @@ + if (must_backtrack) + goto do_backtrack; + +- ls.first_cycle_insn_p = true; + ls.shadows_only_p = false; + cycle_issued_insns = 0; + ls.can_issue_more = issue_rate; +@@ -6363,11 +6363,13 @@ + break; + } + } ++ ls.first_cycle_insn_p = true; + } + if (ls.modulo_epilogue) + success = true; + end_schedule: +- advance_one_cycle (); ++ if (!ls.first_cycle_insn_p) ++ advance_one_cycle (); + perform_replacements_new_cycle (); + if (modulo_ii > 0) + { +Index: gcc/tree-ssa-loop-ivopts.c +=================================================================== +--- a/src/gcc/tree-ssa-loop-ivopts.c (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/tree-ssa-loop-ivopts.c (.../branches/gcc-4_9-branch) +@@ -1679,6 +1679,8 @@ + return false; + + unsigned int align = TYPE_ALIGN (TREE_TYPE (ref)); ++ if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align) ++ align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))); + + unsigned HOST_WIDE_INT bitpos; + unsigned int ref_align; Index: gcc/tree-ssa-math-opts.c =================================================================== --- a/src/gcc/tree-ssa-math-opts.c (.../tags/gcc_4_9_1_release) @@ -2340,6 +4167,22 @@ Index: gcc/expr.c mode = smallest_mode_for_size (size * BITS_PER_UNIT, MODE_INT); temp_target = gen_reg_rtx (mode); emit_group_store (temp_target, temp, TREE_TYPE (exp), size); +Index: gcc/go/gofrontend/parse.cc +=================================================================== +--- a/src/gcc/go/gofrontend/parse.cc (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/go/gofrontend/parse.cc (.../branches/gcc-4_9-branch) +@@ -2865,7 +2865,10 @@ + // For a function literal, the next token must be a '{'. If we + // don't see that, then we may have a type expression. + if (!this->peek_token()->is_op(OPERATOR_LCURLY)) +- return Expression::make_type(type, location); ++ { ++ hold_enclosing_vars.swap(this->enclosing_vars_); ++ return Expression::make_type(type, location); ++ } + + bool hold_is_erroneous_function = this->is_erroneous_function_; + if (fntype_is_error) Index: gcc/fortran/ChangeLog =================================================================== --- a/src/gcc/fortran/ChangeLog (.../tags/gcc_4_9_1_release) @@ -2639,6 +4482,18 @@ Index: gcc/config/i386/sse.md { operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2); return "vextract32x4\t{%2, %1, %0|%0, %1, %2}"; +@@ -5992,9 +5994,9 @@ + (set_attr "mode" "")]) + + (define_insn "vec_extract_lo_" +- [(set (match_operand: 0 "" "=") ++ [(set (match_operand: 0 "" "=,v") + (vec_select: +- (match_operand:V8FI 1 "nonimmediate_operand" "vm") ++ (match_operand:V8FI 1 "nonimmediate_operand" "v,m") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" Index: gcc/config/i386/avx512fintrin.h =================================================================== --- a/src/gcc/config/i386/avx512fintrin.h (.../tags/gcc_4_9_1_release) @@ -3600,7 +5455,35 @@ Index: gcc/config/rs6000/rs6000.c =================================================================== --- a/src/gcc/config/rs6000/rs6000.c (.../tags/gcc_4_9_1_release) +++ b/src/gcc/config/rs6000/rs6000.c (.../branches/gcc-4_9-branch) -@@ -5871,6 +5871,34 @@ +@@ -1221,7 +1221,12 @@ + /* Soft frame pointer. */ + "sfp", + /* HTM SPR registers. */ +- "tfhar", "tfiar", "texasr" ++ "tfhar", "tfiar", "texasr", ++ /* SPE High registers. 
*/ ++ "0", "1", "2", "3", "4", "5", "6", "7", ++ "8", "9", "10", "11", "12", "13", "14", "15", ++ "16", "17", "18", "19", "20", "21", "22", "23", ++ "24", "25", "26", "27", "28", "29", "30", "31" + }; + + #ifdef TARGET_REGNAMES +@@ -1249,7 +1254,12 @@ + /* Soft frame pointer. */ + "sfp", + /* HTM SPR registers. */ +- "tfhar", "tfiar", "texasr" ++ "tfhar", "tfiar", "texasr", ++ /* SPE High registers. */ ++ "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7", ++ "%rh8", "%rh9", "%rh10", "%r11", "%rh12", "%rh13", "%rh14", "%rh15", ++ "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23", ++ "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31" + }; + #endif + +@@ -5871,6 +5881,34 @@ return align; } @@ -3635,7 +5518,7 @@ Index: gcc/config/rs6000/rs6000.c /* AIX increases natural record alignment to doubleword if the first field is an FP double while the FP fields remain word aligned. */ -@@ -9180,14 +9208,51 @@ +@@ -9180,14 +9218,51 @@ || (type && TREE_CODE (type) == VECTOR_TYPE && int_size_in_bytes (type) >= 16)) return 128; @@ -3694,7 +5577,7 @@ Index: gcc/config/rs6000/rs6000.c } /* The offset in words to the start of the parameter save area. */ -@@ -10225,6 +10290,7 @@ +@@ -10225,6 +10300,7 @@ rtx r, off; int i, k = 0; unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; @@ -3702,7 +5585,7 @@ Index: gcc/config/rs6000/rs6000.c /* Do we also need to pass this argument in the parameter save area? */ -@@ -10253,6 +10319,37 @@ +@@ -10253,6 +10329,37 @@ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); } @@ -3740,6 +5623,331 @@ Index: gcc/config/rs6000/rs6000.c return rs6000_finish_function_arg (mode, rvec, k); } else if (align_words < GP_ARG_NUM_REG) +@@ -31074,13 +31181,13 @@ + { + if (BYTES_BIG_ENDIAN) + { +- parts[2 * i] = gen_rtx_REG (SImode, regno + 1200); ++ parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO); + parts[2 * i + 1] = gen_rtx_REG (SImode, regno); + } + else + { + parts[2 * i] = gen_rtx_REG (SImode, regno); +- parts[2 * i + 1] = gen_rtx_REG (SImode, regno + 1200); ++ parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO); + } + } + +@@ -31100,11 +31207,11 @@ + rtx mem = gen_rtx_MEM (BLKmode, addr); + rtx value = gen_int_mode (4, mode); + +- for (i = 1201; i < 1232; i++) ++ for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++) + { +- int column = DWARF_REG_TO_UNWIND_COLUMN (i); +- HOST_WIDE_INT offset +- = DWARF_FRAME_REGNUM (column) * GET_MODE_SIZE (mode); ++ int column = DWARF_REG_TO_UNWIND_COLUMN ++ (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); ++ HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); + + emit_move_insn (adjust_address (mem, mode, offset), value); + } +@@ -31123,9 +31230,9 @@ + + for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++) + { +- int column = DWARF_REG_TO_UNWIND_COLUMN (i); +- HOST_WIDE_INT offset +- = DWARF_FRAME_REGNUM (column) * GET_MODE_SIZE (mode); ++ int column = DWARF_REG_TO_UNWIND_COLUMN ++ (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); ++ HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); + + emit_move_insn (adjust_address (mem, mode, offset), value); + } +@@ -31157,9 +31264,8 @@ + return 99; + if (regno == SPEFSCR_REGNO) + return 612; +- /* SPE high reg number. We get these values of regno from +- rs6000_dwarf_register_span. 
*/ +- gcc_assert (regno >= 1200 && regno < 1232); ++ if (SPE_HIGH_REGNO_P (regno)) ++ return regno - FIRST_SPE_HIGH_REGNO + 1200; + return regno; + } + +Index: gcc/config/rs6000/rs6000.h +=================================================================== +--- a/src/gcc/config/rs6000/rs6000.h (.../tags/gcc_4_9_1_release) ++++ b/src/gcc/config/rs6000/rs6000.h (.../branches/gcc-4_9-branch) +@@ -930,35 +930,36 @@ + + The 3 HTM registers aren't also included in DWARF_FRAME_REGISTERS. */ + +-#define FIRST_PSEUDO_REGISTER 117 ++#define FIRST_PSEUDO_REGISTER 149 + + /* This must be included for pre gcc 3.0 glibc compatibility. */ + #define PRE_GCC3_DWARF_FRAME_REGISTERS 77 + +-/* Add 32 dwarf columns for synthetic SPE registers. */ +-#define DWARF_FRAME_REGISTERS ((FIRST_PSEUDO_REGISTER - 4) + 32) ++/* True if register is an SPE High register. */ ++#define SPE_HIGH_REGNO_P(N) \ ++ ((N) >= FIRST_SPE_HIGH_REGNO && (N) <= LAST_SPE_HIGH_REGNO) + ++/* SPE high registers added as hard regs. ++ The sfp register and 3 HTM registers ++ aren't included in DWARF_FRAME_REGISTERS. */ ++#define DWARF_FRAME_REGISTERS (FIRST_PSEUDO_REGISTER - 4) ++ + /* The SPE has an additional 32 synthetic registers, with DWARF debug + info numbering for these registers starting at 1200. While eh_frame + register numbering need not be the same as the debug info numbering, +- we choose to number these regs for eh_frame at 1200 too. This allows +- future versions of the rs6000 backend to add hard registers and +- continue to use the gcc hard register numbering for eh_frame. If the +- extra SPE registers in eh_frame were numbered starting from the +- current value of FIRST_PSEUDO_REGISTER, then if FIRST_PSEUDO_REGISTER +- changed we'd need to introduce a mapping in DWARF_FRAME_REGNUM to +- avoid invalidating older SPE eh_frame info. ++ we choose to number these regs for eh_frame at 1200 too. + + We must map them here to avoid huge unwinder tables mostly consisting + of unused space. */ + #define DWARF_REG_TO_UNWIND_COLUMN(r) \ +- ((r) > 1200 ? ((r) - 1200 + (DWARF_FRAME_REGISTERS - 32)) : (r)) ++ ((r) >= 1200 ? ((r) - 1200 + (DWARF_FRAME_REGISTERS - 32)) : (r)) + + /* Use standard DWARF numbering for DWARF debugging information. */ + #define DBX_REGISTER_NUMBER(REGNO) rs6000_dbx_register_number (REGNO) + + /* Use gcc hard register numbering for eh_frame. */ +-#define DWARF_FRAME_REGNUM(REGNO) (REGNO) ++#define DWARF_FRAME_REGNUM(REGNO) \ ++ (SPE_HIGH_REGNO_P (REGNO) ? ((REGNO) - FIRST_SPE_HIGH_REGNO + 1200) : (REGNO)) + + /* Map register numbers held in the call frame info that gcc has + collected using DWARF_FRAME_REGNUM to those that should be output in +@@ -992,7 +993,10 @@ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1 \ +- , 1, 1, 1, 1, 1, 1 \ ++ , 1, 1, 1, 1, 1, 1, \ ++ /* SPE High registers. */ \ ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \ + } + + /* 1 for registers not available across function calls. +@@ -1012,7 +1016,10 @@ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 1, 1 \ +- , 1, 1, 1, 1, 1, 1 \ ++ , 1, 1, 1, 1, 1, 1, \ ++ /* SPE High registers. 
@@ -992,7 +993,10 @@
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
 1, 1 \
- , 1, 1, 1, 1, 1, 1 \
+ , 1, 1, 1, 1, 1, 1, \
+ /* SPE High registers. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \
 }
 
 /* 1 for registers not available across function calls.
@@ -1012,7 +1016,10 @@
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
 1, 1 \
- , 1, 1, 1, 1, 1, 1 \
+ , 1, 1, 1, 1, 1, 1, \
+ /* SPE High registers. */ \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \
 }
 
 /* Like `CALL_USED_REGISTERS' except this macro doesn't require that
@@ -1031,7 +1038,10 @@
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
 0, 0 \
- , 0, 0, 0, 0, 0, 0 \
+ , 0, 0, 0, 0, 0, 0, \
+ /* SPE High registers. */ \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 \
 }
 
 #define TOTAL_ALTIVEC_REGS (LAST_ALTIVEC_REGNO - FIRST_ALTIVEC_REGNO + 1)
@@ -1114,7 +1124,10 @@
 96, 95, 94, 93, 92, 91, \
 108, 107, 106, 105, 104, 103, 102, 101, 100, 99, 98, 97, \
 109, 110, \
- 111, 112, 113, 114, 115, 116 \
+ 111, 112, 113, 114, 115, 116, \
+ 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, \
+ 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, \
+ 141, 142, 143, 144, 145, 146, 147, 148 \
 }
 
 /* True if register is floating-point. */
@@ -1349,6 +1362,7 @@
 CR_REGS,
 NON_FLOAT_REGS,
 CA_REGS,
+ SPE_HIGH_REGS,
 ALL_REGS,
 LIM_REG_CLASSES
 };
@@ -1380,6 +1394,7 @@
 "CR_REGS", \
 "NON_FLOAT_REGS", \
 "CA_REGS", \
+ "SPE_HIGH_REGS", \
 "ALL_REGS" \
 }
 
 /* This is an initializer for a vector of HARD_REG_SET
 of length N_REG_CLASSES. */
 
-#define REG_CLASS_CONTENTS \
-{ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \
- { 0xfffffffe, 0x00000000, 0x00000008, 0x00020000 }, /* BASE_REGS */ \
- { 0xffffffff, 0x00000000, 0x00000008, 0x00020000 }, /* GENERAL_REGS */ \
- { 0x00000000, 0xffffffff, 0x00000000, 0x00000000 }, /* FLOAT_REGS */ \
- { 0x00000000, 0x00000000, 0xffffe000, 0x00001fff }, /* ALTIVEC_REGS */ \
- { 0x00000000, 0xffffffff, 0xffffe000, 0x00001fff }, /* VSX_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00002000 }, /* VRSAVE_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00004000 }, /* VSCR_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00008000 }, /* SPE_ACC_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00010000 }, /* SPEFSCR_REGS */ \
- { 0x00000000, 0x00000000, 0x00000000, 0x00040000 }, /* SPR_REGS */ \
- { 0xffffffff, 0xffffffff, 0x00000008, 0x00020000 }, /* NON_SPECIAL_REGS */ \
- { 0x00000000, 0x00000000, 0x00000002, 0x00000000 }, /* LINK_REGS */ \
- { 0x00000000, 0x00000000, 0x00000004, 0x00000000 }, /* CTR_REGS */ \
- { 0x00000000, 0x00000000, 0x00000006, 0x00000000 }, /* LINK_OR_CTR_REGS */ \
- { 0x00000000, 0x00000000, 0x00000006, 0x00002000 }, /* SPECIAL_REGS */ \
- { 0xffffffff, 0x00000000, 0x0000000e, 0x00022000 }, /* SPEC_OR_GEN_REGS */ \
- { 0x00000000, 0x00000000, 0x00000010, 0x00000000 }, /* CR0_REGS */ \
- { 0x00000000, 0x00000000, 0x00000ff0, 0x00000000 }, /* CR_REGS */ \
- { 0xffffffff, 0x00000000, 0x00000ffe, 0x00020000 }, /* NON_FLOAT_REGS */ \
- { 0x00000000, 0x00000000, 0x00001000, 0x00000000 }, /* CA_REGS */ \
- { 0xffffffff, 0xffffffff, 0xfffffffe, 0x0007ffff } /* ALL_REGS */ \
+#define REG_CLASS_CONTENTS \
+{ \
+ /* NO_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, \
+ /* BASE_REGS. */ \
+ { 0xfffffffe, 0x00000000, 0x00000008, 0x00020000, 0x00000000 }, \
+ /* GENERAL_REGS. */ \
+ { 0xffffffff, 0x00000000, 0x00000008, 0x00020000, 0x00000000 }, \
+ /* FLOAT_REGS. */ \
+ { 0x00000000, 0xffffffff, 0x00000000, 0x00000000, 0x00000000 }, \
+ /* ALTIVEC_REGS. */ \
+ { 0x00000000, 0x00000000, 0xffffe000, 0x00001fff, 0x00000000 }, \
+ /* VSX_REGS. */ \
+ { 0x00000000, 0xffffffff, 0xffffe000, 0x00001fff, 0x00000000 }, \
+ /* VRSAVE_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00002000, 0x00000000 }, \
+ /* VSCR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00004000, 0x00000000 }, \
+ /* SPE_ACC_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00008000, 0x00000000 }, \
+ /* SPEFSCR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00010000, 0x00000000 }, \
+ /* SPR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0x00040000, 0x00000000 }, \
+ /* NON_SPECIAL_REGS. */ \
+ { 0xffffffff, 0xffffffff, 0x00000008, 0x00020000, 0x00000000 }, \
+ /* LINK_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000002, 0x00000000, 0x00000000 }, \
+ /* CTR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x00000000 }, \
+ /* LINK_OR_CTR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000006, 0x00000000, 0x00000000 }, \
+ /* SPECIAL_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000006, 0x00002000, 0x00000000 }, \
+ /* SPEC_OR_GEN_REGS. */ \
+ { 0xffffffff, 0x00000000, 0x0000000e, 0x00022000, 0x00000000 }, \
+ /* CR0_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000010, 0x00000000, 0x00000000 }, \
+ /* CR_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000ff0, 0x00000000, 0x00000000 }, \
+ /* NON_FLOAT_REGS. */ \
+ { 0xffffffff, 0x00000000, 0x00000ffe, 0x00020000, 0x00000000 }, \
+ /* CA_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00001000, 0x00000000, 0x00000000 }, \
+ /* SPE_HIGH_REGS. */ \
+ { 0x00000000, 0x00000000, 0x00000000, 0xffe00000, 0x001fffff }, \
+ /* ALL_REGS. */ \
+ { 0xffffffff, 0xffffffff, 0xfffffffe, 0xffe7ffff, 0x001fffff } \
+ }
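As a sanity check on the new five-word masks, the SPE_HIGH_REGS row can be
decoded by hand.  The sketch below is illustrative only and not part of the
patch; it assumes the hard register numbering 117..148 introduced above, with
32 bits per HARD_REG_SET word, and verifies that the initializer
{ 0, 0, 0, 0xffe00000, 0x001fffff } covers exactly those registers:

  #include <assert.h>

  int
  main (void)
  {
    /* The SPE_HIGH_REGS row from the REG_CLASS_CONTENTS hunk above.  */
    const unsigned int mask[5] = { 0x00000000, 0x00000000, 0x00000000,
                                   0xffe00000, 0x001fffff };
    for (int regno = 0; regno < 5 * 32; regno++)
      {
        /* Bit regno%32 of word regno/32 says whether regno is in the class.  */
        int in_class = (mask[regno / 32] >> (regno % 32)) & 1;
        assert (in_class == (regno >= 117 && regno <= 148));
      }
    return 0;
  }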
+
+ /* The same information, inverted:
+@@ -2349,6 +2388,39 @@
+ &rs6000_reg_names[114][0], /* tfhar */ \
+ &rs6000_reg_names[115][0], /* tfiar */ \
+ &rs6000_reg_names[116][0], /* texasr */ \
++ \
++ &rs6000_reg_names[117][0], /* SPE rh0. */ \
++ &rs6000_reg_names[118][0], /* SPE rh1. */ \
++ &rs6000_reg_names[119][0], /* SPE rh2. */ \
++ &rs6000_reg_names[120][0], /* SPE rh3. */ \
++ &rs6000_reg_names[121][0], /* SPE rh4. */ \
++ &rs6000_reg_names[122][0], /* SPE rh5. */ \
++ &rs6000_reg_names[123][0], /* SPE rh6. */ \
++ &rs6000_reg_names[124][0], /* SPE rh7. */ \
++ &rs6000_reg_names[125][0], /* SPE rh8. */ \
++ &rs6000_reg_names[126][0], /* SPE rh9. */ \
++ &rs6000_reg_names[127][0], /* SPE rh10. */ \
++ &rs6000_reg_names[128][0], /* SPE rh11. */ \
++ &rs6000_reg_names[129][0], /* SPE rh12. */ \
++ &rs6000_reg_names[130][0], /* SPE rh13. */ \
++ &rs6000_reg_names[131][0], /* SPE rh14. */ \
++ &rs6000_reg_names[132][0], /* SPE rh15. */ \
++ &rs6000_reg_names[133][0], /* SPE rh16. */ \
++ &rs6000_reg_names[134][0], /* SPE rh17. */ \
++ &rs6000_reg_names[135][0], /* SPE rh18. */ \
++ &rs6000_reg_names[136][0], /* SPE rh19. */ \
++ &rs6000_reg_names[137][0], /* SPE rh20. */ \
++ &rs6000_reg_names[138][0], /* SPE rh21. */ \
++ &rs6000_reg_names[139][0], /* SPE rh22. */ \
++ &rs6000_reg_names[140][0], /* SPE rh23. */ \
++ &rs6000_reg_names[141][0], /* SPE rh24. */ \
++ &rs6000_reg_names[142][0], /* SPE rh25. */ \
++ &rs6000_reg_names[143][0], /* SPE rh26. */ \
++ &rs6000_reg_names[144][0], /* SPE rh27. */ \
++ &rs6000_reg_names[145][0], /* SPE rh28. */ \
++ &rs6000_reg_names[146][0], /* SPE rh29. */ \
++ &rs6000_reg_names[147][0], /* SPE rh30. */ \
++ &rs6000_reg_names[148][0], /* SPE rh31. */ \
+ }
+
+ /* Table of additional register names to use in user input. */
+@@ -2404,7 +2476,17 @@
+ {"vs56", 101},{"vs57", 102},{"vs58", 103},{"vs59", 104}, \
+ {"vs60", 105},{"vs61", 106},{"vs62", 107},{"vs63", 108}, \
+ /* Transactional Memory Facility (HTM) Registers. */ \
+- {"tfhar", 114}, {"tfiar", 115}, {"texasr", 116} }
++ {"tfhar", 114}, {"tfiar", 115}, {"texasr", 116}, \
++ /* SPE high registers. */ \
++ {"rh0", 117}, {"rh1", 118}, {"rh2", 119}, {"rh3", 120}, \
++ {"rh4", 121}, {"rh5", 122}, {"rh6", 123}, {"rh7", 124}, \
++ {"rh8", 125}, {"rh9", 126}, {"rh10", 127}, {"rh11", 128}, \
++ {"rh12", 129}, {"rh13", 130}, {"rh14", 131}, {"rh15", 132}, \
++ {"rh16", 133}, {"rh17", 134}, {"rh18", 135}, {"rh19", 136}, \
++ {"rh20", 137}, {"rh21", 138}, {"rh22", 139}, {"rh23", 140}, \
++ {"rh24", 141}, {"rh25", 142}, {"rh26", 143}, {"rh27", 144}, \
++ {"rh28", 145}, {"rh29", 146}, {"rh30", 147}, {"rh31", 148}, \
++}
+
+ /* This is how to output an element of a case-vector that is relative. */
+
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- a/src/gcc/config/rs6000/rs6000.md (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/config/rs6000/rs6000.md (.../branches/gcc-4_9-branch)
@@ -56,6 +56,8 @@
 (TFHAR_REGNO 114)
 (TFIAR_REGNO 115)
 (TEXASR_REGNO 116)
+ (FIRST_SPE_HIGH_REGNO 117)
+ (LAST_SPE_HIGH_REGNO 148)
 ])
 
 ;;
Index: gcc/config/rs6000/sysv4.h
===================================================================
--- a/src/gcc/config/rs6000/sysv4.h (.../tags/gcc_4_9_1_release)
diff --git a/debian/rules.patch b/debian/rules.patch
index 0f0cbaa..0c0e977 100644
--- a/debian/rules.patch
+++ b/debian/rules.patch
@@ -87,10 +87,6 @@ debian_patches += \
 # FIXME: only needed for isl-0.13 for now
 # isl-0.13-compat \
 
-ifeq (,$(filter $(distrelease),wheezy squeeze dapper hardy lucid maverick natty oneiric precise quantal raring))
-  debian_patches += libstdc++-python3
-endif
-
 # $(if $(filter yes, $(DEB_CROSS)),,gcc-print-file-name) \
 # libstdc++-nothumb-check \
 # TODO: update ...
-- 
cgit v1.2.3 


From a620601f16036f006a8dd21ad43052404565a1fd Mon Sep 17 00:00:00 2001
From: doko
Date: Fri, 8 Aug 2014 15:32:40 +0000
Subject: * Update to SVN 20140808 (r213759) from the gcc-4_9-branch.

git-svn-id: svn://svn.debian.org/svn/gcccvs/branches/sid/gcc-4.9@7560 6ca36cf4-e1d1-0310-8c6f-e303bb2178ca
---
 debian/changelog                |   4 +-
 debian/patches/svn-updates.diff | 293 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 287 insertions(+), 10 deletions(-)

(limited to 'debian')

diff --git a/debian/changelog b/debian/changelog
index 4b087d4..2af9404 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,10 +1,10 @@
 gcc-4.9 (4.9.1-5) UNRELEASED; urgency=medium
 
- * Update to SVN 20140807 (r213709) from the gcc-4_9-branch.
+ * Update to SVN 20140808 (r213759) from the gcc-4_9-branch.
 - Fix PR tree-optimization/61964. LP: #1347147.
 * Fix libphobos cross build.
 
- -- Matthias Klose Thu, 07 Aug 2014 15:19:59 +0200
+ -- Matthias Klose Fri, 08 Aug 2014 17:28:55 +0200
 
 gcc-4.9 (4.9.1-4) unstable; urgency=high
 
diff --git a/debian/patches/svn-updates.diff b/debian/patches/svn-updates.diff
index 67a29a1..0416086 100644
--- a/debian/patches/svn-updates.diff
+++ b/debian/patches/svn-updates.diff
@@ -1,10 +1,10 @@
-# DP: updates from the 4.9 branch upto 20140807 (r213709).
+# DP: updates from the 4.9 branch upto 20140808 (r213759).
 
 last_update()
 {
 	cat > ${dir}LAST_UPDATED
++
++ PR tree-optimization/60707
++ * gfortran.dg/pr45636.f90: xfail on 32-bit hppa*-*-*.
++
++ * gcc.dg/atomic/c11-atomic-exec-4.c: Undefine _POSIX_C_SOURCE before
++ defining in dg-options.
++ * gcc.dg/atomic/c11-atomic-exec-5.c: Likewise.
++
++ * gcc.dg/atomic/stdatomic-flag.c: Add xfail comment.
++
++ * gcc.c-torture/compile/pr60655-1.c: Don't add -fdata-sections option
++ on 32-bit hppa-hpux.
++
++ * gcc.dg/pr57233.c: Add -fno-common option on hppa*-*-hpux*.
++
+2014-08-07 Petr Murzin
+
+ * gcc.target/i386/avx512f-vfixupimmpd-2.c: Include float.h instead of
@@ -3838,7 +3923,7 @@ Index: gcc/testsuite/ChangeLog
 2014-07-16 Release Manager
 
 * GCC 4.9.1 released.
-@@ -17,7 +175,8 @@
+@@ -17,7 +191,8 @@
 2014-06-09 Alan Lawrence
 
 PR target/61062
- * gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c: New testcase.
+ * gcc.target/aarch64/vqdml_lane_intrinsics-bad_1.c: New
+ testcase.
 
 2014-07-08 Jakub Jelinek
 
-@@ -34,8 +193,8 @@
+@@ -34,8 +209,8 @@
 2014-07-08 Alan Lawrence
 
- * gcc.dg/vect/vect-singleton_1.c (foo): Remove duplicate of this test.
+ * gcc.dg/vect/vect-singleton_1.c (foo): Remove duplicate of this
+ test.
 
 PR target/59843
 * gcc.dg/vect/vect-singleton_1.c: New file.
Index: gcc/testsuite/g++.dg/ext/restrict2.C
===================================================================
--- a/src/gcc/testsuite/g++.dg/ext/restrict2.C (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/g++.dg/ext/restrict2.C (.../branches/gcc-4_9-branch)
@@ -0,0 +1,8 @@
+// PR c++/60872
+// { dg-options "" }
+
+typedef double *__restrict T;
+void f(T* p)
+{
+ void *p2 = p;
+}
Index: gcc/testsuite/g++.dg/expr/cond12.C
===================================================================
--- a/src/gcc/testsuite/g++.dg/expr/cond12.C (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/g++.dg/expr/cond12.C (.../branches/gcc-4_9-branch)
@@ -0,0 +1,12 @@
+// PR c++/58714
+// { dg-do run }
+
+struct X {
+ X& operator=(const X&){}
+ X& operator=(X&){__builtin_abort();}
+};
+
+int main(int argv,char**) {
+ X a, b;
+ ((argv > 2) ? a : b) = X();
+}
Index: gcc/testsuite/g++.dg/compat/struct-layout-1.exp
===================================================================
--- a/src/gcc/testsuite/g++.dg/compat/struct-layout-1.exp (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/g++.dg/compat/struct-layout-1.exp (.../branches/gcc-4_9-branch)
 g++_init
 
 # Save variables for the C++ compiler under test, which each test will
Index: gcc/testsuite/g++.dg/cpp0x/constexpr-empty7.C
===================================================================
--- a/src/gcc/testsuite/g++.dg/cpp0x/constexpr-empty7.C (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/g++.dg/cpp0x/constexpr-empty7.C (.../branches/gcc-4_9-branch)
@@ -0,0 +1,28 @@
+// PR c++/61959
+// { dg-do compile { target c++11 } }
+
+template <class Coord> struct BasePoint
+{
+ Coord x, y;
+ constexpr BasePoint (Coord, Coord) : x (0), y (0) {}
+};
+template <class T> struct BaseCoord
+{
+ int value;
+ constexpr BaseCoord (T) : value (1) {}
+};
+template <class T, class units> struct IntCoordTyped : BaseCoord<T>, units
+{
+ typedef BaseCoord<T> Super;
+ constexpr IntCoordTyped (int) : Super (0) {}
+};
+template <class units>
+struct IntPointTyped : BasePoint<IntCoordTyped<int, units> >, units
+{
+ typedef BasePoint<IntCoordTyped<int, units> > Super;
+ constexpr IntPointTyped (int, int) : Super (0, 0) {}
+};
+struct A
+{
+};
+IntPointTyped<A> a (0, 0);
Index: gcc/testsuite/g++.dg/cpp0x/rv-cond1.C
===================================================================
--- a/src/gcc/testsuite/g++.dg/cpp0x/rv-cond1.C (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/g++.dg/cpp0x/rv-cond1.C (.../branches/gcc-4_9-branch)
@@ -0,0 +1,13 @@
+// PR c++/58714
+// { dg-do compile { target c++11 } }
+
+struct X {
+ X& operator=(const X&) = delete;
+ X& operator=(X&& ) = default;
+};
+
+void f(bool t) {
+ X a, b;
+ *(t ? &a : &b) = X();
+ (t ? a : b) = X();
+}
Index: gcc/testsuite/g++.dg/cpp0x/constexpr-array7.C
===================================================================
--- a/src/gcc/testsuite/g++.dg/cpp0x/constexpr-array7.C (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/g++.dg/cpp0x/constexpr-array7.C (.../branches/gcc-4_9-branch)
@@ -0,0 +1,13 @@
+// PR c++/61994
+// { dg-do compile { target c++11 } }
+
+struct A { int i,j; };
+
+struct X {
+ A a = {1,1};
+};
+
+constexpr X table[1][1] = {{ {} }};
+
+#define SA(X) static_assert(X,#X)
+SA(table[0][0].a.i == 1);
Index: gcc/testsuite/g++.dg/ipa/pr61160-1.C
===================================================================
--- a/src/gcc/testsuite/g++.dg/ipa/pr61160-1.C (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/g++.dg/ipa/pr61160-1.C (.../branches/gcc-4_9-branch)
Index: gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455.c
===================================================================
--- a/src/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61455.c (.../branches/gcc-4_9-branch)
+int main (void)
+{
+ int a[2];
+ int b = a[:]; /* { dg-error "cannot be scalar" } */
+}
Index: gcc/cp/init.c
===================================================================
--- a/src/gcc/cp/init.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/cp/init.c (.../branches/gcc-4_9-branch)
@@ -3846,6 +3846,13 @@
 
 stmt_expr = finish_init_stmts (is_global, stmt_expr, compound_stmt);
 
+ current_stmt_tree ()->stmts_are_full_exprs_p = destroy_temps;
+
+ if (errors)
+ return error_mark_node;
+ if (const_init)
+ return build2 (INIT_EXPR, atype, obase, const_init);
+
 /* Now make the result have the correct type. */
 if (TREE_CODE (atype) == ARRAY_TYPE)
 {
@@ -3855,12 +3862,6 @@
 TREE_NO_WARNING (stmt_expr) = 1;
 }
 
- current_stmt_tree ()->stmts_are_full_exprs_p = destroy_temps;
-
- if (const_init)
- return build2 (INIT_EXPR, atype, obase, const_init);
- if (errors)
- return error_mark_node;
 return stmt_expr;
 }
 
Index: gcc/cp/tree.c
===================================================================
--- a/src/gcc/cp/tree.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/cp/tree.c (.../branches/gcc-4_9-branch)
@@ -3795,6 +3795,10 @@
 {
 init_expr = get_target_expr (exp);
 exp = TARGET_EXPR_SLOT (init_expr);
+ if (CLASS_TYPE_P (TREE_TYPE (exp)))
+ exp = move (exp);
+ else
+ exp = rvalue (exp);
 }
 else
 {
Index: gcc/cp/ChangeLog
===================================================================
--- a/src/gcc/cp/ChangeLog (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/cp/ChangeLog (.../branches/gcc-4_9-branch)
-@@ -1,3 +1,19 @@
+@@ -1,3 +1,34 @@
++2014-08-07 Jason Merrill
++
++ PR c++/61959
++ * semantics.c (cxx_eval_bare_aggregate): Handle POINTER_PLUS_EXPR.
++
++ PR c++/61994
++ * init.c (build_vec_init): Leave atype an ARRAY_TYPE
++ if we're just returning an INIT_EXPR.
++
++ PR c++/60872
++ * call.c (standard_conversion): Don't try to apply restrict to void.
++
++ PR c++/58714
++ * tree.c (stabilize_expr): A stabilized prvalue is an xvalue.
++
+2014-08-01 Igor Zamyatin
+
+ * cp-array-notation.c (expand_an_in_modify_expr): Fix the misprint
Index: gcc/cp/cp-array-notation.c
===================================================================
--- a/src/gcc/cp/cp-array-notation.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/cp/cp-array-notation.c (.../branches/gcc-4_9-branch)
 case STATEMENT_LIST:
 {
 tree_stmt_iterator i;
Index: gcc/cp/semantics.c
===================================================================
--- a/src/gcc/cp/semantics.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/cp/semantics.c (.../branches/gcc-4_9-branch)
@@ -8955,7 +8955,9 @@
 constructor_elt *inner = base_field_constructor_elt (n, ce->index);
 inner->value = elt;
 }
- else if (ce->index && TREE_CODE (ce->index) == NOP_EXPR)
+ else if (ce->index
+ && (TREE_CODE (ce->index) == NOP_EXPR
+ || TREE_CODE (ce->index) == POINTER_PLUS_EXPR))
 {
 /* This is an initializer for an empty base; now that we've
 checked that it's constant, we can ignore it. */
Index: gcc/cp/parser.c
===================================================================
--- a/src/gcc/cp/parser.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/cp/parser.c (.../branches/gcc-4_9-branch)
 {
 if (cp_lexer_peek_token (parser->lexer)->type == CPP_CLOSE_SQUARE)
 cp_lexer_consume_token (parser->lexer);
Index: gcc/cp/call.c
===================================================================
--- a/src/gcc/cp/call.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/cp/call.c (.../branches/gcc-4_9-branch)
@@ -1208,9 +1208,10 @@
 && TREE_CODE (TREE_TYPE (from)) != FUNCTION_TYPE)
 {
 tree nfrom = TREE_TYPE (from);
+ /* Don't try to apply restrict to void. */
+ int quals = cp_type_quals (nfrom) & ~TYPE_QUAL_RESTRICT;
 from = build_pointer_type
- (cp_build_qualified_type (void_type_node,
- cp_type_quals (nfrom)));
+ (cp_build_qualified_type (void_type_node, quals));
 conv = build_conv (ck_ptr, from, conv);
 }
 else if (TYPE_PTRDATAMEM_P (from))
Index: gcc/haifa-sched.c
===================================================================
--- a/src/gcc/haifa-sched.c (.../tags/gcc_4_9_1_release)
+++ b/src/gcc/haifa-sched.c (.../branches/gcc-4_9-branch)
-- 
cgit v1.2.3