Diffstat (limited to 'debian/patches/gcc-linaro.diff')
-rw-r--r-- | debian/patches/gcc-linaro.diff | 162120
1 file changed, 3 insertions, 162117 deletions
diff --git a/debian/patches/gcc-linaro.diff b/debian/patches/gcc-linaro.diff
index c2e8235..ae1c251 100644
--- a/debian/patches/gcc-linaro.diff
+++ b/debian/patches/gcc-linaro.diff
@@ -1,162123 +1,9 @@
-# DP: Changes for the Linaro 6-2017.03 release.
+# DP: Changes for the Linaro 8-2018.xx snapshot.
-MSG=$(git log origin/linaro/gcc-6-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-6-branch --format=format:"%H" -n 1 --grep "gcc-6-branch@${SVN%.}"
+MSG=$(git log origin/linaro/gcc-8-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-7-branch --format=format:"%H" -n 1 --grep "gcc-7-branch@${SVN%.}"
-LANG=C git diff --no-renames 4b7882c54dabbb54686cb577f2a2cf28e93e743b..630c5507bb37d2caaef60a6f0773e4c820d76fe0 \
+LANG=C git diff --no-renames bb85d61e6bfbadee4494e034a5d8187cf0626aed 1604249e382610b087a72d0d07103f815458cec0 \
 | egrep -v '^(diff|index) ' \
 | filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/ \
 | sed 's,a/src//dev/null,/dev/null,'
---- a/src/contrib/compare_tests
-+++ b/src/contrib/compare_tests
-@@ -107,8 +107,8 @@ elif [ -d "$1" -o -d "$2" ] ; then
- usage "Must specify either two directories or two files"
- fi
- 
--sed 's/^XFAIL/FAIL/; s/^XPASS/PASS/' < "$1" | awk '/^Running target / {target = $3} { if (target != "unix") { sub(/: /, "&"target": " ); }; print $0; }' | cut -c1-2000 >$tmp1
--sed 's/^XFAIL/FAIL/; s/^XPASS/PASS/' < "$2" | awk '/^Running target / {target = $3} { if (target != "unix") { sub(/: /, "&"target": " ); }; print $0; }' | cut -c1-2000 >$tmp2
-+sed 's/^XFAIL/FAIL/; s/^ERROR/FAIL/; s/^XPASS/PASS/' < "$1" | awk '/^Running target / {target = $3} { if (target != "unix") { sub(/: /, "&"target": " ); }; print $0; }' | cut -c1-2000 >$tmp1
-+sed 's/^XFAIL/FAIL/; s/^ERROR/FAIL/; s/^XPASS/PASS/' < "$2" | awk '/^Running target / {target = $3} { if (target != "unix") { sub(/: /, "&"target": " ); }; print $0; }' | cut -c1-2000 >$tmp2
- 
- before=$tmp1
- now=$tmp2
--- a/src/contrib/dg-extract-results.py
-+++ b/src/contrib/dg-extract-results.py
-@@ -134,6 +134,7 @@ class Prog:
- self.end_line = None
- # Known summary types.
- self.count_names = [
-+ '# of DejaGnu errors\t\t',
- '# of expected passes\t\t',
- '# of unexpected failures\t',
- '# of unexpected successes\t',
-@@ -245,6 +246,10 @@ class Prog:
- segment = Segment (filename, file.tell())
- variation.header = segment
- 
-+ # Parse the rest of the summary (the '# of ' lines).
-+ if len (variation.counts) == 0:
-+ variation.counts = self.zero_counts()
-+
- # Parse up until the first line of the summary.
- if num_variations == 1:
- end = '\t\t=== ' + tool.name + ' Summary ===\n'
-@@ -291,6 +296,11 @@ class Prog:
- harness.results.append ((key, line))
- if not first_key and sort_logs:
- first_key = key
-+ if line.startswith ('ERROR: (DejaGnu)'):
-+ for i in range (len (self.count_names)):
-+ if 'DejaGnu errors' in self.count_names[i]:
-+ variation.counts[i] += 1
-+ break
- 
- # 'Using ...' lines are only interesting in a header. Splitting
- # the test up into parallel runs leads to more 'Using ...' lines
-@@ -309,9 +319,6 @@ class Prog:
- segment.lines -= final_using
- harness.add_segment (first_key, segment)
- 
-- # Parse the rest of the summary (the '# of ' lines).
-- if len (variation.counts) == 0:
-- variation.counts = self.zero_counts()
- while True:
- before = file.tell()
- line = file.readline()
--- a/src/contrib/dg-extract-results.sh
-+++ b/src/contrib/dg-extract-results.sh
-@@ -369,10 +369,11 @@ EOF
- BEGIN {
- variant="$VAR"
- tool="$TOOL"
-- passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kpasscnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0;
-+ passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kpasscnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0; dgerrorcnt=0;
- curvar=""; insummary=0
- }
- /^Running target / { curvar = \$3; next }
-+/^ERROR: \(DejaGnu\)/ { if (variant == curvar) dgerrorcnt += 1 }
- /^# of / { if (variant == curvar) insummary = 1 }
- /^# of expected passes/ { if (insummary == 1) passcnt += \$5; next; }
- /^# of unexpected successes/ { if (insummary == 1) xpasscnt += \$5; next; }
-@@ -390,6 +391,7 @@ BEGIN {
- { next }
- END {
- printf ("\t\t=== %s Summary for %s ===\n\n", tool, variant)
-+ if (dgerrorcnt != 0) printf ("# of DejaGnu errors\t\t%d\n", dgerrorcnt)
- if (passcnt != 0) printf ("# of expected passes\t\t%d\n", passcnt)
- if (failcnt != 0) printf ("# of unexpected failures\t%d\n", failcnt)
- if (xpasscnt != 0) printf ("# of unexpected successes\t%d\n", xpasscnt)
-@@ -419,8 +421,9 @@ TOTAL_AWK=${TMP}/total.awk
- cat << EOF > $TOTAL_AWK
- BEGIN {
- tool="$TOOL"
-- passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0
-+ passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0; dgerrorcnt=0
- }
-+/^# of DejaGnu errors/ { dgerrorcnt += \$5 }
- /^# of expected passes/ { passcnt += \$5 }
- /^# of unexpected failures/ { failcnt += \$5 }
- /^# of unexpected successes/ { xpasscnt += \$5 }
-@@ -431,7 +434,8 @@ BEGIN {
- /^# of unresolved testcases/ { unrescnt += \$5 }
- /^# of unsupported tests/ { unsupcnt += \$5 }
- END {
-- printf ("\n\t\t=== %s Summary ===\n\n", tool)
-+ printf ("\n\t\t=== %s MySummary ===\n\n", tool)
-+ if (dgerrorcnt != 0) printf ("# of DejaGnu errors\t\t%d\n", dgerrorcnt)
- if (passcnt != 0) printf ("# of expected passes\t\t%d\n", passcnt)
- if (failcnt != 0) printf ("# of unexpected failures\t%d\n", failcnt)
- if (xpasscnt != 0) printf ("# of unexpected successes\t%d\n", xpasscnt)
--- /dev/null
-+++ b/src/gcc/LINARO-VERSION
-@@ -0,0 +1 @@
-+Snapshot 6.3-2017.03
--- a/src/gcc/Makefile.in
-+++ b/src/gcc/Makefile.in
-@@ -832,10 +832,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x.y
- DEVPHASE := $(srcdir)/DEV-PHASE # experimental, prerelease, ""
- DATESTAMP := $(srcdir)/DATESTAMP # YYYYMMDD or empty
- REVISION := $(srcdir)/REVISION # [BRANCH revision XXXXXX]
-+LINAROVER := $(srcdir)/LINARO-VERSION # M.x-YYYY.MM[-S][~dev]
- 
- BASEVER_c := $(shell cat $(BASEVER))
- DEVPHASE_c := $(shell cat $(DEVPHASE))
- DATESTAMP_c := $(shell cat $(DATESTAMP))
-+LINAROVER_c := $(shell cat $(LINAROVER))
- 
- ifeq (,$(wildcard $(REVISION)))
- REVISION_c :=
-@@ -862,6 +864,7 @@ DATESTAMP_s := \
- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\""
- PKGVERSION_s:= "\"@PKGVERSION@\""
- BUGURL_s := "\"@REPORT_BUGS_TO@\""
-+LINAROVER_s := "\"$(LINAROVER_c)\""
- 
- PKGVERSION := @PKGVERSION@
- BUGURL_TEXI := @REPORT_BUGS_TEXI@
-@@ -2701,8 +2704,9 @@ PREPROCESSOR_DEFINES = \
- -DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \
- @TARGET_SYSTEM_ROOT_DEFINE@
- 
--CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s)
--cppbuiltin.o: $(BASEVER)
-+CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) \
-+ -DLINAROVER=$(LINAROVER_s)
-+cppbuiltin.o: $(BASEVER) $(LINAROVER)
- 
- CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
- 
--- a/src/gcc/ada/gcc-interface/misc.c
-+++ b/src/gcc/ada/gcc-interface/misc.c
-@@ -255,8 +255,7 @@ static bool
- gnat_post_options (const char **pfilename ATTRIBUTE_UNUSED)
- {
- /* Excess precision other than "fast" requires front-end support. */
-- if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD
-- && TARGET_FLT_EVAL_METHOD_NON_DEFAULT)
-+ if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD)
- sorry ("-fexcess-precision=standard for Ada");
- flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
- 
--- a/src/gcc/builtins.c
-+++ b/src/gcc/builtins.c
-@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
- #include "target.h"
- #include "rtl.h"
- #include "tree.h"
-+#include "memmodel.h"
- #include "gimple.h"
- #include "predict.h"
- #include "tm_p.h"
--- a/src/gcc/c-family/c-common.c
-+++ b/src/gcc/c-family/c-common.c
-@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see
- #include "target.h"
- #include "function.h"
- #include "tree.h"
-+#include "memmodel.h"
- #include "c-common.h"
- #include "gimple-expr.h"
- #include "tm_p.h"
--- a/src/gcc/c-family/c-opts.c
-+++ b/src/gcc/c-family/c-opts.c
-@@ -772,8 +772,7 @@ c_common_post_options (const char **pfilename)
- support. */
- if (c_dialect_cxx ())
- {
-- if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD
-- && TARGET_FLT_EVAL_METHOD_NON_DEFAULT)
-+ if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD)
- sorry ("-fexcess-precision=standard for C++");
- flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
- }
--- a/src/gcc/calls.c
-+++ b/src/gcc/calls.c
-@@ -194,10 +194,19 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
- && targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
- ? force_not_mem (memory_address (FUNCTION_MODE, funexp))
- : memory_address (FUNCTION_MODE, funexp));
-- else if (! sibcallp)
-+ else
- {
-- if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
-- funexp = force_reg (Pmode, funexp);
-+ /* funexp could be a SYMBOL_REF represents a function pointer which is
-+ of ptr_mode. In this case, it should be converted into address mode
-+ to be a valid address for memory rtx pattern. See PR 64971. */
-+ if (GET_MODE (funexp) != Pmode)
-+ funexp = convert_memory_address (Pmode, funexp);
-+
-+ if (! sibcallp)
-+ {
-+ if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
-+ funexp = force_reg (Pmode, funexp);
-+ }
- }
- 
- if (static_chain_value != 0
--- a/src/gcc/cfg.c
-+++ b/src/gcc/cfg.c
-@@ -1064,7 +1064,7 @@ free_original_copy_tables (void)
- delete bb_copy;
- bb_copy = NULL;
- delete bb_original;
-- bb_copy = NULL;
-+ bb_original = NULL;
- delete loop_copy;
- loop_copy = NULL;
- delete original_copy_bb_pool;
--- a/src/gcc/common/config/arm/arm-common.c
-+++ b/src/gcc/common/config/arm/arm-common.c
-@@ -97,6 +97,49 @@ arm_rewrite_mcpu (int argc, const char **argv)
- return arm_rewrite_selected_cpu (argv[argc - 1]);
- }
- 
-+struct arm_arch_core_flag
-+{
-+ const char *const name;
-+ const arm_feature_set flags;
-+};
-+
-+static const struct arm_arch_core_flag arm_arch_core_flags[] =
-+{
-+#undef ARM_CORE
-+#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
-+ {NAME, FLAGS},
-+#include "config/arm/arm-cores.def"
-+#undef ARM_CORE
-+#undef ARM_ARCH
-+#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
-+ {NAME, FLAGS},
-+#include "config/arm/arm-arches.def"
-+#undef ARM_ARCH
-+};
-+
-+/* Called by the driver to check whether the target denoted by current
-+ command line options is a Thumb-only target. ARGV is an array of
-+ -march and -mcpu values (ie. it contains the rhs after the equal
-+ sign) and we use the last one of them to make a decision. The
-+ number of elements in ARGV is given in ARGC. */
-+const char *
-+arm_target_thumb_only (int argc, const char **argv)
-+{
-+ unsigned int opt;
-+
-+ if (argc)
-+ {
-+ for (opt = 0; opt < (ARRAY_SIZE (arm_arch_core_flags)); opt++)
-+ if ((strcmp (argv[argc - 1], arm_arch_core_flags[opt].name) == 0)
-+ && !ARM_FSET_HAS_CPU1(arm_arch_core_flags[opt].flags, FL_NOTM))
-+ return "-mthumb";
-+
-+ return NULL;
-+ }
-+ else
-+ return NULL;
-+}
-+
- #undef ARM_CPU_NAME_LENGTH
- 
- 
--- a/src/gcc/config.gcc
-+++ b/src/gcc/config.gcc
-@@ -307,7 +307,7 @@ m32c*-*-*)
- ;;
- aarch64*-*-*)
- cpu_type=aarch64
-- extra_headers="arm_neon.h arm_acle.h"
-+ extra_headers="arm_fp16.h arm_neon.h arm_acle.h"
- c_target_objs="aarch64-c.o"
- cxx_target_objs="aarch64-c.o"
- extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o"
-@@ -327,7 +327,7 @@ arc*-*-*)
- arm*-*-*)
- cpu_type=arm
- extra_objs="arm-builtins.o aarch-common.o"
-- extra_headers="mmintrin.h arm_neon.h arm_acle.h"
-+ extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h"
- target_type_format_char='%'
- c_target_objs="arm-c.o"
- cxx_target_objs="arm-c.o"
-@@ -1500,7 +1500,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i
- extra_options="${extra_options} linux-android.opt"
- # Assume modern glibc if not targeting Android nor uclibc.
- case ${target} in
-- *-*-*android*|*-*-*uclibc*)
-+ *-*-*android*|*-*-*uclibc*|*-*-*musl*)
- ;;
- *)
- default_gnu_indirect_function=yes
-@@ -1569,7 +1569,7 @@ x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu)
- extra_options="${extra_options} linux-android.opt"
- # Assume modern glibc if not targeting Android nor uclibc.
- case ${target} in
-- *-*-*android*|*-*-*uclibc*)
-+ *-*-*android*|*-*-*uclibc*|*-*-*musl*)
- ;;
- *)
- default_gnu_indirect_function=yes
-@@ -3811,38 +3811,51 @@ case "${target}" in
- # Add extra multilibs
- if test "x$with_multilib_list" != x; then
- arm_multilibs=`echo $with_multilib_list | sed -e 's/,/ /g'`
-- for arm_multilib in ${arm_multilibs}; do
-- case ${arm_multilib} in
-- aprofile)
-+ case ${arm_multilibs} in
-+ aprofile)
- # Note that arm/t-aprofile is a
- # stand-alone make file fragment to be
- # used only with itself. We do not
- # specifically use the
- # TM_MULTILIB_OPTION framework because
- # this shorthand is more
-- # pragmatic. Additionally it is only
-- # designed to work without any
-- # with-cpu, with-arch with-mode
-- # with-fpu or with-float options.
-- if test "x$with_arch" != x \
-- || test "x$with_cpu" != x \
-- || test "x$with_float" != x \
-- || test "x$with_fpu" != x \
-- || test "x$with_mode" != x ; then
-- echo "Error: You cannot use any of --with-arch/cpu/fpu/float/mode with --with-multilib-list=aprofile" 1>&2
-- exit 1
-- fi
-- tmake_file="${tmake_file} arm/t-aprofile"
-- break
-- ;;
-- default)
-- ;;
-- *)
-- echo "Error: --with-multilib-list=${with_multilib_list} not supported." 1>&2
-- exit 1
-- ;;
-- esac
-- done
-+ # pragmatic.
-+ tmake_profile_file="arm/t-aprofile"
-+ ;;
-+ rmprofile)
-+ # Note that arm/t-rmprofile is a
-+ # stand-alone make file fragment to be
-+ # used only with itself. We do not
-+ # specifically use the
-+ # TM_MULTILIB_OPTION framework because
-+ # this shorthand is more
-+ # pragmatic.
-+ tmake_profile_file="arm/t-rmprofile"
-+ ;;
-+ default)
-+ ;;
-+ *)
-+ echo "Error: --with-multilib-list=${with_multilib_list} not supported." 1>&2
-+ exit 1
-+ ;;
-+ esac
-+
-+ if test "x${tmake_profile_file}" != x ; then
-+ # arm/t-aprofile and arm/t-rmprofile are only
-+ # designed to work without any with-cpu,
-+ # with-arch, with-mode, with-fpu or with-float
-+ # options.
-+ if test "x$with_arch" != x \
-+ || test "x$with_cpu" != x \
-+ || test "x$with_float" != x \
-+ || test "x$with_fpu" != x \
-+ || test "x$with_mode" != x ; then
-+ echo "Error: You cannot use any of --with-arch/cpu/fpu/float/mode with --with-multilib-list=${with_multilib_list}" 1>&2
-+ exit 1
-+ fi
-+
-+ tmake_file="${tmake_file} ${tmake_profile_file}"
-+ fi
- fi
- ;;
- 
--- a/src/gcc/config/aarch64/aarch64-arches.def
-+++ b/src/gcc/config/aarch64/aarch64-arches.def
-@@ -32,4 +32,6 @@
- 
- AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8)
- AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1)
-+AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2)
-+AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3)
- 
--- a/src/gcc/config/aarch64/aarch64-builtins.c
-+++ b/src/gcc/config/aarch64/aarch64-builtins.c
-@@ -62,6 +62,7 @@
- #define si_UP SImode
- #define sf_UP SFmode
- #define hi_UP HImode
-+#define hf_UP HFmode
- #define qi_UP QImode
- #define UP(X) X##_UP
- 
-@@ -139,6 +140,10 @@ aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_unsigned };
- #define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
- static enum aarch64_type_qualifiers
-+aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_unsigned, qualifier_none, qualifier_none };
-+#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
-+static enum aarch64_type_qualifiers
- aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_poly, qualifier_poly, qualifier_poly };
- #define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
-@@ -164,6 +169,10 @@ aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- #define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
- 
- static enum aarch64_type_qualifiers
-+aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_poly, qualifier_none, qualifier_immediate };
-+#define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
-+static enum aarch64_type_qualifiers
- aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_immediate };
- #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
-@@ -173,16 +182,29 @@ aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_none, qualifier_immediate };
- #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
- static enum aarch64_type_qualifiers
-+aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_none, qualifier_unsigned, qualifier_immediate };
-+#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
-+static enum aarch64_type_qualifiers
- aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
- #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
- 
- static enum aarch64_type_qualifiers
--aarch64_types_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-- = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
--#define TYPES_SETREG (aarch64_types_ternop_imm_qualifiers)
--#define TYPES_SHIFTINSERT (aarch64_types_ternop_imm_qualifiers)
--#define TYPES_SHIFTACC (aarch64_types_ternop_imm_qualifiers)
-+aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate};
-+#define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
-+static enum aarch64_type_qualifiers
-+aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
-+#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
-+#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
-+#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
-+
-+static enum aarch64_type_qualifiers
-+aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
-+#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)
- 
- static enum aarch64_type_qualifiers
- aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-@@ -197,6 +219,11 @@ aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- #define TYPES_COMBINE (aarch64_types_combine_qualifiers)
- 
- static enum aarch64_type_qualifiers
-+aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_poly, qualifier_poly, qualifier_poly };
-+#define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers)
-+
-+static enum aarch64_type_qualifiers
- aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_const_pointer_map_mode };
- #define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
-@@ -229,6 +256,10 @@ aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- qualifier_map_mode | qualifier_pointer to build a pointer to the
- element type of the vector. */
- static enum aarch64_type_qualifiers
-+aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-+ = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
-+#define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
-+static enum aarch64_type_qualifiers
- aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
- #define TYPES_STORE1 (aarch64_types_store1_qualifiers)
-@@ -753,16 +784,16 @@ aarch64_init_simd_builtins (void)
- 
- if (qualifiers & qualifier_unsigned)
- {
-- type_signature[arg_num] = 'u';
-+ type_signature[op_num] = 'u';
- print_type_signature_p = true;
- }
- else if (qualifiers & qualifier_poly)
- {
-- type_signature[arg_num] = 'p';
-+ type_signature[op_num] = 'p';
- print_type_signature_p = true;
- }
- else
-- type_signature[arg_num] = 's';
-+ type_signature[op_num] = 's';
- 
- /* Skip an internal operand for vget_{low, high}. */
- if (qualifiers & qualifier_internal)
--- a/src/gcc/config/aarch64/aarch64-c.c
-+++ b/src/gcc/config/aarch64/aarch64-c.c
-@@ -95,6 +95,11 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
- else
- cpp_undef (pfile, "__ARM_FP");
- 
-+ aarch64_def_or_undef (TARGET_FP_F16INST,
-+ "__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", pfile);
-+ aarch64_def_or_undef (TARGET_SIMD_F16INST,
-+ "__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", pfile);
-+
- aarch64_def_or_undef (TARGET_SIMD, "__ARM_FEATURE_NUMERIC_MAXMIN", pfile);
- aarch64_def_or_undef (TARGET_SIMD, "__ARM_NEON", pfile);
- 
--- a/src/gcc/config/aarch64/aarch64-cores.def
-+++ b/src/gcc/config/aarch64/aarch64-cores.def
-@@ -40,17 +40,33 @@
- 
- /* V8 Architecture Processors. */
- 
-+/* ARM ('A') cores. */
- AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, "0x41", "0xd04")
- AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
- AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
- AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
-+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, "0x41", "0xd09")
-+
-+/* Samsung ('S') cores. */
- AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, "0x53", "0x001")
--AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800")
-+
-+/* Qualcomm ('Q') cores. */
-+AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, "0x51", "0x800")
-+
-+/* Cavium ('C') cores. */
- AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1")
-+
-+/* APM ('P') cores. */
- AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
- 
-+/* V8.1 Architecture Processors. */
-+
-+/* Broadcom ('B') cores. */
-+AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
-+
- /* V8 big.LITTLE implementations. */
- 
- AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
- AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03")
--
-+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, "0x41", "0xd09.0xd04")
-+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, "0x41", "0xd09.0xd03")
--- a/src/gcc/config/aarch64/aarch64-cost-tables.h
-+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
-@@ -127,6 +127,108 @@ const struct cpu_cost_table thunderx_extra_costs =
- }
- };
- 
-+const struct cpu_cost_table vulcan_extra_costs =
-+{
-+ /* ALU */
-+ {
-+ 0, /* Arith. */
-+ 0, /* Logical. */
-+ 0, /* Shift. */
-+ 0, /* Shift_reg. */
-+ COSTS_N_INSNS (1), /* Arith_shift. */
-+ COSTS_N_INSNS (1), /* Arith_shift_reg. */
-+ COSTS_N_INSNS (1), /* Log_shift. */
-+ COSTS_N_INSNS (1), /* Log_shift_reg. */
-+ 0, /* Extend. */
-+ COSTS_N_INSNS (1), /* Extend_arith. */
-+ 0, /* Bfi. */
-+ 0, /* Bfx. */
-+ COSTS_N_INSNS (3), /* Clz. */
-+ 0, /* Rev. */
-+ 0, /* Non_exec. */
-+ true /* Non_exec_costs_exec. */
-+ },
-+ {
-+ /* MULT SImode */
-+ {
-+ COSTS_N_INSNS (4), /* Simple. */
-+ COSTS_N_INSNS (4), /* Flag_setting. */
-+ COSTS_N_INSNS (4), /* Extend. */
-+ COSTS_N_INSNS (5), /* Add. */
-+ COSTS_N_INSNS (5), /* Extend_add. */
-+ COSTS_N_INSNS (18) /* Idiv. */
-+ },
-+ /* MULT DImode */
-+ {
-+ COSTS_N_INSNS (4), /* Simple. */
-+ 0, /* Flag_setting. */
-+ COSTS_N_INSNS (4), /* Extend. */
-+ COSTS_N_INSNS (5), /* Add. */
-+ COSTS_N_INSNS (5), /* Extend_add. */
-+ COSTS_N_INSNS (26) /* Idiv. */
-+ }
-+ },
-+ /* LD/ST */
-+ {
-+ COSTS_N_INSNS (4), /* Load. */
-+ COSTS_N_INSNS (4), /* Load_sign_extend. */
-+ COSTS_N_INSNS (5), /* Ldrd. */
-+ COSTS_N_INSNS (4), /* Ldm_1st. */
-+ 1, /* Ldm_regs_per_insn_1st. */
-+ 1, /* Ldm_regs_per_insn_subsequent. */
-+ COSTS_N_INSNS (4), /* Loadf. */
-+ COSTS_N_INSNS (4), /* Loadd. */
-+ COSTS_N_INSNS (4), /* Load_unaligned. */
-+ 0, /* Store. */
-+ 0, /* Strd. */
-+ 0, /* Stm_1st. */
-+ 1, /* Stm_regs_per_insn_1st. */
-+ 1, /* Stm_regs_per_insn_subsequent. */
-+ 0, /* Storef. */
-+ 0, /* Stored. */
-+ 0, /* Store_unaligned. */
-+ COSTS_N_INSNS (1), /* Loadv. */
-+ COSTS_N_INSNS (1) /* Storev. */
-+ },
-+ {
-+ /* FP SFmode */
-+ {
-+ COSTS_N_INSNS (4), /* Div. */
-+ COSTS_N_INSNS (1), /* Mult. */
-+ COSTS_N_INSNS (1), /* Mult_addsub. */
-+ COSTS_N_INSNS (1), /* Fma. */
-+ COSTS_N_INSNS (1), /* Addsub. */
-+ COSTS_N_INSNS (1), /* Fpconst. */
-+ COSTS_N_INSNS (1), /* Neg. */
-+ COSTS_N_INSNS (1), /* Compare. */
-+ COSTS_N_INSNS (2), /* Widen. */
-+ COSTS_N_INSNS (2), /* Narrow. */
-+ COSTS_N_INSNS (2), /* Toint. */
-+ COSTS_N_INSNS (2), /* Fromint. */
-+ COSTS_N_INSNS (2) /* Roundint. */
-+ },
-+ /* FP DFmode */
-+ {
-+ COSTS_N_INSNS (6), /* Div. */
-+ COSTS_N_INSNS (1), /* Mult. */
-+ COSTS_N_INSNS (1), /* Mult_addsub. */
-+ COSTS_N_INSNS (1), /* Fma. */
-+ COSTS_N_INSNS (1), /* Addsub. */
-+ COSTS_N_INSNS (1), /* Fpconst. */
-+ COSTS_N_INSNS (1), /* Neg. */
-+ COSTS_N_INSNS (1), /* Compare. */
-+ COSTS_N_INSNS (2), /* Widen. */
-+ COSTS_N_INSNS (2), /* Narrow. */
-+ COSTS_N_INSNS (2), /* Toint. */
-+ COSTS_N_INSNS (2), /* Fromint. */
-+ COSTS_N_INSNS (2) /* Roundint. */
-+ }
-+ },
-+ /* Vector */
-+ {
-+ COSTS_N_INSNS (1) /* Alu. */
-+ }
-+};
- 
- 
- #endif
--- a/src/gcc/config/aarch64/aarch64-elf.h
-+++ b/src/gcc/config/aarch64/aarch64-elf.h
-@@ -25,15 +25,6 @@
- #define ASM_OUTPUT_LABELREF(FILE, NAME) \
- aarch64_asm_output_labelref (FILE, NAME)
- 
--#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \
-- do \
-- { \
-- assemble_name (FILE, NAME1); \
-- fputs (" = ", FILE); \
-- assemble_name (FILE, NAME2); \
-- fputc ('\n', FILE); \
-- } while (0)
--
- #define TEXT_SECTION_ASM_OP "\t.text"
- #define DATA_SECTION_ASM_OP "\t.data"
- #define BSS_SECTION_ASM_OP "\t.bss"
--- a/src/gcc/config/aarch64/aarch64-modes.def
-+++ b/src/gcc/config/aarch64/aarch64-modes.def
-@@ -21,8 +21,6 @@
- CC_MODE (CCFP);
- CC_MODE (CCFPE);
- CC_MODE (CC_SWP);
--CC_MODE (CC_ZESWP); /* zero-extend LHS (but swap to make it RHS). */
--CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS). */
- CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */
- CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */
- CC_MODE (CC_C); /* Only C bit of condition flags is valid. */
--- a/src/gcc/config/aarch64/aarch64-option-extensions.def
-+++ b/src/gcc/config/aarch64/aarch64-option-extensions.def
-@@ -39,8 +39,8 @@
- that are required. Their order is not important. */
- 
- /* Enabling "fp" just enables "fp".
-- Disabling "fp" also disables "simd", "crypto". */
--AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "fp")
-+ Disabling "fp" also disables "simd", "crypto" and "fp16". */
-+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_F16, "fp")
- 
- /* Enabling "simd" also enables "fp".
- Disabling "simd" also disables "crypto". */
-@@ -55,3 +55,7 @@ AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, "crc32")
- 
- /* Enabling or disabling "lse" only changes "lse". */
- AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
-+
-+/* Enabling "fp16" also enables "fp".
-+ Disabling "fp16" just disables "fp16". */
-+AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fp16")
--- /dev/null
-+++ b/src/gcc/config/aarch64/aarch64-passes.def
-@@ -0,0 +1,21 @@
-+/* AArch64-specific passes declarations.
-+ Copyright (C) 2016 Free Software Foundation, Inc.
-+ Contributed by ARM Ltd.
-+
-+ This file is part of GCC.
-+
-+ GCC is free software; you can redistribute it and/or modify it
-+ under the terms of the GNU General Public License as published by
-+ the Free Software Foundation; either version 3, or (at your option)
-+ any later version.
-+
-+ GCC is distributed in the hope that it will be useful, but
-+ WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ General Public License for more details.
-+
-+ You should have received a copy of the GNU General Public License
-+ along with GCC; see the file COPYING3. If not see
-+ <http://www.gnu.org/licenses/>. */
-+
-+INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering);
--- a/src/gcc/config/aarch64/aarch64-protos.h
-+++ b/src/gcc/config/aarch64/aarch64-protos.h
-@@ -178,6 +178,25 @@ struct cpu_branch_cost
- const int unpredictable; /* Unpredictable branch or optimizing for speed. */
- };
- 
-+/* Control approximate alternatives to certain FP operators. */
-+#define AARCH64_APPROX_MODE(MODE) \
-+ ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
-+ ? (1 << ((MODE) - MIN_MODE_FLOAT)) \
-+ : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
-+ ? (1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \
-+ + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
-+ : (0))
-+#define AARCH64_APPROX_NONE (0)
-+#define AARCH64_APPROX_ALL (-1)
-+
-+/* Allowed modes for approximations. */
-+struct cpu_approx_modes
-+{
-+ const unsigned int division; /* Division. */
-+ const unsigned int sqrt; /* Square root. */
-+ const unsigned int recip_sqrt; /* Reciprocal square root. */
-+};
-+
- struct tune_params
- {
- const struct cpu_cost_table *insn_extra_cost;
-@@ -185,6 +204,7 @@ struct tune_params
- const struct cpu_regmove_cost *regmove_cost;
- const struct cpu_vector_cost *vec_costs;
- const struct cpu_branch_cost *branch_costs;
-+ const struct cpu_approx_modes *approx_modes;
- int memmov_cost;
- int issue_rate;
- unsigned int fusible_ops;
-@@ -282,14 +302,14 @@ int aarch64_get_condition_code (rtx);
- bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
- int aarch64_branch_cost (bool, bool);
- enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
--bool aarch64_cannot_change_mode_class (machine_mode,
-- machine_mode,
-- enum reg_class);
- bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
- bool aarch64_constant_address_p (rtx);
-+bool aarch64_emit_approx_div (rtx, rtx, rtx);
-+bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
- bool aarch64_expand_movmem (rtx *);
- bool aarch64_float_const_zero_rtx_p (rtx);
- bool aarch64_function_arg_regno_p (unsigned);
-+bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs);
- bool aarch64_gen_movmemqi (rtx *);
- bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
- bool aarch64_is_extend_from_extract (machine_mode, rtx, rtx);
-@@ -298,6 +318,7 @@ bool aarch64_is_noplt_call_p (rtx);
- bool aarch64_label_mentioned_p (rtx);
- void aarch64_declare_function_name (FILE *, const char*, tree);
- bool aarch64_legitimate_pic_operand_p (rtx);
-+bool aarch64_mask_and_shift_for_ubfiz_p (machine_mode, rtx, rtx);
- bool aarch64_modes_tieable_p (machine_mode mode1,
- machine_mode mode2);
- bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-@@ -320,6 +341,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
- bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
- bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
- struct simd_immediate_info *);
-+bool aarch64_split_dimode_const_store (rtx, rtx);
- bool aarch64_symbolic_address_p (rtx);
- bool aarch64_uimm12_shift (HOST_WIDE_INT);
- bool aarch64_use_return_insn_p (void);
-@@ -335,11 +357,9 @@ machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
- machine_mode);
- int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
- int aarch64_hard_regno_nregs (unsigned, machine_mode);
--int aarch64_simd_attr_length_move (rtx_insn *);
- int aarch64_uxt_size (int, HOST_WIDE_INT);
- int aarch64_vec_fpconst_pow_of_2 (rtx);
- rtx aarch64_eh_return_handler_rtx (void);
--rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
- rtx aarch64_mask_from_zextract_ops (rtx, rtx);
- const char *aarch64_output_move_struct (rtx *operands);
- rtx aarch64_return_addr (int, rtx);
-@@ -352,7 +372,6 @@ unsigned aarch64_dbx_register_number (unsigned);
- unsigned aarch64_trampoline_size (void);
- void aarch64_asm_output_labelref (FILE *, const char *);
- void aarch64_cpu_cpp_builtins (cpp_reader *);
--void aarch64_elf_asm_named_section (const char *, unsigned, tree);
- const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
- const char * aarch64_output_probe_stack_range (rtx, rtx);
- void aarch64_err_no_fpadvsimd (machine_mode, const char *);
-@@ -369,7 +388,6 @@ void aarch64_register_pragmas (void);
- void aarch64_relayout_simd_types (void);
- void aarch64_reset_previous_fndecl (void);
- void aarch64_save_restore_target_globals (tree);
--void aarch64_emit_approx_rsqrt (rtx, rtx);
- 
- /* Initialize builtins for SIMD intrinsics. */
- void init_aarch64_simd_builtins (void);
-@@ -436,7 +454,6 @@ int aarch64_ccmp_mode_to_code (enum machine_mode mode);
- bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
- bool aarch64_operands_ok_for_ldpstp (rtx *, bool, enum machine_mode);
- bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, enum machine_mode);
--extern bool aarch64_nopcrelative_literal_loads;
- 
- extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
- tree, HOST_WIDE_INT);
-@@ -450,4 +467,6 @@ enum aarch64_parse_opt_result aarch64_parse_extension (const char *,
- std::string aarch64_get_extension_string_for_isa_flags (unsigned long,
- unsigned long);
- 
-+rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt);
-+
- #endif /* GCC_AARCH64_PROTOS_H */
--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
-+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
-@@ -40,9 +40,10 @@
- 10 - CODE_FOR_<name><mode>. */
- 
- BUILTIN_VDC (COMBINE, combine, 0)
-+ VAR1 (COMBINEP, combine, 0, di)
- BUILTIN_VB (BINOP, pmul, 0)
-- BUILTIN_VALLF (BINOP, fmulx, 0)
-- BUILTIN_VDQF_DF (UNOP, sqrt, 2)
-+ BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0)
-+ BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
- BUILTIN_VD_BHSI (BINOP, addp, 0)
- VAR1 (UNOP, addp, 0, di)
- BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
-@@ -68,14 +69,23 @@
- BUILTIN_VDC (GETREG, get_dregoi, 0)
- BUILTIN_VDC (GETREG, get_dregci, 0)
- BUILTIN_VDC (GETREG, get_dregxi, 0)
-+ VAR1 (GETREGP, get_dregoi, 0, di)
-+ VAR1 (GETREGP, get_dregci, 0, di)
-+ VAR1 (GETREGP, get_dregxi, 0, di)
- /* Implemented by aarch64_get_qreg<VSTRUCT:mode><VQ:mode>. */
- BUILTIN_VQ (GETREG, get_qregoi, 0)
- BUILTIN_VQ (GETREG, get_qregci, 0)
- BUILTIN_VQ (GETREG, get_qregxi, 0)
-+ VAR1 (GETREGP, get_qregoi, 0, v2di)
-+ VAR1 (GETREGP, get_qregci, 0, v2di)
-+ VAR1 (GETREGP, get_qregxi, 0, v2di)
- /* Implemented by aarch64_set_qreg<VSTRUCT:mode><VQ:mode>. */
- BUILTIN_VQ (SETREG, set_qregoi, 0)
- BUILTIN_VQ (SETREG, set_qregci, 0)
- BUILTIN_VQ (SETREG, set_qregxi, 0)
-+ VAR1 (SETREGP, set_qregoi, 0, v2di)
-+ VAR1 (SETREGP, set_qregci, 0, v2di)
-+ VAR1 (SETREGP, set_qregxi, 0, v2di)
- /* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */
- BUILTIN_VDC (LOADSTRUCT, ld2, 0)
- BUILTIN_VDC (LOADSTRUCT, ld3, 0)
-@@ -224,6 +234,7 @@
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
- BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0)
- BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
-+ VAR2 (SHIFTINSERTP, ssli_n, 0, di, v2di)
- BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0)
- /* Implemented by aarch64_<sur>qshl<u>_n<mode>. */
- BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0)
-@@ -234,105 +245,145 @@
- BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
- 
- /* Implemented by reduc_<maxmin_uns>_scal_<mode> (producing scalar). */
-- BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
-- BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
-+ BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10)
-+ BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10)
- BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
- BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
-- BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
-- BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
-+ BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
-+ BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
- 
-- /* Implemented by <maxmin><mode>3.
-+ /* Implemented by <maxmin_uns><mode>3.
- smax variants map to fmaxnm,
- smax_nan variants map to fmax. */
- BUILTIN_VDQ_BHSI (BINOP, smax, 3)
- BUILTIN_VDQ_BHSI (BINOP, smin, 3)
- BUILTIN_VDQ_BHSI (BINOP, umax, 3)
- BUILTIN_VDQ_BHSI (BINOP, umin, 3)
-- BUILTIN_VDQF (BINOP, smax_nan, 3)
-- BUILTIN_VDQF (BINOP, smin_nan, 3)
-+ BUILTIN_VHSDF_DF (BINOP, smax_nan, 3)
-+ BUILTIN_VHSDF_DF (BINOP, smin_nan, 3)
- 
-- /* Implemented by <fmaxmin><mode>3. */
-- BUILTIN_VDQF (BINOP, fmax, 3)
-- BUILTIN_VDQF (BINOP, fmin, 3)
-+ /* Implemented by <maxmin_uns><mode>3. */
-+ BUILTIN_VHSDF_HSDF (BINOP, fmax, 3)
-+ BUILTIN_VHSDF_HSDF (BINOP, fmin, 3)
- 
- /* Implemented by aarch64_<maxmin_uns>p<mode>. */
- BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
- BUILTIN_VDQ_BHSI (BINOP, sminp, 0)
- BUILTIN_VDQ_BHSI (BINOP, umaxp, 0)
- BUILTIN_VDQ_BHSI (BINOP, uminp, 0)
-- BUILTIN_VDQF (BINOP, smaxp, 0)
-- BUILTIN_VDQF (BINOP, sminp, 0)
-- BUILTIN_VDQF (BINOP, smax_nanp, 0)
-- BUILTIN_VDQF (BINOP, smin_nanp, 0)
-+ BUILTIN_VHSDF (BINOP, smaxp, 0)
-+ BUILTIN_VHSDF (BINOP, sminp, 0)
-+ BUILTIN_VHSDF (BINOP, smax_nanp, 0)
-+ BUILTIN_VHSDF (BINOP, smin_nanp, 0)
- 
- /* Implemented by <frint_pattern><mode>2. */
-- BUILTIN_VDQF (UNOP, btrunc, 2)
-- BUILTIN_VDQF (UNOP, ceil, 2)
-- BUILTIN_VDQF (UNOP, floor, 2)
-- BUILTIN_VDQF (UNOP, nearbyint, 2)
-- BUILTIN_VDQF (UNOP, rint, 2)
-- BUILTIN_VDQF (UNOP, round, 2)
-- BUILTIN_VDQF_DF (UNOP, frintn, 2)
-+ BUILTIN_VHSDF (UNOP, btrunc, 2)
-+ BUILTIN_VHSDF (UNOP, ceil, 2)
-+ BUILTIN_VHSDF (UNOP, floor, 2)
-+ BUILTIN_VHSDF (UNOP, nearbyint, 2)
-+ BUILTIN_VHSDF (UNOP, rint, 2)
-+ BUILTIN_VHSDF (UNOP, round, 2)
-+ BUILTIN_VHSDF_DF (UNOP, frintn, 2)
-+
-+ VAR1 (UNOP, btrunc, 2, hf)
-+ VAR1 (UNOP, ceil, 2, hf)
-+ VAR1 (UNOP, floor, 2, hf)
-+ VAR1 (UNOP, frintn, 2, hf)
-+ VAR1 (UNOP, nearbyint, 2, hf)
-+ VAR1 (UNOP, rint, 2, hf)
-+ VAR1 (UNOP, round, 2, hf)
- 
- /* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
-+ VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
-+ VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
- VAR1 (UNOP, lbtruncv2sf, 2, v2si)
- VAR1 (UNOP, lbtruncv4sf, 2, v4si)
- VAR1 (UNOP, lbtruncv2df, 2, v2di)
- 
-+ VAR1 (UNOPUS, lbtruncuv4hf, 2, v4hi)
-+ VAR1 (UNOPUS, lbtruncuv8hf, 2, v8hi)
- VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si)
- VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si)
- VAR1 (UNOPUS, lbtruncuv2df, 2, v2di)
- 
-+ VAR1 (UNOP, lroundv4hf, 2, v4hi)
-+ VAR1 (UNOP, lroundv8hf, 2, v8hi)
- VAR1 (UNOP, lroundv2sf, 2, v2si)
- VAR1 (UNOP, lroundv4sf, 2, v4si)
- VAR1 (UNOP, lroundv2df, 2, v2di)
-- /* Implemented by l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2. */
-+ /* Implemented by l<fcvt_pattern><su_optab><GPF_F16:mode><GPI:mode>2. */
-+ BUILTIN_GPI_I16 (UNOP, lroundhf, 2)
- VAR1 (UNOP, lroundsf, 2, si)
- VAR1 (UNOP, lrounddf, 2, di)
- 
-+ VAR1 (UNOPUS, lrounduv4hf, 2, v4hi)
-+ VAR1 (UNOPUS, lrounduv8hf, 2, v8hi)
- VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
- VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
- VAR1 (UNOPUS, lrounduv2df, 2, v2di)
-+ BUILTIN_GPI_I16 (UNOPUS, lrounduhf, 2)
- VAR1 (UNOPUS, lroundusf, 2, si)
- VAR1 (UNOPUS, lroundudf, 2, di)
- 
-+ VAR1 (UNOP, lceilv4hf, 2, v4hi)
-+ VAR1 (UNOP, lceilv8hf, 2, v8hi)
- VAR1 (UNOP, lceilv2sf, 2, v2si)
- VAR1 (UNOP, lceilv4sf, 2, v4si)
- VAR1 (UNOP, lceilv2df, 2, v2di)
-+ BUILTIN_GPI_I16 (UNOP, lceilhf, 2)
- 
-+ VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
-+ VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
- VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
- VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
- VAR1 (UNOPUS, lceiluv2df, 2, v2di)
-+ BUILTIN_GPI_I16 (UNOPUS, lceiluhf, 2)
- VAR1 (UNOPUS, lceilusf, 2, si)
- VAR1 (UNOPUS, lceiludf, 2, di)
- 
-+ VAR1 (UNOP, lfloorv4hf, 2, v4hi)
-+ VAR1 (UNOP, lfloorv8hf, 2, v8hi)
- VAR1 (UNOP, lfloorv2sf, 2, v2si)
- VAR1 (UNOP, lfloorv4sf, 2, v4si)
- VAR1 (UNOP, lfloorv2df, 2, v2di)
-+ BUILTIN_GPI_I16 (UNOP, lfloorhf, 2)
- 
-+ VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
-+ VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
- VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
- VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
- VAR1 (UNOPUS, lflooruv2df, 2, v2di)
-+ BUILTIN_GPI_I16 (UNOPUS, lflooruhf, 2)
- VAR1 (UNOPUS, lfloorusf, 2, si)
- VAR1 (UNOPUS, lfloorudf, 2, di)
- 
-+ VAR1 (UNOP, lfrintnv4hf, 2, v4hi)
-+ VAR1 (UNOP, lfrintnv8hf, 2, v8hi)
- VAR1 (UNOP, lfrintnv2sf, 2, v2si)
- VAR1 (UNOP, lfrintnv4sf, 2, v4si)
- VAR1 (UNOP, lfrintnv2df, 2, v2di)
-+ BUILTIN_GPI_I16 (UNOP, lfrintnhf, 2)
- VAR1 (UNOP, lfrintnsf, 2, si)
- VAR1 (UNOP, lfrintndf, 2, di)
- 
-+ VAR1 (UNOPUS, lfrintnuv4hf, 2, v4hi)
-+ VAR1 (UNOPUS, lfrintnuv8hf, 2, v8hi)
- VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
- VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
- VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
-+ BUILTIN_GPI_I16 (UNOPUS, lfrintnuhf, 2)
- VAR1 (UNOPUS, lfrintnusf, 2, si)
- VAR1 (UNOPUS, lfrintnudf, 2, di)
- 
- /* Implemented by <optab><fcvt_target><VDQF:mode>2. */
-+ VAR1 (UNOP, floatv4hi, 2, v4hf)
-+ VAR1 (UNOP, floatv8hi, 2, v8hf)
- VAR1 (UNOP, floatv2si, 2, v2sf)
- VAR1 (UNOP, floatv4si, 2, v4sf)
- VAR1 (UNOP, floatv2di, 2, v2df)
- 
-+ VAR1 (UNOP, floatunsv4hi, 2, v4hf)
-+ VAR1 (UNOP, floatunsv8hi, 2, v8hf)
- VAR1 (UNOP, floatunsv2si, 2, v2sf)
- VAR1 (UNOP, floatunsv4si, 2, v4sf)
- VAR1 (UNOP, floatunsv2di, 2, v2df)
-@@ -352,19 +403,19 @@
- 
- /* Implemented by
- aarch64_frecp<FRECP:frecp_suffix><mode>. */
-- BUILTIN_GPF (UNOP, frecpe, 0)
-- BUILTIN_GPF (BINOP, frecps, 0)
-- BUILTIN_GPF (UNOP, frecpx, 0)
-+ BUILTIN_GPF_F16 (UNOP, frecpe, 0)
-+ BUILTIN_GPF_F16 (UNOP, frecpx, 0)
- 
- BUILTIN_VDQ_SI (UNOP, urecpe, 0)
- 
-- BUILTIN_VDQF (UNOP, frecpe, 0)
-- BUILTIN_VDQF (BINOP, frecps, 0)
-+ BUILTIN_VHSDF (UNOP, frecpe, 0)
-+ BUILTIN_VHSDF_HSDF (BINOP, frecps, 0)
- 
- /* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
- only ever used for the int64x1_t intrinsic, there is no scalar version. */
- BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
-- BUILTIN_VDQF (UNOP, abs, 2)
-+ BUILTIN_VHSDF (UNOP, abs, 2)
-+ VAR1 (UNOP, abs, 2, hf)
- 
- BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
- VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
-@@ -376,15 +427,22 @@
- 
- /* Implemented by aarch64_ld1<VALL_F16:mode>. */
- BUILTIN_VALL_F16 (LOAD1, ld1, 0)
-+ VAR1(STORE1P, ld1, 0, v2di)
- 
- /* Implemented by aarch64_st1<VALL_F16:mode>. */
- BUILTIN_VALL_F16 (STORE1, st1, 0)
-+ VAR1(STORE1P, st1, 0, v2di)
- 
- /* Implemented by fma<mode>4. */
-- BUILTIN_VDQF (TERNOP, fma, 4)
-+ BUILTIN_VHSDF (TERNOP, fma, 4)
-+ VAR1 (TERNOP, fma, 4, hf)
-+ /* Implemented by fnma<mode>4. */
-+ BUILTIN_VHSDF (TERNOP, fnma, 4)
-+ VAR1 (TERNOP, fnma, 4, hf)
- 
- /* Implemented by aarch64_simd_bsl<mode>. */
- BUILTIN_VDQQH (BSL_P, simd_bsl, 0)
-+ VAR2 (BSL_P, simd_bsl,0, di, v2di)
- BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0)
- BUILTIN_VALLDIF (BSL_S, simd_bsl, 0)
- 
-@@ -436,7 +494,7 @@
- VAR1 (TERNOP, qtbx4, 0, v8qi)
- VAR1 (TERNOP, qtbx4, 0, v16qi)
- 
-- /* Builtins for ARMv8.1 Adv.SIMD instructions. */
-+ /* Builtins for ARMv8.1-A Adv.SIMD instructions. */
- 
- /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>. */
- BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0)
-@@ -449,3 +507,60 @@
- /* Implemented by aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>. */
- BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0)
- BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0)
-+
-+ /* Implemented by <FCVT_F2FIXED/FIXED2F:fcvt_fixed_insn><*><*>3. */
-+ BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3)
-+ BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3)
-+ BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3)
-+ BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3)
-+ VAR1 (SHIFTIMM, scvtfsi, 3, hf)
-+ VAR1 (SHIFTIMM, scvtfdi, 3, hf)
-+ VAR1 (FCVTIMM_SUS, ucvtfsi, 3, hf)
-+ VAR1 (FCVTIMM_SUS, ucvtfdi, 3, hf)
-+ BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3)
-+ BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3)
-+
-+ /* Implemented by aarch64_rsqrte<mode>. */
-+ BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
-+
-+ /* Implemented by aarch64_rsqrts<mode>. */
-+ BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0)
-+
-+ /* Implemented by fabd<mode>3. */
-+ BUILTIN_VHSDF_HSDF (BINOP, fabd, 3)
-+
-+ /* Implemented by aarch64_faddp<mode>. */
-+ BUILTIN_VHSDF (BINOP, faddp, 0)
-+
-+ /* Implemented by aarch64_cm<optab><mode>. */
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmeq, 0)
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmge, 0)
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmgt, 0)
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0)
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0)
-+
-+ /* Implemented by neg<mode>2. */
-+ BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
-+
-+ /* Implemented by aarch64_fac<optab><mode>. */
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0)
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0)
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0)
-+ BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0)
-+
-+ /* Implemented by sqrt<mode>2. */
-+ VAR1 (UNOP, sqrt, 2, hf)
-+
-+ /* Implemented by <optab><mode>hf2. */
-+ VAR1 (UNOP, floatdi, 2, hf)
-+ VAR1 (UNOP, floatsi, 2, hf)
-+ VAR1 (UNOP, floathi, 2, hf)
-+ VAR1 (UNOPUS, floatunsdi, 2, hf)
-+ VAR1 (UNOPUS, floatunssi, 2, hf)
-+ VAR1 (UNOPUS, floatunshi, 2, hf)
-+ BUILTIN_GPI_I16 (UNOP, fix_trunchf, 2)
-+ BUILTIN_GPI (UNOP, fix_truncsf, 2)
-+ BUILTIN_GPI (UNOP, fix_truncdf, 2)
-+ BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
-+ BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
-+ BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
-\ No newline at end of file
--- a/src/gcc/config/aarch64/aarch64-simd.md
-+++ b/src/gcc/config/aarch64/aarch64-simd.md
-@@ -351,7 +351,7 @@
- operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
- return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
- }
-- [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
-+ [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
- )
- 
- (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
-@@ -371,33 +371,33 @@
- [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
- )
- 
--(define_insn "*aarch64_mul3_elt_to_128df"
-- [(set (match_operand:V2DF 0 "register_operand" "=w")
-- (mult:V2DF
-- (vec_duplicate:V2DF
-- (match_operand:DF 2 "register_operand" "w"))
-- (match_operand:V2DF 1 "register_operand" "w")))]
-+(define_insn "*aarch64_mul3_elt_from_dup<mode>"
-+ [(set (match_operand:VMUL 0 "register_operand" "=w")
-+ (mult:VMUL
-+ (vec_duplicate:VMUL
-+ (match_operand:<VEL> 1 "register_operand" "<h_con>"))
-+ (match_operand:VMUL 2 "register_operand" "w")))]
- "TARGET_SIMD"
-- "fmul\\t%0.2d, %1.2d, %2.d[0]"
-- [(set_attr "type" "neon_fp_mul_d_scalar_q")]
-+ "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
-+ [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
- )
- 
--(define_insn "aarch64_rsqrte_<mode>2"
-- [(set (match_operand:VALLF 0 "register_operand" "=w")
-- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
-+(define_insn "aarch64_rsqrte<mode>"
-+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
-+ (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
- UNSPEC_RSQRTE))]
- "TARGET_SIMD"
- "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
-- [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
-+ [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
- 
--(define_insn "aarch64_rsqrts_<mode>3"
-- [(set (match_operand:VALLF 0 "register_operand" "=w")
-- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
-- (match_operand:VALLF 2 "register_operand" "w")]
-- UNSPEC_RSQRTS))]
-+(define_insn "aarch64_rsqrts<mode>"
-+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
-+ (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
-+ UNSPEC_RSQRTS))]
- "TARGET_SIMD"
- "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
-- [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
-+ [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
- 
- (define_expand "rsqrt<mode>2"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
-@@ -405,7 +405,7 @@
- UNSPEC_RSQRT))]
- "TARGET_SIMD"
- {
-- aarch64_emit_approx_rsqrt (operands[0], operands[1]);
-+ aarch64_emit_approx_sqrt (operands[0], operands[1], true);
- DONE;
- })
- 
-@@ -474,24 +474,15 @@
- [(set_attr "type" "neon_arith_acc<q>")]
- )
- 
--(define_insn "fabd<mode>_3"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (abs:VDQF (minus:VDQF
-- (match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w"))))]
-- "TARGET_SIMD"
-- "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
--)
--
--(define_insn "*fabd_scalar<mode>3"
-- [(set (match_operand:GPF 0 "register_operand" "=w")
-- (abs:GPF (minus:GPF
-- (match_operand:GPF 1 "register_operand" "w")
-- (match_operand:GPF 2 "register_operand" "w"))))]
-+(define_insn "fabd<mode>3"
-+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
-+ (abs:VHSDF_HSDF
-+ (minus:VHSDF_HSDF
-+ (match_operand:VHSDF_HSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
- "TARGET_SIMD"
-- "fabd\t%<s>0, %<s>1, %<s>2"
-- [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
-+ "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
-+ [(set_attr "type" "neon_fp_abd_<stype><q>")]
- )
- 
- (define_insn "and<mode>3"
-@@ -555,6 +546,49 @@
- [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
- )
- 
-+(define_insn "*aarch64_simd_vec_copy_lane<mode>"
-+ [(set (match_operand:VALL 0 "register_operand" "=w")
-+ (vec_merge:VALL
-+ (vec_duplicate:VALL
-+ (vec_select:<VEL>
-+ (match_operand:VALL 3 "register_operand" "w")
-+ (parallel
-+ [(match_operand:SI 4 "immediate_operand" "i")])))
-+ (match_operand:VALL 1 "register_operand" "0")
-+ (match_operand:SI 2 "immediate_operand" "i")))]
-+ "TARGET_SIMD"
-+ {
-+ int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
-+ operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
-+ operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
-+
-+ return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
-+ }
-+ [(set_attr "type" "neon_ins<q>")]
-+)
-+
-+(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
-+ [(set (match_operand:VALL 0 "register_operand" "=w")
-+ (vec_merge:VALL
-+ (vec_duplicate:VALL
-+ (vec_select:<VEL>
-+ (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
-+ (parallel
-+ [(match_operand:SI 4 "immediate_operand" "i")])))
-+ (match_operand:VALL 1 "register_operand" "0")
-+ (match_operand:SI 2 "immediate_operand" "i")))]
-+ "TARGET_SIMD"
-+ {
-+ int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
-+ operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
-+ operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
-+ INTVAL (operands[4])));
-+
-+ return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
-+ }
-+ [(set_attr "type" "neon_ins<q>")]
-+)
-+
- (define_insn "aarch64_simd_lshr<mode>"
- [(set (match_operand:VDQ_I 0 "register_operand" "=w")
- (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-@@ -1071,10 +1105,10 @@
- 
- ;; Pairwise FP Max/Min operations.
- (define_insn "aarch64_<maxmin_uns>p<mode>"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")]
-- FMAXMINV))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")]
-+ FMAXMINV))]
- "TARGET_SIMD"
- "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
- [(set_attr "type" "neon_minmax<q>")]
- )
- 
-@@ -1483,65 +1517,77 @@
- ;; FP arithmetic operations.
- 
- (define_insn "add<mode>3"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")))]
- "TARGET_SIMD"
- "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_addsub_<stype><q>")]
- )
- 
- (define_insn "sub<mode>3"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")))]
- "TARGET_SIMD"
- "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_addsub_<stype><q>")]
- )
- 
- (define_insn "mul<mode>3"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")))]
- "TARGET_SIMD"
- "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_mul_<stype><q>")]
- )
- 
--(define_insn "div<mode>3"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")))]
-+(define_expand "div<mode>3"
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")))]
-+ "TARGET_SIMD"
-+{
-+ if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
-+ DONE;
-+
-+ operands[1] = force_reg (<MODE>mode, operands[1]);
-+})
-+
-+(define_insn "*div<mode>3"
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")))]
- "TARGET_SIMD"
- "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_div_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_div_<stype><q>")]
- )
- 
- (define_insn "neg<mode>2"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
- "TARGET_SIMD"
- "fneg\\t%0.<Vtype>, %1.<Vtype>"
-- [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_neg_<stype><q>")]
- )
- 
- (define_insn "abs<mode>2"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
- "TARGET_SIMD"
- "fabs\\t%0.<Vtype>, %1.<Vtype>"
-- [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_abs_<stype><q>")]
- )
- 
- (define_insn "fma<mode>4"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")
-- (match_operand:VDQF 3 "register_operand" "0")))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")
-+ (match_operand:VHSDF 3 "register_operand" "0")))]
- "TARGET_SIMD"
- "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_mla_<stype><q>")]
- )
- 
- (define_insn "*aarch64_fma4_elt<mode>"
-@@ -1579,16 +1625,16 @@
- [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
- )
- 
--(define_insn "*aarch64_fma4_elt_to_128df"
-- [(set (match_operand:V2DF 0 "register_operand" "=w")
-- (fma:V2DF
-- (vec_duplicate:V2DF
-- (match_operand:DF 1 "register_operand" "w"))
-- (match_operand:V2DF 2 "register_operand" "w")
-- (match_operand:V2DF 3 "register_operand" "0")))]
-+(define_insn "*aarch64_fma4_elt_from_dup<mode>"
-+ [(set (match_operand:VMUL 0 "register_operand" "=w")
-+ (fma:VMUL
-+ (vec_duplicate:VMUL
-+ (match_operand:<VEL> 1 "register_operand" "w"))
-+ (match_operand:VMUL 2 "register_operand" "w")
-+ (match_operand:VMUL 3 "register_operand" "0")))]
- "TARGET_SIMD"
-- "fmla\\t%0.2d, %2.2d, %1.2d[0]"
-- [(set_attr "type" "neon_fp_mla_d_scalar_q")]
-+ "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
-+ [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
- )
- 
- (define_insn "*aarch64_fma4_elt_to_64v2df"
-@@ -1608,15 +1654,15 @@
- )
- 
- (define_insn "fnma<mode>4"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (fma:VDQF
-- (match_operand:VDQF 1 "register_operand" "w")
-- (neg:VDQF
-- (match_operand:VDQF 2 "register_operand" "w"))
-- (match_operand:VDQF 3 "register_operand" "0")))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (fma:VHSDF
-+ (match_operand:VHSDF 1 "register_operand" "w")
-+ (neg:VHSDF
-+ (match_operand:VHSDF 2 "register_operand" "w"))
-+ (match_operand:VHSDF 3 "register_operand" "0")))]
- "TARGET_SIMD"
-- "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
-+ "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-+ [(set_attr "type" "neon_fp_mla_<stype><q>")]
- )
- 
- (define_insn "*aarch64_fnma4_elt<mode>"
-@@ -1656,17 +1702,17 @@
- [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
- )
- 
--(define_insn "*aarch64_fnma4_elt_to_128df"
-- [(set (match_operand:V2DF 0 "register_operand" "=w")
-- (fma:V2DF
-- (neg:V2DF
-- (match_operand:V2DF 2 "register_operand" "w"))
-- (vec_duplicate:V2DF
-- (match_operand:DF 1 "register_operand" "w"))
-- (match_operand:V2DF 3 "register_operand" "0")))]
-+(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
-+ [(set (match_operand:VMUL 0 "register_operand" "=w")
-+ (fma:VMUL
-+ (neg:VMUL
-+ (match_operand:VMUL 2 "register_operand" "w"))
-+ (vec_duplicate:VMUL
-+ (match_operand:<VEL> 1 "register_operand" "w"))
-+ (match_operand:VMUL 3 "register_operand" "0")))]
- "TARGET_SIMD"
-- "fmls\\t%0.2d, %2.2d, %1.2d[0]"
-- [(set_attr "type" "neon_fp_mla_d_scalar_q")]
-+ "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
-+ [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
- )
- 
- (define_insn "*aarch64_fnma4_elt_to_64v2df"
-@@ -1689,24 +1735,50 @@
- ;; Vector versions of the floating-point frint patterns.
- ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
- (define_insn "<frint_pattern><mode>2"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
-- FRINT))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
-+ FRINT))]
- "TARGET_SIMD"
- "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
-- [(set_attr "type" "neon_fp_round_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_round_<stype><q>")]
- )
- 
- ;; Vector versions of the fcvt standard patterns.
- ;; Expands to lbtrunc, lround, lceil, lfloor
--(define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
-+(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
- [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
- (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
-- [(match_operand:VDQF 1 "register_operand" "w")]
-+ [(match_operand:VHSDF 1 "register_operand" "w")]
- FCVT)))]
- "TARGET_SIMD"
- "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
-- [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_to_int_<stype><q>")]
-+)
-+
-+;; HF Scalar variants of related SIMD instructions.
-+(define_insn "l<fcvt_pattern><su_optab>hfhi2"
-+ [(set (match_operand:HI 0 "register_operand" "=w")
-+ (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
-+ FCVT)))]
-+ "TARGET_SIMD_F16INST"
-+ "fcvt<frint_suffix><su>\t%h0, %h1"
-+ [(set_attr "type" "neon_fp_to_int_s")]
-+)
-+
-+(define_insn "<optab>_trunchfhi2"
-+ [(set (match_operand:HI 0 "register_operand" "=w")
-+ (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
-+ "TARGET_SIMD_F16INST"
-+ "fcvtz<su>\t%h0, %h1"
-+ [(set_attr "type" "neon_fp_to_int_s")]
-+)
-+
-+(define_insn "<optab>hihf2"
-+ [(set (match_operand:HF 0 "register_operand" "=w")
-+ (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
-+ "TARGET_SIMD_F16INST"
-+ "<su_optab>cvtf\t%h0, %h1"
-+ [(set_attr "type" "neon_int_to_fp_s")]
- )
- 
- (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
-@@ -1729,36 +1801,36 @@
- [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
- )
- 
--(define_expand "<optab><VDQF:mode><fcvt_target>2"
-+(define_expand "<optab><VHSDF:mode><fcvt_target>2"
- [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
- (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
-- [(match_operand:VDQF 1 "register_operand")]
-- UNSPEC_FRINTZ)))]
-+ [(match_operand:VHSDF 1 "register_operand")]
-+ UNSPEC_FRINTZ)))]
- "TARGET_SIMD"
- {})
- 
--(define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
-+(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
- [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
- (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
-- [(match_operand:VDQF 1 "register_operand")]
-- UNSPEC_FRINTZ)))]
-+ [(match_operand:VHSDF 1 "register_operand")]
-+ UNSPEC_FRINTZ)))]
- "TARGET_SIMD"
- {})
- 
--(define_expand "ftrunc<VDQF:mode>2"
-- [(set (match_operand:VDQF 0 "register_operand")
-- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
-- UNSPEC_FRINTZ))]
-+(define_expand "ftrunc<VHSDF:mode>2"
-+ [(set (match_operand:VHSDF 0 "register_operand")
-+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
-+ UNSPEC_FRINTZ))]
- "TARGET_SIMD"
- {})
- 
--(define_insn "<optab><fcvt_target><VDQF:mode>2"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (FLOATUORS:VDQF
-+(define_insn "<optab><fcvt_target><VHSDF:mode>2"
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (FLOATUORS:VHSDF
- (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
- "TARGET_SIMD"
- "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
-- [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
-+ [(set_attr "type" "neon_int_to_fp_<stype><q>")]
- )
- 
- ;; Conversions between vectors of floats and doubles.
-@@ -1778,6 +1850,30 @@
- [(set_attr "type" "neon_fp_cvt_widen_s")]
- )
- 
-+;; Convert between fixed-point and floating-point (vector modes)
-+
-+(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
-+ [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
-+ (unspec:<VHSDF:FCVT_TARGET>
-+ [(match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:SI 2 "immediate_operand" "i")]
-+ FCVT_F2FIXED))]
-+ "TARGET_SIMD"
-+ "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
-+ [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
-+)
-+
-+(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
-+ [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
-+ (unspec:<VDQ_HSDI:FCVT_TARGET>
-+ [(match_operand:VDQ_HSDI 1 "register_operand" "w")
-+ (match_operand:SI 2 "immediate_operand" "i")]
-+ FCVT_FIXED2F))]
-+ "TARGET_SIMD"
-+ "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
-+ [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
-+)
-+
- ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
- ;; is inconsistent with vector ordering elsewhere in the compiler, in that
- ;; the meaning of HI and LO changes depending on the target endianness.
-@@ -1934,33 +2030,25 @@
- ;; NaNs.
- 
- (define_insn "<su><maxmin><mode>3"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")))]
- "TARGET_SIMD"
- "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_minmax_<stype><q>")]
- )
- 
-+;; Vector forms for fmax, fmin, fmaxnm, fminnm.
-+;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
-+;; which implement the IEEE fmax ()/fmin () functions.
- (define_insn "<maxmin_uns><mode>3"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")]
-- FMAXMIN_UNS))]
-+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
-+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
-+ (match_operand:VHSDF 2 "register_operand" "w")]
-+ FMAXMIN_UNS))]
- "TARGET_SIMD"
- "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
--)
--
--;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
--(define_insn "<fmaxmin><mode>3"
-- [(set (match_operand:VDQF 0 "register_operand" "=w")
-- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
-- (match_operand:VDQF 2 "register_operand" "w")]
-- FMAXMIN))]
-- "TARGET_SIMD"
-- "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
-- [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
-+ [(set_attr "type" "neon_fp_minmax_<stype><q>")]
- )
- 
- ;; 'across lanes' add.
-@@ -1979,17 +2067,14 @@ - } - ) - --(define_expand "reduc_plus_scal_<mode>" -- [(match_operand:<VEL> 0 "register_operand" "=w") -- (match_operand:V2F 1 "register_operand" "w")] -- "TARGET_SIMD" -- { -- rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); -- rtx scratch = gen_reg_rtx (<MODE>mode); -- emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1])); -- emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); -- DONE; -- } -+(define_insn "aarch64_faddp<mode>" -+ [(set (match_operand:VHSDF 0 "register_operand" "=w") -+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w") -+ (match_operand:VHSDF 2 "register_operand" "w")] -+ UNSPEC_FADDV))] -+ "TARGET_SIMD" -+ "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" -+ [(set_attr "type" "neon_fp_reduc_add_<stype><q>")] - ) - - (define_insn "aarch64_reduc_plus_internal<mode>" -@@ -2010,24 +2095,15 @@ - [(set_attr "type" "neon_reduc_add")] - ) - --(define_insn "aarch64_reduc_plus_internal<mode>" -- [(set (match_operand:V2F 0 "register_operand" "=w") -- (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")] -+(define_insn "reduc_plus_scal_<mode>" -+ [(set (match_operand:<VEL> 0 "register_operand" "=w") -+ (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")] - UNSPEC_FADDV))] - "TARGET_SIMD" - "faddp\\t%<Vetype>0, %1.<Vtype>" - [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")] - ) - --(define_insn "aarch64_addpv4sf" -- [(set (match_operand:V4SF 0 "register_operand" "=w") -- (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] -- UNSPEC_FADDV))] -- "TARGET_SIMD" -- "faddp\\t%0.4s, %1.4s, %1.4s" -- [(set_attr "type" "neon_fp_reduc_add_s_q")] --) -- - (define_expand "reduc_plus_scal_v4sf" - [(set (match_operand:SF 0 "register_operand") - (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] -@@ -2036,8 +2112,8 @@ - { - rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0)); - rtx scratch = gen_reg_rtx (V4SFmode); -- emit_insn (gen_aarch64_addpv4sf (scratch, operands[1])); -- emit_insn (gen_aarch64_addpv4sf (scratch, scratch)); -+ emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1])); -+ emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch)); - emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); - DONE; - }) -@@ -2072,8 +2148,8 @@ - ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). 
- (define_expand "reduc_<maxmin_uns>_scal_<mode>" - [(match_operand:<VEL> 0 "register_operand") -- (unspec:VDQF [(match_operand:VDQF 1 "register_operand")] -- FMAXMINV)] -+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] -+ FMAXMINV)] - "TARGET_SIMD" - { - rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); -@@ -2120,12 +2196,12 @@ - ) - - (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>" -- [(set (match_operand:VDQF 0 "register_operand" "=w") -- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] -- FMAXMINV))] -+ [(set (match_operand:VHSDF 0 "register_operand" "=w") -+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] -+ FMAXMINV))] - "TARGET_SIMD" - "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>" -- [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")] -+ [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")] - ) - - ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register -@@ -2635,7 +2711,7 @@ - (define_insn "*aarch64_combinez<mode>" - [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") - (vec_concat:<VDBL> -- (match_operand:VD_BHSI 1 "general_operand" "w,r,m") -+ (match_operand:VD_BHSI 1 "general_operand" "w,?r,m") - (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "@ -@@ -2651,7 +2727,7 @@ - [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") - (vec_concat:<VDBL> - (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz") -- (match_operand:VD_BHSI 1 "general_operand" "w,r,m")))] -+ (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" - "@ - mov\\t%0.8b, %1.8b -@@ -2994,13 +3070,14 @@ - ;; fmulx. - - (define_insn "aarch64_fmulx<mode>" -- [(set (match_operand:VALLF 0 "register_operand" "=w") -- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") -- (match_operand:VALLF 2 "register_operand" "w")] -- UNSPEC_FMULX))] -+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") -+ (unspec:VHSDF_HSDF -+ [(match_operand:VHSDF_HSDF 1 "register_operand" "w") -+ (match_operand:VHSDF_HSDF 2 "register_operand" "w")] -+ UNSPEC_FMULX))] - "TARGET_SIMD" - "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" -- [(set_attr "type" "neon_fp_mul_<Vetype>")] -+ [(set_attr "type" "neon_fp_mul_<stype>")] - ) - - ;; vmulxq_lane_f32, and vmulx_laneq_f32 -@@ -3042,20 +3119,18 @@ - [(set_attr "type" "neon_fp_mul_<Vetype><q>")] - ) - --;; vmulxq_lane_f64 -+;; vmulxq_lane - --(define_insn "*aarch64_mulx_elt_to_64v2df" -- [(set (match_operand:V2DF 0 "register_operand" "=w") -- (unspec:V2DF -- [(match_operand:V2DF 1 "register_operand" "w") -- (vec_duplicate:V2DF -- (match_operand:DF 2 "register_operand" "w"))] -+(define_insn "*aarch64_mulx_elt_from_dup<mode>" -+ [(set (match_operand:VHSDF 0 "register_operand" "=w") -+ (unspec:VHSDF -+ [(match_operand:VHSDF 1 "register_operand" "w") -+ (vec_duplicate:VHSDF -+ (match_operand:<VEL> 2 "register_operand" "w"))] - UNSPEC_FMULX))] - "TARGET_SIMD" -- { -- return "fmulx\t%0.2d, %1.2d, %2.d[0]"; -- } -- [(set_attr "type" "neon_fp_mul_d_scalar_q")] -+ "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"; -+ [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] - ) - - ;; vmulxs_lane_f32, vmulxs_laneq_f32 -@@ -3937,15 +4012,12 @@ - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] - VSHLL))] - "TARGET_SIMD" -- "* -- int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; -- if (INTVAL (operands[2]) == bit_width) - { -- return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\"; -+ if (INTVAL (operands[2]) == 
GET_MODE_UNIT_BITSIZE (<MODE>mode)) -+ return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; -+ else -+ return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; - } -- else { -- return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\"; -- }" - [(set_attr "type" "neon_shift_imm_long")] - ) - -@@ -3957,15 +4029,12 @@ - (match_operand:SI 2 "immediate_operand" "i")] - VSHLL))] - "TARGET_SIMD" -- "* -- int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; -- if (INTVAL (operands[2]) == bit_width) - { -- return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\"; -+ if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) -+ return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; -+ else -+ return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; - } -- else { -- return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\"; -- }" - [(set_attr "type" "neon_shift_imm_long")] - ) - -@@ -4246,30 +4315,32 @@ - [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w") - (neg:<V_cmp_result> - (COMPARISONS:<V_cmp_result> -- (match_operand:VALLF 1 "register_operand" "w,w") -- (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz") -+ (match_operand:VHSDF_HSDF 1 "register_operand" "w,w") -+ (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz") - )))] - "TARGET_SIMD" - "@ - fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype> - fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0" -- [(set_attr "type" "neon_fp_compare_<Vetype><q>")] -+ [(set_attr "type" "neon_fp_compare_<stype><q>")] - ) - - ;; fac(ge|gt) - ;; Note we can also handle what would be fac(le|lt) by - ;; generating fac(ge|gt). - --(define_insn "*aarch64_fac<optab><mode>" -+(define_insn "aarch64_fac<optab><mode>" - [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w") - (neg:<V_cmp_result> - (FAC_COMPARISONS:<V_cmp_result> -- (abs:VALLF (match_operand:VALLF 1 "register_operand" "w")) -- (abs:VALLF (match_operand:VALLF 2 "register_operand" "w")) -+ (abs:VHSDF_HSDF -+ (match_operand:VHSDF_HSDF 1 "register_operand" "w")) -+ (abs:VHSDF_HSDF -+ (match_operand:VHSDF_HSDF 2 "register_operand" "w")) - )))] - "TARGET_SIMD" - "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" -- [(set_attr "type" "neon_fp_compare_<Vetype><q>")] -+ [(set_attr "type" "neon_fp_compare_<stype><q>")] - ) - - ;; addp -@@ -4297,12 +4368,21 @@ - - ;; sqrt - --(define_insn "sqrt<mode>2" -- [(set (match_operand:VDQF 0 "register_operand" "=w") -- (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))] -+(define_expand "sqrt<mode>2" -+ [(set (match_operand:VHSDF 0 "register_operand" "=w") -+ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))] -+ "TARGET_SIMD" -+{ -+ if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) -+ DONE; -+}) -+ -+(define_insn "*sqrt<mode>2" -+ [(set (match_operand:VHSDF 0 "register_operand" "=w") -+ (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))] - "TARGET_SIMD" - "fsqrt\\t%0.<Vtype>, %1.<Vtype>" -- [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")] -+ [(set_attr "type" "neon_fp_sqrt_<stype><q>")] - ) - - ;; Patterns for vector struct loads and stores. 
-@@ -4652,7 +4732,7 @@ - ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1" - [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\ - neon_load<nregs>_<nregs>reg_q") -- (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] -+ (set_attr "length" "<insn_count>,4,4")] - ) - - (define_insn "aarch64_be_ld1<mode>" -@@ -4685,7 +4765,7 @@ - stp\\t%q1, %R1, %0 - ldp\\t%q0, %R0, %1" - [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q") -- (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] -+ (set_attr "length" "8,4,4")] - ) - - (define_insn "*aarch64_be_movci" -@@ -4696,7 +4776,7 @@ - || register_operand (operands[1], CImode))" - "#" - [(set_attr "type" "multiple") -- (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] -+ (set_attr "length" "12,4,4")] - ) - - (define_insn "*aarch64_be_movxi" -@@ -4707,7 +4787,7 @@ - || register_operand (operands[1], XImode))" - "#" - [(set_attr "type" "multiple") -- (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))] -+ (set_attr "length" "16,4,4")] - ) - - (define_split -@@ -4787,7 +4867,7 @@ - DONE; - }) - --(define_insn "aarch64_ld2<mode>_dreg" -+(define_insn "aarch64_ld2<mode>_dreg_le" - [(set (match_operand:OI 0 "register_operand" "=w") - (subreg:OI - (vec_concat:<VRL2> -@@ -4800,12 +4880,30 @@ - (unspec:VD [(match_dup 1)] - UNSPEC_LD2) - (vec_duplicate:VD (const_int 0)))) 0))] -- "TARGET_SIMD" -+ "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" - [(set_attr "type" "neon_load2_2reg<q>")] - ) - --(define_insn "aarch64_ld2<mode>_dreg" -+(define_insn "aarch64_ld2<mode>_dreg_be" -+ [(set (match_operand:OI 0 "register_operand" "=w") -+ (subreg:OI -+ (vec_concat:<VRL2> -+ (vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD -+ [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] -+ UNSPEC_LD2)) -+ (vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD [(match_dup 1)] -+ UNSPEC_LD2))) 0))] -+ "TARGET_SIMD && BYTES_BIG_ENDIAN" -+ "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load2_2reg<q>")] -+) -+ -+(define_insn "aarch64_ld2<mode>_dreg_le" - [(set (match_operand:OI 0 "register_operand" "=w") - (subreg:OI - (vec_concat:<VRL2> -@@ -4818,12 +4916,30 @@ - (unspec:DX [(match_dup 1)] - UNSPEC_LD2) - (const_int 0))) 0))] -- "TARGET_SIMD" -+ "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "ld1\\t{%S0.1d - %T0.1d}, %1" - [(set_attr "type" "neon_load1_2reg<q>")] - ) - --(define_insn "aarch64_ld3<mode>_dreg" -+(define_insn "aarch64_ld2<mode>_dreg_be" -+ [(set (match_operand:OI 0 "register_operand" "=w") -+ (subreg:OI -+ (vec_concat:<VRL2> -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX -+ [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] -+ UNSPEC_LD2)) -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX [(match_dup 1)] -+ UNSPEC_LD2))) 0))] -+ "TARGET_SIMD && BYTES_BIG_ENDIAN" -+ "ld1\\t{%S0.1d - %T0.1d}, %1" -+ [(set_attr "type" "neon_load1_2reg<q>")] -+) -+ -+(define_insn "aarch64_ld3<mode>_dreg_le" - [(set (match_operand:CI 0 "register_operand" "=w") - (subreg:CI - (vec_concat:<VRL3> -@@ -4841,12 +4957,35 @@ - (unspec:VD [(match_dup 1)] - UNSPEC_LD3) - (vec_duplicate:VD (const_int 0)))) 0))] -- "TARGET_SIMD" -+ "TARGET_SIMD && !BYTES_BIG_ENDIAN" -+ "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load3_3reg<q>")] -+) -+ -+(define_insn "aarch64_ld3<mode>_dreg_be" -+ [(set (match_operand:CI 0 "register_operand" "=w") -+ (subreg:CI -+ (vec_concat:<VRL3> -+ (vec_concat:<VRL2> -+ 
(vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD -+ [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] -+ UNSPEC_LD3)) -+ (vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD [(match_dup 1)] -+ UNSPEC_LD3))) -+ (vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD [(match_dup 1)] -+ UNSPEC_LD3))) 0))] -+ "TARGET_SIMD && BYTES_BIG_ENDIAN" - "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" - [(set_attr "type" "neon_load3_3reg<q>")] - ) - --(define_insn "aarch64_ld3<mode>_dreg" -+(define_insn "aarch64_ld3<mode>_dreg_le" - [(set (match_operand:CI 0 "register_operand" "=w") - (subreg:CI - (vec_concat:<VRL3> -@@ -4864,12 +5003,35 @@ - (unspec:DX [(match_dup 1)] - UNSPEC_LD3) - (const_int 0))) 0))] -- "TARGET_SIMD" -+ "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "ld1\\t{%S0.1d - %U0.1d}, %1" - [(set_attr "type" "neon_load1_3reg<q>")] - ) - --(define_insn "aarch64_ld4<mode>_dreg" -+(define_insn "aarch64_ld3<mode>_dreg_be" -+ [(set (match_operand:CI 0 "register_operand" "=w") -+ (subreg:CI -+ (vec_concat:<VRL3> -+ (vec_concat:<VRL2> -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX -+ [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] -+ UNSPEC_LD3)) -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX [(match_dup 1)] -+ UNSPEC_LD3))) -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX [(match_dup 1)] -+ UNSPEC_LD3))) 0))] -+ "TARGET_SIMD && BYTES_BIG_ENDIAN" -+ "ld1\\t{%S0.1d - %U0.1d}, %1" -+ [(set_attr "type" "neon_load1_3reg<q>")] -+) -+ -+(define_insn "aarch64_ld4<mode>_dreg_le" - [(set (match_operand:XI 0 "register_operand" "=w") - (subreg:XI - (vec_concat:<VRL4> -@@ -4880,9 +5042,9 @@ - UNSPEC_LD4) - (vec_duplicate:VD (const_int 0))) - (vec_concat:<VDBL> -- (unspec:VD [(match_dup 1)] -+ (unspec:VD [(match_dup 1)] - UNSPEC_LD4) -- (vec_duplicate:VD (const_int 0)))) -+ (vec_duplicate:VD (const_int 0)))) - (vec_concat:<VRL2> - (vec_concat:<VDBL> - (unspec:VD [(match_dup 1)] -@@ -4892,12 +5054,40 @@ - (unspec:VD [(match_dup 1)] - UNSPEC_LD4) - (vec_duplicate:VD (const_int 0))))) 0))] -- "TARGET_SIMD" -+ "TARGET_SIMD && !BYTES_BIG_ENDIAN" -+ "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" -+ [(set_attr "type" "neon_load4_4reg<q>")] -+) -+ -+(define_insn "aarch64_ld4<mode>_dreg_be" -+ [(set (match_operand:XI 0 "register_operand" "=w") -+ (subreg:XI -+ (vec_concat:<VRL4> -+ (vec_concat:<VRL2> -+ (vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD -+ [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] -+ UNSPEC_LD4)) -+ (vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD [(match_dup 1)] -+ UNSPEC_LD4))) -+ (vec_concat:<VRL2> -+ (vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD [(match_dup 1)] -+ UNSPEC_LD4)) -+ (vec_concat:<VDBL> -+ (vec_duplicate:VD (const_int 0)) -+ (unspec:VD [(match_dup 1)] -+ UNSPEC_LD4)))) 0))] -+ "TARGET_SIMD && BYTES_BIG_ENDIAN" - "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1" - [(set_attr "type" "neon_load4_4reg<q>")] - ) - --(define_insn "aarch64_ld4<mode>_dreg" -+(define_insn "aarch64_ld4<mode>_dreg_le" - [(set (match_operand:XI 0 "register_operand" "=w") - (subreg:XI - (vec_concat:<VRL4> -@@ -4910,7 +5100,7 @@ - (vec_concat:<VDBL> - (unspec:DX [(match_dup 1)] - UNSPEC_LD4) -- (const_int 0))) -+ (const_int 0))) - (vec_concat:<VRL2> - (vec_concat:<VDBL> - (unspec:DX [(match_dup 1)] -@@ -4920,7 +5110,35 @@ - (unspec:DX [(match_dup 1)] - UNSPEC_LD4) - (const_int 0)))) 0))] -- "TARGET_SIMD" -+ "TARGET_SIMD && !BYTES_BIG_ENDIAN" -+ "ld1\\t{%S0.1d - %V0.1d}, %1" -+ [(set_attr 
"type" "neon_load1_4reg<q>")] -+) -+ -+(define_insn "aarch64_ld4<mode>_dreg_be" -+ [(set (match_operand:XI 0 "register_operand" "=w") -+ (subreg:XI -+ (vec_concat:<VRL4> -+ (vec_concat:<VRL2> -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX -+ [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")] -+ UNSPEC_LD4)) -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX [(match_dup 1)] -+ UNSPEC_LD4))) -+ (vec_concat:<VRL2> -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX [(match_dup 1)] -+ UNSPEC_LD4)) -+ (vec_concat:<VDBL> -+ (const_int 0) -+ (unspec:DX [(match_dup 1)] -+ UNSPEC_LD4)))) 0))] -+ "TARGET_SIMD && BYTES_BIG_ENDIAN" - "ld1\\t{%S0.1d - %V0.1d}, %1" - [(set_attr "type" "neon_load1_4reg<q>")] - ) -@@ -4934,7 +5152,12 @@ - rtx mem = gen_rtx_MEM (BLKmode, operands[1]); - set_mem_size (mem, <VSTRUCT:nregs> * 8); - -- emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem)); -+ if (BYTES_BIG_ENDIAN) -+ emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_be (operands[0], -+ mem)); -+ else -+ emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_le (operands[0], -+ mem)); - DONE; - }) - -@@ -5160,10 +5383,10 @@ - ) - - (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>" -- [(set (match_operand:VALL 0 "register_operand" "=w") -- (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") -- (match_operand:VALL 2 "register_operand" "w")] -- PERMUTE))] -+ [(set (match_operand:VALL_F16 0 "register_operand" "=w") -+ (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") -+ (match_operand:VALL_F16 2 "register_operand" "w")] -+ PERMUTE))] - "TARGET_SIMD" - "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" - [(set_attr "type" "neon_permute<q>")] -@@ -5171,11 +5394,11 @@ - - ;; Note immediate (third) operand is lane index not byte index. 
- (define_insn "aarch64_ext<mode>" -- [(set (match_operand:VALL 0 "register_operand" "=w") -- (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") -- (match_operand:VALL 2 "register_operand" "w") -- (match_operand:SI 3 "immediate_operand" "i")] -- UNSPEC_EXT))] -+ [(set (match_operand:VALL_F16 0 "register_operand" "=w") -+ (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") -+ (match_operand:VALL_F16 2 "register_operand" "w") -+ (match_operand:SI 3 "immediate_operand" "i")] -+ UNSPEC_EXT))] - "TARGET_SIMD" - { - operands[3] = GEN_INT (INTVAL (operands[3]) -@@ -5186,8 +5409,8 @@ - ) - - (define_insn "aarch64_rev<REVERSE:rev_op><mode>" -- [(set (match_operand:VALL 0 "register_operand" "=w") -- (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")] -+ [(set (match_operand:VALL_F16 0 "register_operand" "=w") -+ (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")] - REVERSE))] - "TARGET_SIMD" - "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>" -@@ -5354,31 +5577,32 @@ - ) - - (define_insn "aarch64_frecpe<mode>" -- [(set (match_operand:VDQF 0 "register_operand" "=w") -- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] -- UNSPEC_FRECPE))] -+ [(set (match_operand:VHSDF 0 "register_operand" "=w") -+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] -+ UNSPEC_FRECPE))] - "TARGET_SIMD" - "frecpe\\t%0.<Vtype>, %1.<Vtype>" -- [(set_attr "type" "neon_fp_recpe_<Vetype><q>")] -+ [(set_attr "type" "neon_fp_recpe_<stype><q>")] - ) - - (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] -- FRECP))] -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] -+ FRECP))] - "TARGET_SIMD" - "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" -- [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")] -+ [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")] - ) - - (define_insn "aarch64_frecps<mode>" -- [(set (match_operand:VALLF 0 "register_operand" "=w") -- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w") -- (match_operand:VALLF 2 "register_operand" "w")] -- UNSPEC_FRECPS))] -+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") -+ (unspec:VHSDF_HSDF -+ [(match_operand:VHSDF_HSDF 1 "register_operand" "w") -+ (match_operand:VHSDF_HSDF 2 "register_operand" "w")] -+ UNSPEC_FRECPS))] - "TARGET_SIMD" - "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" -- [(set_attr "type" "neon_fp_recps_<Vetype><q>")] -+ [(set_attr "type" "neon_fp_recps_<stype><q>")] - ) - - (define_insn "aarch64_urecpe<mode>" -@@ -5414,13 +5638,25 @@ - [(set_attr "type" "crypto_aese")] - ) - -+;; When AES/AESMC fusion is enabled we want the register allocation to -+;; look like: -+;; AESE Vn, _ -+;; AESMC Vn, Vn -+;; So prefer to tie operand 1 to operand 0 when fusing. 
-+ - (define_insn "aarch64_crypto_aes<aesmc_op>v16qi" -- [(set (match_operand:V16QI 0 "register_operand" "=w") -- (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")] -+ [(set (match_operand:V16QI 0 "register_operand" "=w,w") -+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")] - CRYPTO_AESMC))] - "TARGET_SIMD && TARGET_CRYPTO" - "aes<aesmc_op>\\t%0.16b, %1.16b" -- [(set_attr "type" "crypto_aesmc")] -+ [(set_attr "type" "crypto_aesmc") -+ (set_attr_alternative "enabled" -+ [(if_then_else (match_test -+ "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)") -+ (const_string "yes" ) -+ (const_string "no")) -+ (const_string "yes")])] - ) - - ;; sha1 -@@ -5435,6 +5671,26 @@ - [(set_attr "type" "crypto_sha1_fast")] - ) - -+(define_insn "aarch64_crypto_sha1hv4si" -+ [(set (match_operand:SI 0 "register_operand" "=w") -+ (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") -+ (parallel [(const_int 0)]))] -+ UNSPEC_SHA1H))] -+ "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN" -+ "sha1h\\t%s0, %s1" -+ [(set_attr "type" "crypto_sha1_fast")] -+) -+ -+(define_insn "aarch64_be_crypto_sha1hv4si" -+ [(set (match_operand:SI 0 "register_operand" "=w") -+ (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") -+ (parallel [(const_int 3)]))] -+ UNSPEC_SHA1H))] -+ "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN" -+ "sha1h\\t%s0, %s1" -+ [(set_attr "type" "crypto_sha1_fast")] -+) -+ - (define_insn "aarch64_crypto_sha1su1v4si" - [(set (match_operand:V4SI 0 "register_operand" "=w") - (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") ---- a/src/gcc/config/aarch64/aarch64-tune.md -+++ b/src/gcc/config/aarch64/aarch64-tune.md -@@ -1,5 +1,5 @@ - ;; -*- buffer-read-only: t -*- - ;; Generated automatically by gentune.sh from aarch64-cores.def - (define_attr "tune" -- "cortexa35,cortexa53,cortexa57,cortexa72,exynosm1,qdf24xx,thunderx,xgene1,cortexa57cortexa53,cortexa72cortexa53" -+ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,qdf24xx,thunderx,xgene1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53" - (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) ---- a/src/gcc/config/aarch64/aarch64-tuning-flags.def -+++ b/src/gcc/config/aarch64/aarch64-tuning-flags.def -@@ -29,5 +29,8 @@ - AARCH64_TUNE_ to give an enum name. */ - - AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS) --AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT) - -+/* Don't create non-8 byte aligned load/store pair. That is if the -+two load/stores are not at least 8 byte aligned don't create load/store -+pairs. 
*/ -+AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW) ---- a/src/gcc/config/aarch64/aarch64.c -+++ b/src/gcc/config/aarch64/aarch64.c -@@ -26,6 +26,7 @@ - #include "target.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "gimple.h" - #include "cfghooks.h" - #include "cfgloop.h" -@@ -61,7 +62,6 @@ - #include "rtl-iter.h" - #include "tm-constrs.h" - #include "sched-int.h" --#include "cortex-a57-fma-steering.h" - #include "target-globals.h" - #include "common/common-target.h" - -@@ -141,6 +141,10 @@ static bool aarch64_vector_mode_supported_p (machine_mode); - static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, - const unsigned char *sel); - static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool); -+static bool aarch64_builtin_support_vector_misalignment (machine_mode mode, -+ const_tree type, -+ int misalignment, -+ bool is_packed); - - /* Major revision number of the ARM Architecture implemented by the target. */ - unsigned aarch64_architecture_version; -@@ -152,7 +156,7 @@ enum aarch64_processor aarch64_tune = cortexa53; - unsigned long aarch64_tune_flags = 0; - - /* Global flag for PC relative loads. */ --bool aarch64_nopcrelative_literal_loads; -+bool aarch64_pcrelative_literal_loads; - - /* Support for command line parsing of boolean flags in the tuning - structures. */ -@@ -250,6 +254,38 @@ static const struct cpu_addrcost_table xgene1_addrcost_table = - 0, /* imm_offset */ - }; - -+static const struct cpu_addrcost_table qdf24xx_addrcost_table = -+{ -+ { -+ 1, /* hi */ -+ 0, /* si */ -+ 0, /* di */ -+ 1, /* ti */ -+ }, -+ 0, /* pre_modify */ -+ 0, /* post_modify */ -+ 0, /* register_offset */ -+ 0, /* register_sextend */ -+ 0, /* register_zextend */ -+ 0 /* imm_offset */ -+}; -+ -+static const struct cpu_addrcost_table vulcan_addrcost_table = -+{ -+ { -+ 0, /* hi */ -+ 0, /* si */ -+ 0, /* di */ -+ 2, /* ti */ -+ }, -+ 0, /* pre_modify */ -+ 0, /* post_modify */ -+ 2, /* register_offset */ -+ 3, /* register_sextend */ -+ 3, /* register_zextend */ -+ 0, /* imm_offset */ -+}; -+ - static const struct cpu_regmove_cost generic_regmove_cost = - { - 1, /* GP2GP */ -@@ -308,6 +344,24 @@ static const struct cpu_regmove_cost xgene1_regmove_cost = - 2 /* FP2FP */ - }; - -+static const struct cpu_regmove_cost qdf24xx_regmove_cost = -+{ -+ 2, /* GP2GP */ -+ /* Avoid the use of int<->fp moves for spilling. */ -+ 6, /* GP2FP */ -+ 6, /* FP2GP */ -+ 4 /* FP2FP */ -+}; -+ -+static const struct cpu_regmove_cost vulcan_regmove_cost = -+{ -+ 1, /* GP2GP */ -+ /* Avoid the use of int<->fp moves for spilling. */ -+ 8, /* GP2FP */ -+ 8, /* FP2GP */ -+ 4 /* FP2FP */ -+}; -+ - /* Generic costs for vector insn classes. */ - static const struct cpu_vector_cost generic_vector_cost = - { -@@ -326,18 +380,36 @@ static const struct cpu_vector_cost generic_vector_cost = - 1 /* cond_not_taken_branch_cost */ - }; - -+/* ThunderX costs for vector insn classes. */ -+static const struct cpu_vector_cost thunderx_vector_cost = -+{ -+ 1, /* scalar_stmt_cost */ -+ 3, /* scalar_load_cost */ -+ 1, /* scalar_store_cost */ -+ 4, /* vec_stmt_cost */ -+ 4, /* vec_permute_cost */ -+ 2, /* vec_to_scalar_cost */ -+ 2, /* scalar_to_vec_cost */ -+ 3, /* vec_align_load_cost */ -+ 10, /* vec_unalign_load_cost */ -+ 10, /* vec_unalign_store_cost */ -+ 1, /* vec_store_cost */ -+ 3, /* cond_taken_branch_cost */ -+ 3 /* cond_not_taken_branch_cost */ -+}; -+ - /* Generic costs for vector insn classes. 
*/ - static const struct cpu_vector_cost cortexa57_vector_cost = - { - 1, /* scalar_stmt_cost */ - 4, /* scalar_load_cost */ - 1, /* scalar_store_cost */ -- 3, /* vec_stmt_cost */ -+ 2, /* vec_stmt_cost */ - 3, /* vec_permute_cost */ - 8, /* vec_to_scalar_cost */ - 8, /* scalar_to_vec_cost */ -- 5, /* vec_align_load_cost */ -- 5, /* vec_unalign_load_cost */ -+ 4, /* vec_align_load_cost */ -+ 4, /* vec_unalign_load_cost */ - 1, /* vec_unalign_store_cost */ - 1, /* vec_store_cost */ - 1, /* cond_taken_branch_cost */ -@@ -379,6 +451,24 @@ static const struct cpu_vector_cost xgene1_vector_cost = - 1 /* cond_not_taken_branch_cost */ - }; - -+/* Costs for vector insn classes for Vulcan. */ -+static const struct cpu_vector_cost vulcan_vector_cost = -+{ -+ 6, /* scalar_stmt_cost */ -+ 4, /* scalar_load_cost */ -+ 1, /* scalar_store_cost */ -+ 6, /* vec_stmt_cost */ -+ 3, /* vec_permute_cost */ -+ 6, /* vec_to_scalar_cost */ -+ 5, /* scalar_to_vec_cost */ -+ 8, /* vec_align_load_cost */ -+ 8, /* vec_unalign_load_cost */ -+ 4, /* vec_unalign_store_cost */ -+ 4, /* vec_store_cost */ -+ 2, /* cond_taken_branch_cost */ -+ 1 /* cond_not_taken_branch_cost */ -+}; -+ - /* Generic costs for branch instructions. */ - static const struct cpu_branch_cost generic_branch_cost = - { -@@ -393,6 +483,37 @@ static const struct cpu_branch_cost cortexa57_branch_cost = - 3 /* Unpredictable. */ - }; - -+/* Branch costs for Vulcan. */ -+static const struct cpu_branch_cost vulcan_branch_cost = -+{ -+ 1, /* Predictable. */ -+ 3 /* Unpredictable. */ -+}; -+ -+/* Generic approximation modes. */ -+static const cpu_approx_modes generic_approx_modes = -+{ -+ AARCH64_APPROX_NONE, /* division */ -+ AARCH64_APPROX_NONE, /* sqrt */ -+ AARCH64_APPROX_NONE /* recip_sqrt */ -+}; -+ -+/* Approximation modes for Exynos M1. */ -+static const cpu_approx_modes exynosm1_approx_modes = -+{ -+ AARCH64_APPROX_NONE, /* division */ -+ AARCH64_APPROX_ALL, /* sqrt */ -+ AARCH64_APPROX_ALL /* recip_sqrt */ -+}; -+ -+/* Approximation modes for X-Gene 1. */ -+static const cpu_approx_modes xgene1_approx_modes = -+{ -+ AARCH64_APPROX_NONE, /* division */ -+ AARCH64_APPROX_NONE, /* sqrt */ -+ AARCH64_APPROX_ALL /* recip_sqrt */ -+}; -+ - static const struct tune_params generic_tunings = - { - &cortexa57_extra_costs, -@@ -400,6 +521,7 @@ static const struct tune_params generic_tunings = - &generic_regmove_cost, - &generic_vector_cost, - &generic_branch_cost, -+ &generic_approx_modes, - 4, /* memmov_cost */ - 2, /* issue_rate */ - AARCH64_FUSE_NOTHING, /* fusible_ops */ -@@ -423,14 +545,15 @@ static const struct tune_params cortexa35_tunings = - &generic_addrcost_table, - &cortexa53_regmove_cost, - &generic_vector_cost, -- &generic_branch_cost, -+ &cortexa57_branch_cost, -+ &generic_approx_modes, - 4, /* memmov_cost */ - 1, /* issue_rate */ -- (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD -+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD - | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ -- 8, /* function_align. */ -+ 16, /* function_align. */ - 8, /* jump_align. */ -- 4, /* loop_align. */ -+ 8, /* loop_align. */ - 2, /* int_reassoc_width. */ - 4, /* fp_reassoc_width. */ - 1, /* vec_reassoc_width. 
*/ -@@ -448,14 +571,15 @@ static const struct tune_params cortexa53_tunings = - &generic_addrcost_table, - &cortexa53_regmove_cost, - &generic_vector_cost, -- &generic_branch_cost, -+ &cortexa57_branch_cost, -+ &generic_approx_modes, - 4, /* memmov_cost */ - 2, /* issue_rate */ - (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD - | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ -- 8, /* function_align. */ -+ 16, /* function_align. */ - 8, /* jump_align. */ -- 4, /* loop_align. */ -+ 8, /* loop_align. */ - 2, /* int_reassoc_width. */ - 4, /* fp_reassoc_width. */ - 1, /* vec_reassoc_width. */ -@@ -474,13 +598,14 @@ static const struct tune_params cortexa57_tunings = - &cortexa57_regmove_cost, - &cortexa57_vector_cost, - &cortexa57_branch_cost, -+ &generic_approx_modes, - 4, /* memmov_cost */ - 3, /* issue_rate */ - (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD - | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ - 16, /* function_align. */ - 8, /* jump_align. */ -- 4, /* loop_align. */ -+ 8, /* loop_align. */ - 2, /* int_reassoc_width. */ - 4, /* fp_reassoc_width. */ - 1, /* vec_reassoc_width. */ -@@ -498,14 +623,15 @@ static const struct tune_params cortexa72_tunings = - &cortexa57_addrcost_table, - &cortexa57_regmove_cost, - &cortexa57_vector_cost, -- &generic_branch_cost, -+ &cortexa57_branch_cost, -+ &generic_approx_modes, - 4, /* memmov_cost */ - 3, /* issue_rate */ - (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD - | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops */ - 16, /* function_align. */ - 8, /* jump_align. */ -- 4, /* loop_align. */ -+ 8, /* loop_align. */ - 2, /* int_reassoc_width. */ - 4, /* fp_reassoc_width. */ - 1, /* vec_reassoc_width. */ -@@ -513,7 +639,33 @@ static const struct tune_params cortexa72_tunings = - 2, /* min_div_recip_mul_df. */ - 0, /* max_case_values. */ - 0, /* cache_line_size. */ -- tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ -+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ -+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ -+}; -+ -+static const struct tune_params cortexa73_tunings = -+{ -+ &cortexa57_extra_costs, -+ &cortexa57_addrcost_table, -+ &cortexa57_regmove_cost, -+ &cortexa57_vector_cost, -+ &cortexa57_branch_cost, -+ &generic_approx_modes, -+ 4, /* memmov_cost. */ -+ 2, /* issue_rate. */ -+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD -+ | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops */ -+ 16, /* function_align. */ -+ 8, /* jump_align. */ -+ 8, /* loop_align. */ -+ 2, /* int_reassoc_width. */ -+ 4, /* fp_reassoc_width. */ -+ 1, /* vec_reassoc_width. */ -+ 2, /* min_div_recip_mul_sf. */ -+ 2, /* min_div_recip_mul_df. */ -+ 0, /* max_case_values. */ -+ 0, /* cache_line_size. */ -+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ - (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ - }; - -@@ -524,6 +676,7 @@ static const struct tune_params exynosm1_tunings = - &exynosm1_regmove_cost, - &exynosm1_vector_cost, - &generic_branch_cost, -+ &exynosm1_approx_modes, - 4, /* memmov_cost */ - 3, /* issue_rate */ - (AARCH64_FUSE_AES_AESMC), /* fusible_ops */ -@@ -538,7 +691,7 @@ static const struct tune_params exynosm1_tunings = - 48, /* max_case_values. */ - 64, /* cache_line_size. */ - tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ -- (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */ -+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. 
*/ - }; - - static const struct tune_params thunderx_tunings = -@@ -546,8 +699,9 @@ static const struct tune_params thunderx_tunings = - &thunderx_extra_costs, - &generic_addrcost_table, - &thunderx_regmove_cost, -- &generic_vector_cost, -+ &thunderx_vector_cost, - &generic_branch_cost, -+ &generic_approx_modes, - 6, /* memmov_cost */ - 2, /* issue_rate */ - AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ -@@ -562,7 +716,7 @@ static const struct tune_params thunderx_tunings = - 0, /* max_case_values. */ - 0, /* cache_line_size. */ - tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ -- (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ -+ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */ - }; - - static const struct tune_params xgene1_tunings = -@@ -572,6 +726,7 @@ static const struct tune_params xgene1_tunings = - &xgene1_regmove_cost, - &xgene1_vector_cost, - &generic_branch_cost, -+ &xgene1_approx_modes, - 6, /* memmov_cost */ - 4, /* issue_rate */ - AARCH64_FUSE_NOTHING, /* fusible_ops */ -@@ -586,7 +741,58 @@ static const struct tune_params xgene1_tunings = - 0, /* max_case_values. */ - 0, /* cache_line_size. */ - tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ -- (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */ -+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ -+}; -+ -+static const struct tune_params qdf24xx_tunings = -+{ -+ &qdf24xx_extra_costs, -+ &qdf24xx_addrcost_table, -+ &qdf24xx_regmove_cost, -+ &generic_vector_cost, -+ &generic_branch_cost, -+ &generic_approx_modes, -+ 4, /* memmov_cost */ -+ 4, /* issue_rate */ -+ (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD -+ | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */ -+ 16, /* function_align. */ -+ 8, /* jump_align. */ -+ 16, /* loop_align. */ -+ 2, /* int_reassoc_width. */ -+ 4, /* fp_reassoc_width. */ -+ 1, /* vec_reassoc_width. */ -+ 2, /* min_div_recip_mul_sf. */ -+ 2, /* min_div_recip_mul_df. */ -+ 0, /* max_case_values. */ -+ 64, /* cache_line_size. */ -+ tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */ -+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ -+}; -+ -+static const struct tune_params vulcan_tunings = -+{ -+ &vulcan_extra_costs, -+ &vulcan_addrcost_table, -+ &vulcan_regmove_cost, -+ &vulcan_vector_cost, -+ &vulcan_branch_cost, -+ &generic_approx_modes, -+ 4, /* memmov_cost. */ -+ 4, /* issue_rate. */ -+ AARCH64_FUSE_NOTHING, /* fuseable_ops. */ -+ 16, /* function_align. */ -+ 8, /* jump_align. */ -+ 16, /* loop_align. */ -+ 3, /* int_reassoc_width. */ -+ 2, /* fp_reassoc_width. */ -+ 2, /* vec_reassoc_width. */ -+ 2, /* min_div_recip_mul_sf. */ -+ 2, /* min_div_recip_mul_df. */ -+ 0, /* max_case_values. */ -+ 64, /* cache_line_size. */ -+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ -+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ - }; - - /* Support for fine-grained override of the tuning structures. */ -@@ -663,16 +869,6 @@ struct aarch64_option_extension - const unsigned long flags_off; - }; - --/* ISA extensions in AArch64. 
*/ --static const struct aarch64_option_extension all_extensions[] = --{ --#define AARCH64_OPT_EXTENSION(NAME, X, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \ -- {NAME, FLAGS_ON, FLAGS_OFF}, --#include "aarch64-option-extensions.def" --#undef AARCH64_OPT_EXTENSION -- {NULL, 0, 0} --}; -- - typedef enum aarch64_cond_code - { - AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL, -@@ -1110,7 +1306,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, - emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s)); - - if (mode != GET_MODE (gp_rtx)) -- gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0); -+ gp_rtx = gen_lowpart (mode, gp_rtx); -+ - } - - if (mode == ptr_mode) -@@ -1186,10 +1383,14 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, - case SYMBOL_SMALL_TLSGD: - { - rtx_insn *insns; -- rtx result = gen_rtx_REG (Pmode, R0_REGNUM); -+ machine_mode mode = GET_MODE (dest); -+ rtx result = gen_rtx_REG (mode, R0_REGNUM); - - start_sequence (); -- aarch64_emit_call_insn (gen_tlsgd_small (result, imm)); -+ if (TARGET_ILP32) -+ aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm)); -+ else -+ aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm)); - insns = get_insns (); - end_sequence (); - -@@ -1703,7 +1904,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) - we need to expand the literal pool access carefully. - This is something that needs to be done in a number - of places, so could well live as a separate function. */ -- if (aarch64_nopcrelative_literal_loads) -+ if (!aarch64_pcrelative_literal_loads) - { - gcc_assert (can_create_pseudo_p ()); - base = gen_reg_rtx (ptr_mode); -@@ -1766,6 +1967,88 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm) - aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest)); - } - -+/* Add DELTA to REGNUM in mode MODE. SCRATCHREG can be used to hold a -+ temporary value if necessary. FRAME_RELATED_P should be true if -+ the RTX_FRAME_RELATED flag should be set and CFA adjustments added -+ to the generated instructions. If SCRATCHREG is known to hold -+ abs (delta), EMIT_MOVE_IMM can be set to false to avoid emitting the -+ immediate again. -+ -+ Since this function may be used to adjust the stack pointer, we must -+ ensure that it cannot cause transient stack deallocation (for example -+ by first incrementing SP and then decrementing when adjusting by a -+ large immediate). */ -+ -+static void -+aarch64_add_constant_internal (machine_mode mode, int regnum, int scratchreg, -+ HOST_WIDE_INT delta, bool frame_related_p, -+ bool emit_move_imm) -+{ -+ HOST_WIDE_INT mdelta = abs_hwi (delta); -+ rtx this_rtx = gen_rtx_REG (mode, regnum); -+ rtx_insn *insn; -+ -+ if (!mdelta) -+ return; -+ -+ /* Single instruction adjustment. */ -+ if (aarch64_uimm12_shift (mdelta)) -+ { -+ insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta))); -+ RTX_FRAME_RELATED_P (insn) = frame_related_p; -+ return; -+ } -+ -+ /* Emit 2 additions/subtractions if the adjustment is less than 24 bits. -+ Only do this if mdelta is not a 16-bit move as adjusting using a move -+ is better. */ -+ if (mdelta < 0x1000000 && !aarch64_move_imm (mdelta, mode)) -+ { -+ HOST_WIDE_INT low_off = mdelta & 0xfff; -+ -+ low_off = delta < 0 ? 
-low_off : low_off; -+ insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (low_off))); -+ RTX_FRAME_RELATED_P (insn) = frame_related_p; -+ insn = emit_insn (gen_add2_insn (this_rtx, GEN_INT (delta - low_off))); -+ RTX_FRAME_RELATED_P (insn) = frame_related_p; -+ return; -+ } -+ -+ /* Emit a move immediate if required and an addition/subtraction. */ -+ rtx scratch_rtx = gen_rtx_REG (mode, scratchreg); -+ if (emit_move_imm) -+ aarch64_internal_mov_immediate (scratch_rtx, GEN_INT (mdelta), true, mode); -+ insn = emit_insn (delta < 0 ? gen_sub2_insn (this_rtx, scratch_rtx) -+ : gen_add2_insn (this_rtx, scratch_rtx)); -+ if (frame_related_p) -+ { -+ RTX_FRAME_RELATED_P (insn) = frame_related_p; -+ rtx adj = plus_constant (mode, this_rtx, delta); -+ add_reg_note (insn , REG_CFA_ADJUST_CFA, gen_rtx_SET (this_rtx, adj)); -+ } -+} -+ -+static inline void -+aarch64_add_constant (machine_mode mode, int regnum, int scratchreg, -+ HOST_WIDE_INT delta) -+{ -+ aarch64_add_constant_internal (mode, regnum, scratchreg, delta, false, true); -+} -+ -+static inline void -+aarch64_add_sp (int scratchreg, HOST_WIDE_INT delta, bool emit_move_imm) -+{ -+ aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, delta, -+ true, emit_move_imm); -+} -+ -+static inline void -+aarch64_sub_sp (int scratchreg, HOST_WIDE_INT delta, bool frame_related_p) -+{ -+ aarch64_add_constant_internal (Pmode, SP_REGNUM, scratchreg, -delta, -+ frame_related_p, true); -+} -+ - static bool - aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, - tree exp ATTRIBUTE_UNUSED) -@@ -2494,7 +2777,7 @@ static void - aarch64_layout_frame (void) - { - HOST_WIDE_INT offset = 0; -- int regno; -+ int regno, last_fp_reg = INVALID_REGNUM; - - if (reload_completed && cfun->machine->frame.laid_out) - return; -@@ -2502,8 +2785,8 @@ aarch64_layout_frame (void) - #define SLOT_NOT_REQUIRED (-2) - #define SLOT_REQUIRED (-1) - -- cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER; -- cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER; -+ cfun->machine->frame.wb_candidate1 = INVALID_REGNUM; -+ cfun->machine->frame.wb_candidate2 = INVALID_REGNUM; - - /* First mark all the registers that really need to be saved... 
*/ - for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) -@@ -2528,7 +2811,10 @@ aarch64_layout_frame (void) - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (df_regs_ever_live_p (regno) - && !call_used_regs[regno]) -- cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; -+ { -+ cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; -+ last_fp_reg = regno; -+ } - - if (frame_pointer_needed) - { -@@ -2537,7 +2823,6 @@ aarch64_layout_frame (void) - cfun->machine->frame.wb_candidate1 = R29_REGNUM; - cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; - cfun->machine->frame.wb_candidate2 = R30_REGNUM; -- cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD; - offset += 2 * UNITS_PER_WORD; - } - -@@ -2546,35 +2831,46 @@ aarch64_layout_frame (void) - if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) - { - cfun->machine->frame.reg_offset[regno] = offset; -- if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) -+ if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) - cfun->machine->frame.wb_candidate1 = regno; -- else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER) -+ else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM) - cfun->machine->frame.wb_candidate2 = regno; - offset += UNITS_PER_WORD; - } - -+ HOST_WIDE_INT max_int_offset = offset; -+ offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); -+ bool has_align_gap = offset != max_int_offset; -+ - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) - if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) - { -+ /* If there is an alignment gap between integer and fp callee-saves, -+ allocate the last fp register to it if possible. */ -+ if (regno == last_fp_reg && has_align_gap && (offset & 8) == 0) -+ { -+ cfun->machine->frame.reg_offset[regno] = max_int_offset; -+ break; -+ } -+ - cfun->machine->frame.reg_offset[regno] = offset; -- if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) -+ if (cfun->machine->frame.wb_candidate1 == INVALID_REGNUM) - cfun->machine->frame.wb_candidate1 = regno; -- else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER -+ else if (cfun->machine->frame.wb_candidate2 == INVALID_REGNUM - && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) - cfun->machine->frame.wb_candidate2 = regno; - offset += UNITS_PER_WORD; - } - -- cfun->machine->frame.padding0 = -- (ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset); - offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); - - cfun->machine->frame.saved_regs_size = offset; - -+ HOST_WIDE_INT varargs_and_saved_regs_size -+ = offset + cfun->machine->frame.saved_varargs_size; -+ - cfun->machine->frame.hard_fp_offset -- = ROUND_UP (cfun->machine->frame.saved_varargs_size -- + get_frame_size () -- + cfun->machine->frame.saved_regs_size, -+ = ROUND_UP (varargs_and_saved_regs_size + get_frame_size (), - STACK_BOUNDARY / BITS_PER_UNIT); - - cfun->machine->frame.frame_size -@@ -2582,15 +2878,92 @@ aarch64_layout_frame (void) - + crtl->outgoing_args_size, - STACK_BOUNDARY / BITS_PER_UNIT); - -+ cfun->machine->frame.locals_offset = cfun->machine->frame.saved_varargs_size; -+ -+ cfun->machine->frame.initial_adjust = 0; -+ cfun->machine->frame.final_adjust = 0; -+ cfun->machine->frame.callee_adjust = 0; -+ cfun->machine->frame.callee_offset = 0; -+ -+ HOST_WIDE_INT max_push_offset = 0; -+ if (cfun->machine->frame.wb_candidate2 != INVALID_REGNUM) -+ max_push_offset = 512; -+ else if (cfun->machine->frame.wb_candidate1 != INVALID_REGNUM) -+ max_push_offset = 
256; -+ -+ if (cfun->machine->frame.frame_size < max_push_offset -+ && crtl->outgoing_args_size == 0) -+ { -+ /* Simple, small frame with no outgoing arguments: -+ stp reg1, reg2, [sp, -frame_size]! -+ stp reg3, reg4, [sp, 16] */ -+ cfun->machine->frame.callee_adjust = cfun->machine->frame.frame_size; -+ } -+ else if ((crtl->outgoing_args_size -+ + cfun->machine->frame.saved_regs_size < 512) -+ && !(cfun->calls_alloca -+ && cfun->machine->frame.hard_fp_offset < max_push_offset)) -+ { -+ /* Frame with small outgoing arguments: -+ sub sp, sp, frame_size -+ stp reg1, reg2, [sp, outgoing_args_size] -+ stp reg3, reg4, [sp, outgoing_args_size + 16] */ -+ cfun->machine->frame.initial_adjust = cfun->machine->frame.frame_size; -+ cfun->machine->frame.callee_offset -+ = cfun->machine->frame.frame_size - cfun->machine->frame.hard_fp_offset; -+ } -+ else if (cfun->machine->frame.hard_fp_offset < max_push_offset) -+ { -+ /* Frame with large outgoing arguments but a small local area: -+ stp reg1, reg2, [sp, -hard_fp_offset]! -+ stp reg3, reg4, [sp, 16] -+ sub sp, sp, outgoing_args_size */ -+ cfun->machine->frame.callee_adjust = cfun->machine->frame.hard_fp_offset; -+ cfun->machine->frame.final_adjust -+ = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; -+ } -+ else if (!frame_pointer_needed -+ && varargs_and_saved_regs_size < max_push_offset) -+ { -+ /* Frame with large local area and outgoing arguments (this pushes the -+ callee-saves first, followed by the locals and outgoing area): -+ stp reg1, reg2, [sp, -varargs_and_saved_regs_size]! -+ stp reg3, reg4, [sp, 16] -+ sub sp, sp, frame_size - varargs_and_saved_regs_size */ -+ cfun->machine->frame.callee_adjust = varargs_and_saved_regs_size; -+ cfun->machine->frame.final_adjust -+ = cfun->machine->frame.frame_size - cfun->machine->frame.callee_adjust; -+ cfun->machine->frame.hard_fp_offset = cfun->machine->frame.callee_adjust; -+ cfun->machine->frame.locals_offset = cfun->machine->frame.hard_fp_offset; -+ } -+ else -+ { -+ /* Frame with large local area and outgoing arguments using frame pointer: -+ sub sp, sp, hard_fp_offset -+ stp x29, x30, [sp, 0] -+ add x29, sp, 0 -+ stp reg3, reg4, [sp, 16] -+ sub sp, sp, outgoing_args_size */ -+ cfun->machine->frame.initial_adjust = cfun->machine->frame.hard_fp_offset; -+ cfun->machine->frame.final_adjust -+ = cfun->machine->frame.frame_size - cfun->machine->frame.initial_adjust; -+ } -+ - cfun->machine->frame.laid_out = true; - } - -+/* Return true if the register REGNO is saved on entry to -+ the current function. */ -+ - static bool - aarch64_register_saved_on_entry (int regno) - { - return cfun->machine->frame.reg_offset[regno] >= 0; - } - -+/* Return the next register up from REGNO up to LIMIT for the callee -+ to save. */ -+ - static unsigned - aarch64_next_callee_save (unsigned regno, unsigned limit) - { -@@ -2599,6 +2972,9 @@ aarch64_next_callee_save (unsigned regno, unsigned limit) - return regno; - } - -+/* Push the register number REGNO of mode MODE to the stack with write-back -+ adjusting the stack by ADJUSTMENT. */ -+ - static void - aarch64_pushwb_single_reg (machine_mode mode, unsigned regno, - HOST_WIDE_INT adjustment) -@@ -2615,6 +2991,10 @@ aarch64_pushwb_single_reg (machine_mode mode, unsigned regno, - RTX_FRAME_RELATED_P (insn) = 1; - } - -+/* Generate and return an instruction to store the pair of registers -+ REG and REG2 of mode MODE to location BASE with write-back adjusting -+ the stack location BASE by ADJUSTMENT. 
*/
-+
- static rtx
- aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
- HOST_WIDE_INT adjustment)
-@@ -2634,11 +3014,18 @@ aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
- }
- }
- 
-+/* Push registers numbered REGNO1 and REGNO2 to the stack, adjusting the
-+ stack pointer by ADJUSTMENT. */
-+
- static void
--aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
-- unsigned regno2, HOST_WIDE_INT adjustment)
-+aarch64_push_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment)
- {
- rtx_insn *insn;
-+ machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
-+
-+ if (regno2 == INVALID_REGNUM)
-+ return aarch64_pushwb_single_reg (mode, regno1, adjustment);
-+
- rtx reg1 = gen_rtx_REG (mode, regno1);
- rtx reg2 = gen_rtx_REG (mode, regno2);
- 
-@@ -2649,6 +3036,9 @@ aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
- RTX_FRAME_RELATED_P (insn) = 1;
- }
- 
-+/* Load the pair of register REG, REG2 of mode MODE from stack location BASE,
-+ adjusting it by ADJUSTMENT afterwards. */
-+
- static rtx
- aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
- HOST_WIDE_INT adjustment)
-@@ -2666,6 +3056,37 @@ aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
- }
- }
- 
-+/* Pop the two registers numbered REGNO1, REGNO2 from the stack, adjusting it
-+ afterwards by ADJUSTMENT and writing the appropriate REG_CFA_RESTORE notes
-+ into CFI_OPS. */
-+
-+static void
-+aarch64_pop_regs (unsigned regno1, unsigned regno2, HOST_WIDE_INT adjustment,
-+ rtx *cfi_ops)
-+{
-+ machine_mode mode = (regno1 <= R30_REGNUM) ? DImode : DFmode;
-+ rtx reg1 = gen_rtx_REG (mode, regno1);
-+
-+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg1, *cfi_ops);
-+
-+ if (regno2 == INVALID_REGNUM)
-+ {
-+ rtx mem = plus_constant (Pmode, stack_pointer_rtx, adjustment);
-+ mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
-+ emit_move_insn (reg1, gen_rtx_MEM (mode, mem));
-+ }
-+ else
-+ {
-+ rtx reg2 = gen_rtx_REG (mode, regno2);
-+ *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
-+ emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
-+ reg2, adjustment));
-+ }
-+}
-+
-+/* Generate and return a store pair instruction of mode MODE to store
-+ register REG1 to MEM1 and register REG2 to MEM2. */
-+
- static rtx
- aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
- rtx reg2)
-@@ -2683,6 +3104,9 @@ aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
- }
- }
- 
-+/* Generate and return a load pair instruction of mode MODE to load register
-+ REG1 from MEM1 and register REG2 from MEM2. */
-+
- static rtx
- aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
- rtx mem2)
-@@ -2700,6 +3124,9 @@ aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
- }
- }
- 
-+/* Emit code to save the callee-saved registers from register number START
-+ to LIMIT to the stack at the location starting at offset START_OFFSET,
-+ skipping any write-back candidates if SKIP_WB is true. */
- 
- static void
- aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
-@@ -2758,6 +3185,11 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
- }
- }
- 
-+/* Emit code to restore the callee registers of mode MODE from register
-+ number START up to and including LIMIT. Restore from the stack offset
-+ START_OFFSET, skipping any write-back candidates if SKIP_WB is true. 
-+ Write the appropriate REG_CFA_RESTORE notes into CFI_OPS. */ -+ - static void - aarch64_restore_callee_saves (machine_mode mode, - HOST_WIDE_INT start_offset, unsigned start, -@@ -2852,23 +3284,16 @@ aarch64_restore_callee_saves (machine_mode mode, - void - aarch64_expand_prologue (void) - { -- /* sub sp, sp, #<frame_size> -- stp {fp, lr}, [sp, #<frame_size> - 16] -- add fp, sp, #<frame_size> - hardfp_offset -- stp {cs_reg}, [fp, #-16] etc. -- -- sub sp, sp, <final_adjustment_if_any> -- */ -- HOST_WIDE_INT frame_size, offset; -- HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */ -- HOST_WIDE_INT hard_fp_offset; -- rtx_insn *insn; -- - aarch64_layout_frame (); - -- offset = frame_size = cfun->machine->frame.frame_size; -- hard_fp_offset = cfun->machine->frame.hard_fp_offset; -- fp_offset = frame_size - hard_fp_offset; -+ HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; -+ HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust; -+ HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; -+ HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust; -+ HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; -+ unsigned reg1 = cfun->machine->frame.wb_candidate1; -+ unsigned reg2 = cfun->machine->frame.wb_candidate2; -+ rtx_insn *insn; - - if (flag_stack_usage_info) - current_function_static_stack_size = frame_size; -@@ -2885,129 +3310,28 @@ aarch64_expand_prologue (void) - aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size); - } - -- /* Store pairs and load pairs have a range only -512 to 504. */ -- if (offset >= 512) -- { -- /* When the frame has a large size, an initial decrease is done on -- the stack pointer to jump over the callee-allocated save area for -- register varargs, the local variable area and/or the callee-saved -- register area. This will allow the pre-index write-back -- store pair instructions to be used for setting up the stack frame -- efficiently. 
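A worked instance of the rewritten prologue in this hunk: with frame_size == 96 (all of it
callee-saves and locals), a frame pointer, and no outgoing arguments, the layout code picks
callee_adjust == 96 and initial_adjust == final_adjust == callee_offset == 0, so the entire
prologue reduces to (callee-save registers illustrative):

    stp x29, x30, [sp, -96]!    callee_adjust folded into the first push
    mov x29, sp                 hard FP established at callee_offset == 0
    stp x19, x20, [sp, 16]      remaining callee-saves at small offsets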
*/ -- offset = hard_fp_offset; -- if (offset >= 512) -- offset = cfun->machine->frame.saved_regs_size; -+ aarch64_sub_sp (IP0_REGNUM, initial_adjust, true); - -- frame_size -= (offset + crtl->outgoing_args_size); -- fp_offset = 0; -+ if (callee_adjust != 0) -+ aarch64_push_regs (reg1, reg2, callee_adjust); - -- if (frame_size >= 0x1000000) -- { -- rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); -- emit_move_insn (op0, GEN_INT (-frame_size)); -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); -- -- add_reg_note (insn, REG_CFA_ADJUST_CFA, -- gen_rtx_SET (stack_pointer_rtx, -- plus_constant (Pmode, stack_pointer_rtx, -- -frame_size))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- else if (frame_size > 0) -- { -- int hi_ofs = frame_size & 0xfff000; -- int lo_ofs = frame_size & 0x000fff; -- -- if (hi_ofs) -- { -- insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, GEN_INT (-hi_ofs))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- if (lo_ofs) -- { -- insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, GEN_INT (-lo_ofs))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- } -- } -- else -- frame_size = -1; -- -- if (offset > 0) -+ if (frame_pointer_needed) - { -- bool skip_wb = false; -- -- if (frame_pointer_needed) -- { -- skip_wb = true; -- -- if (fp_offset) -- { -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -- GEN_INT (-offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -- -- aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM, -- R30_REGNUM, false); -- } -- else -- aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset); -- -- /* Set up frame pointer to point to the location of the -- previous frame pointer on the stack. */ -- insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, -- stack_pointer_rtx, -- GEN_INT (fp_offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -- emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); -- } -- else -- { -- unsigned reg1 = cfun->machine->frame.wb_candidate1; -- unsigned reg2 = cfun->machine->frame.wb_candidate2; -- -- if (fp_offset -- || reg1 == FIRST_PSEUDO_REGISTER -- || (reg2 == FIRST_PSEUDO_REGISTER -- && offset >= 256)) -- { -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -- GEN_INT (-offset))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- else -- { -- machine_mode mode1 = (reg1 <= R30_REGNUM) ? 
DImode : DFmode; -- -- skip_wb = true; -- -- if (reg2 == FIRST_PSEUDO_REGISTER) -- aarch64_pushwb_single_reg (mode1, reg1, offset); -- else -- aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset); -- } -- } -- -- aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, -- skip_wb); -- aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, -- skip_wb); -+ if (callee_adjust == 0) -+ aarch64_save_callee_saves (DImode, callee_offset, R29_REGNUM, -+ R30_REGNUM, false); -+ insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, -+ stack_pointer_rtx, -+ GEN_INT (callee_offset))); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); - } - -- /* when offset >= 512, -- sub sp, sp, #<outgoing_args_size> */ -- if (frame_size > -1) -- { -- if (crtl->outgoing_args_size > 0) -- { -- insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT (- crtl->outgoing_args_size))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -- } -+ aarch64_save_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, -+ callee_adjust != 0 || frame_pointer_needed); -+ aarch64_save_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, -+ callee_adjust != 0 || frame_pointer_needed); -+ aarch64_sub_sp (IP1_REGNUM, final_adjust, !frame_pointer_needed); - } - - /* Return TRUE if we can use a simple_return insn. -@@ -3030,151 +3354,80 @@ aarch64_use_return_insn_p (void) - return cfun->machine->frame.frame_size == 0; - } - --/* Generate the epilogue instructions for returning from a function. */ -+/* Generate the epilogue instructions for returning from a function. -+ This is almost exactly the reverse of the prolog sequence, except -+ that we need to insert barriers to avoid scheduling loads that read -+ from a deallocated stack, and we optimize the unwind records by -+ emitting them all together if possible. */ - void - aarch64_expand_epilogue (bool for_sibcall) - { -- HOST_WIDE_INT frame_size, offset; -- HOST_WIDE_INT fp_offset; -- HOST_WIDE_INT hard_fp_offset; -- rtx_insn *insn; -- /* We need to add memory barrier to prevent read from deallocated stack. */ -- bool need_barrier_p = (get_frame_size () != 0 -- || cfun->machine->frame.saved_varargs_size -- || crtl->calls_eh_return); -- - aarch64_layout_frame (); - -- offset = frame_size = cfun->machine->frame.frame_size; -- hard_fp_offset = cfun->machine->frame.hard_fp_offset; -- fp_offset = frame_size - hard_fp_offset; -+ HOST_WIDE_INT initial_adjust = cfun->machine->frame.initial_adjust; -+ HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust; -+ HOST_WIDE_INT final_adjust = cfun->machine->frame.final_adjust; -+ HOST_WIDE_INT callee_offset = cfun->machine->frame.callee_offset; -+ unsigned reg1 = cfun->machine->frame.wb_candidate1; -+ unsigned reg2 = cfun->machine->frame.wb_candidate2; -+ rtx cfi_ops = NULL; -+ rtx_insn *insn; - -- /* Store pairs and load pairs have a range only -512 to 504. */ -- if (offset >= 512) -- { -- offset = hard_fp_offset; -- if (offset >= 512) -- offset = cfun->machine->frame.saved_regs_size; -+ /* We need to add memory barrier to prevent read from deallocated stack. 
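The hazard the tie guards against is a cross-base reordering. When restores are addressed off
x29 but the deallocation adjusts sp, the two instructions share no register dependence, so
without the barrier the scheduler could hoist the sp adjustment above the restore; the load
would then read stack memory below the new sp, which a signal handler is free to clobber:

    ldp x19, x20, [x29, -32]    restore addressed off the frame pointer
    add sp, sp, 48              deallocate: nothing else orders these two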
*/ -+ bool need_barrier_p = (get_frame_size () -+ + cfun->machine->frame.saved_varargs_size) != 0; - -- frame_size -= (offset + crtl->outgoing_args_size); -- fp_offset = 0; -- if (!frame_pointer_needed && crtl->outgoing_args_size > 0) -- { -- insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, -- GEN_INT (crtl->outgoing_args_size))); -- RTX_FRAME_RELATED_P (insn) = 1; -- } -+ /* Emit a barrier to prevent loads from a deallocated stack. */ -+ if (final_adjust > crtl->outgoing_args_size || cfun->calls_alloca -+ || crtl->calls_eh_return) -+ { -+ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); -+ need_barrier_p = false; - } -- else -- frame_size = -1; - -- /* If there were outgoing arguments or we've done dynamic stack -- allocation, then restore the stack pointer from the frame -- pointer. This is at most one insn and more efficient than using -- GCC's internal mechanism. */ -- if (frame_pointer_needed -- && (crtl->outgoing_args_size || cfun->calls_alloca)) -+ /* Restore the stack pointer from the frame pointer if it may not -+ be the same as the stack pointer. */ -+ if (frame_pointer_needed && (final_adjust || cfun->calls_alloca)) - { -- if (cfun->calls_alloca) -- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); -- - insn = emit_insn (gen_add3_insn (stack_pointer_rtx, - hard_frame_pointer_rtx, -- GEN_INT (0))); -- offset = offset - fp_offset; -- } -- -- if (offset > 0) -- { -- unsigned reg1 = cfun->machine->frame.wb_candidate1; -- unsigned reg2 = cfun->machine->frame.wb_candidate2; -- bool skip_wb = true; -- rtx cfi_ops = NULL; -- -- if (frame_pointer_needed) -- fp_offset = 0; -- else if (fp_offset -- || reg1 == FIRST_PSEUDO_REGISTER -- || (reg2 == FIRST_PSEUDO_REGISTER -- && offset >= 256)) -- skip_wb = false; -- -- aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, -- skip_wb, &cfi_ops); -- aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, -- skip_wb, &cfi_ops); -- -- if (need_barrier_p) -- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); -- -- if (skip_wb) -- { -- machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode; -- rtx rreg1 = gen_rtx_REG (mode1, reg1); -- -- cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops); -- if (reg2 == FIRST_PSEUDO_REGISTER) -- { -- rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset); -- mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); -- mem = gen_rtx_MEM (mode1, mem); -- insn = emit_move_insn (rreg1, mem); -- } -- else -- { -- rtx rreg2 = gen_rtx_REG (mode1, reg2); -- -- cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops); -- insn = emit_insn (aarch64_gen_loadwb_pair -- (mode1, stack_pointer_rtx, rreg1, -- rreg2, offset)); -- } -- } -- else -- { -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, -- GEN_INT (offset))); -- } -- -- /* Reset the CFA to be SP + FRAME_SIZE. */ -- rtx new_cfa = stack_pointer_rtx; -- if (frame_size > 0) -- new_cfa = plus_constant (Pmode, new_cfa, frame_size); -- cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops); -- REG_NOTES (insn) = cfi_ops; -- RTX_FRAME_RELATED_P (insn) = 1; -+ GEN_INT (-callee_offset))); -+ /* If writeback is used when restoring callee-saves, the CFA -+ is restored on the instruction doing the writeback. 
*/ -+ RTX_FRAME_RELATED_P (insn) = callee_adjust == 0; - } -+ else -+ aarch64_add_sp (IP1_REGNUM, final_adjust, df_regs_ever_live_p (IP1_REGNUM)); - -- if (frame_size > 0) -- { -- if (need_barrier_p) -- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); -+ aarch64_restore_callee_saves (DImode, callee_offset, R0_REGNUM, R30_REGNUM, -+ callee_adjust != 0, &cfi_ops); -+ aarch64_restore_callee_saves (DFmode, callee_offset, V0_REGNUM, V31_REGNUM, -+ callee_adjust != 0, &cfi_ops); - -- if (frame_size >= 0x1000000) -- { -- rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); -- emit_move_insn (op0, GEN_INT (frame_size)); -- insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); -- } -- else -- { -- int hi_ofs = frame_size & 0xfff000; -- int lo_ofs = frame_size & 0x000fff; -+ if (need_barrier_p) -+ emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); - -- if (hi_ofs && lo_ofs) -- { -- insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, GEN_INT (hi_ofs))); -- RTX_FRAME_RELATED_P (insn) = 1; -- frame_size = lo_ofs; -- } -- insn = emit_insn (gen_add2_insn -- (stack_pointer_rtx, GEN_INT (frame_size))); -- } -+ if (callee_adjust != 0) -+ aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops); -+ -+ if (callee_adjust != 0 || initial_adjust > 65536) -+ { -+ /* Emit delayed restores and set the CFA to be SP + initial_adjust. */ -+ insn = get_last_insn (); -+ rtx new_cfa = plus_constant (Pmode, stack_pointer_rtx, initial_adjust); -+ REG_NOTES (insn) = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops); -+ RTX_FRAME_RELATED_P (insn) = 1; -+ cfi_ops = NULL; -+ } -+ -+ aarch64_add_sp (IP0_REGNUM, initial_adjust, df_regs_ever_live_p (IP0_REGNUM)); - -- /* Reset the CFA to be SP + 0. */ -- add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); -+ if (cfi_ops) -+ { -+ /* Emit delayed restores and reset the CFA to be SP. */ -+ insn = get_last_insn (); -+ cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, stack_pointer_rtx, cfi_ops); -+ REG_NOTES (insn) = cfi_ops; - RTX_FRAME_RELATED_P (insn) = 1; - } - -@@ -3230,122 +3483,6 @@ aarch64_eh_return_handler_rtx (void) - return tmp; - } - --/* Possibly output code to build up a constant in a register. For -- the benefit of the costs infrastructure, returns the number of -- instructions which would be emitted. GENERATE inhibits or -- enables code generation. */ -- --static int --aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate) --{ -- int insns = 0; -- -- if (aarch64_bitmask_imm (val, DImode)) -- { -- if (generate) -- emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); -- insns = 1; -- } -- else -- { -- int i; -- int ncount = 0; -- int zcount = 0; -- HOST_WIDE_INT valp = val >> 16; -- HOST_WIDE_INT valm; -- HOST_WIDE_INT tval; -- -- for (i = 16; i < 64; i += 16) -- { -- valm = (valp & 0xffff); -- -- if (valm != 0) -- ++ zcount; -- -- if (valm != 0xffff) -- ++ ncount; -- -- valp >>= 16; -- } -- -- /* zcount contains the number of additional MOVK instructions -- required if the constant is built up with an initial MOVZ instruction, -- while ncount is the number of MOVK instructions required if starting -- with a MOVN instruction. Choose the sequence that yields the fewest -- number of instructions, preferring MOVZ instructions when they are both -- the same. 
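The zcount/ncount trade-off is easy to check on a concrete value. A runnable model of the same
chunk counting (plain C, not the GCC helper; the low 16 bits are covered by the initial
MOVZ/MOVN itself):

    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
      uint64_t val = 0xffffffff0000ffffull;
      int zcount = 0, ncount = 0;        /* MOVKs after a MOVZ / after a MOVN */
      for (int i = 16; i < 64; i += 16)
        {
          unsigned chunk = (val >> i) & 0xffff;
          if (chunk != 0)      zcount++; /* MOVZ start must patch this chunk */
          if (chunk != 0xffff) ncount++; /* MOVN start must patch this chunk */
        }
      printf ("movz path: %d insns, movn path: %d insns\n",
              1 + zcount, 1 + ncount);   /* prints 3 and 2 for this value */
      return 0;
    }

Here ncount < zcount, so the routine starts with MOVN and needs only one MOVK:
movn x0, 0 then movk x0, 0, lsl 16 — two instructions instead of three.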
*/ -- if (ncount < zcount) -- { -- if (generate) -- emit_move_insn (gen_rtx_REG (Pmode, regnum), -- GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); -- tval = 0xffff; -- insns++; -- } -- else -- { -- if (generate) -- emit_move_insn (gen_rtx_REG (Pmode, regnum), -- GEN_INT (val & 0xffff)); -- tval = 0; -- insns++; -- } -- -- val >>= 16; -- -- for (i = 16; i < 64; i += 16) -- { -- if ((val & 0xffff) != tval) -- { -- if (generate) -- emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), -- GEN_INT (i), -- GEN_INT (val & 0xffff))); -- insns++; -- } -- val >>= 16; -- } -- } -- return insns; --} -- --static void --aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta) --{ -- HOST_WIDE_INT mdelta = delta; -- rtx this_rtx = gen_rtx_REG (Pmode, regnum); -- rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg); -- -- if (mdelta < 0) -- mdelta = -mdelta; -- -- if (mdelta >= 4096 * 4096) -- { -- (void) aarch64_build_constant (scratchreg, delta, true); -- emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); -- } -- else if (mdelta > 0) -- { -- if (mdelta >= 4096) -- { -- emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096))); -- rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12)); -- if (delta < 0) -- emit_insn (gen_rtx_SET (this_rtx, -- gen_rtx_MINUS (Pmode, this_rtx, shift))); -- else -- emit_insn (gen_rtx_SET (this_rtx, -- gen_rtx_PLUS (Pmode, this_rtx, shift))); -- } -- if (mdelta % 4096 != 0) -- { -- scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096)); -- emit_insn (gen_rtx_SET (this_rtx, -- gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx))); -- } -- } --} -- - /* Output code to add DELTA to the first argument, and then jump - to FUNCTION. Used for C++ multiple inheritance. */ - static void -@@ -3366,7 +3503,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - emit_note (NOTE_INSN_PROLOGUE_END); - - if (vcall_offset == 0) -- aarch64_add_constant (this_regno, IP1_REGNUM, delta); -+ aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta); - else - { - gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0); -@@ -3382,7 +3519,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx, - plus_constant (Pmode, this_rtx, delta)); - else -- aarch64_add_constant (this_regno, IP1_REGNUM, delta); -+ aarch64_add_constant (Pmode, this_regno, IP1_REGNUM, delta); - } - - if (Pmode == ptr_mode) -@@ -3396,7 +3533,8 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - addr = plus_constant (Pmode, temp0, vcall_offset); - else - { -- (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true); -+ aarch64_internal_mov_immediate (temp1, GEN_INT (vcall_offset), true, -+ Pmode); - addr = gen_rtx_PLUS (Pmode, temp0, temp1); - } - -@@ -3575,7 +3713,12 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) - return aarch64_tls_referenced_p (x); - } - --/* Implement TARGET_CASE_VALUES_THRESHOLD. */ -+/* Implement TARGET_CASE_VALUES_THRESHOLD. -+ The expansion for a table switch is quite expensive due to the number -+ of instructions, the table lookup and hard to predict indirect jump. -+ When optimizing for speed, and -O3 enabled, use the per-core tuning if -+ set, otherwise use tables for > 16 cases as a tradeoff between size and -+ performance. When optimizing for size, use the default setting. 
*/ - - static unsigned int - aarch64_case_values_threshold (void) -@@ -3586,7 +3729,7 @@ aarch64_case_values_threshold (void) - && selected_cpu->tune->max_case_values != 0) - return selected_cpu->tune->max_case_values; - else -- return default_case_values_threshold (); -+ return optimize_size ? default_case_values_threshold () : 17; - } - - /* Return true if register REGNO is a valid index register. -@@ -3921,9 +4064,11 @@ aarch64_classify_address (struct aarch64_address_info *info, - X,X: 7-bit signed scaled offset - Q: 9-bit signed offset - We conservatively require an offset representable in either mode. -- */ -+ When performing the check for pairs of X registers i.e. LDP/STP -+ pass down DImode since that is the natural size of the LDP/STP -+ instruction memory accesses. */ - if (mode == TImode || mode == TFmode) -- return (aarch64_offset_7bit_signed_scaled_p (mode, offset) -+ return (aarch64_offset_7bit_signed_scaled_p (DImode, offset) - && offset_9bit_signed_unscaled_p (mode, offset)); - - /* A 7bit offset check because OImode will emit a ldp/stp -@@ -4031,7 +4176,7 @@ aarch64_classify_address (struct aarch64_address_info *info, - return ((GET_CODE (sym) == LABEL_REF - || (GET_CODE (sym) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (sym) -- && !aarch64_nopcrelative_literal_loads))); -+ && aarch64_pcrelative_literal_loads))); - } - return false; - -@@ -4125,6 +4270,24 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x, - return aarch64_classify_address (&addr, x, mode, outer_code, strict_p); - } - -+/* Split an out-of-range address displacement into a base and offset. -+ Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise -+ to increase opportunities for sharing the base address of different sizes. -+ For TI/TFmode and unaligned accesses use a 256-byte range. */ -+static bool -+aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode) -+{ -+ HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff; -+ -+ if (mode == TImode || mode == TFmode || -+ (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0) -+ mask = 0xff; -+ -+ *off = GEN_INT (INTVAL (*disp) & ~mask); -+ *disp = GEN_INT (INTVAL (*disp) & mask); -+ return true; -+} -+ - /* Return TRUE if rtx X is immediate constant 0.0 */ - bool - aarch64_float_const_zero_rtx_p (rtx x) -@@ -4198,6 +4361,14 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) - && (GET_MODE (x) == HImode || GET_MODE (x) == QImode)) - return CC_NZmode; - -+ /* Similarly, comparisons of zero_extends from shorter modes can -+ be performed using an ANDS with an immediate mask. */ -+ if (y == const0_rtx && GET_CODE (x) == ZERO_EXTEND -+ && (GET_MODE (x) == SImode || GET_MODE (x) == DImode) -+ && (GET_MODE (XEXP (x, 0)) == HImode || GET_MODE (XEXP (x, 0)) == QImode) -+ && (code == EQ || code == NE)) -+ return CC_NZmode; -+ - if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode) - && y == const0_rtx - && (code == EQ || code == NE || code == LT || code == GE) -@@ -4225,14 +4396,6 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y) - && GET_CODE (x) == NEG) - return CC_Zmode; - -- /* A compare of a mode narrower than SI mode against zero can be done -- by extending the value in the comparison. */ -- if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode) -- && y == const0_rtx) -- /* Only use sign-extension if we really need it. */ -- return ((code == GT || code == GE || code == LE || code == LT) -- ? CC_SESWPmode : CC_ZESWPmode); -- - /* A test for unsigned overflow. 
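The zero_extend case added above is the familiar TST idiom: comparing a zero-extended byte or
halfword against zero only needs the low bits, so an ANDS with an immediate mask can set the
flags directly. A minimal example (the generated code shown is an assumption, not taken from
the patch):

    int is_nul (unsigned int x)
    {
      /* (unsigned char) x == 0 is the same test as (x & 0xff) == 0;
         with the CC_NZmode handling above this can compile to roughly:
           tst  w0, 255
           cset w0, eq  */
      return (unsigned char) x == 0;
    }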
*/ - if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode) - && code == NE -@@ -4301,8 +4464,6 @@ aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code) - break; - - case CC_SWPmode: -- case CC_ZESWPmode: -- case CC_SESWPmode: - switch (comp_code) - { - case NE: return AARCH64_NE; -@@ -4957,7 +5118,7 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode) - if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) - { - rtx base = XEXP (x, 0); -- rtx offset_rtx XEXP (x, 1); -+ rtx offset_rtx = XEXP (x, 1); - HOST_WIDE_INT offset = INTVAL (offset_rtx); - - if (GET_CODE (base) == PLUS) -@@ -5015,120 +5176,6 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x */, machine_mode mode) - return x; - } - --/* Try a machine-dependent way of reloading an illegitimate address -- operand. If we find one, push the reload and return the new rtx. */ -- --rtx --aarch64_legitimize_reload_address (rtx *x_p, -- machine_mode mode, -- int opnum, int type, -- int ind_levels ATTRIBUTE_UNUSED) --{ -- rtx x = *x_p; -- -- /* Do not allow mem (plus (reg, const)) if vector struct mode. */ -- if (aarch64_vect_struct_mode_p (mode) -- && GET_CODE (x) == PLUS -- && REG_P (XEXP (x, 0)) -- && CONST_INT_P (XEXP (x, 1))) -- { -- rtx orig_rtx = x; -- x = copy_rtx (x); -- push_reload (orig_rtx, NULL_RTX, x_p, NULL, -- BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, -- opnum, (enum reload_type) type); -- return x; -- } -- -- /* We must recognize output that we have already generated ourselves. */ -- if (GET_CODE (x) == PLUS -- && GET_CODE (XEXP (x, 0)) == PLUS -- && REG_P (XEXP (XEXP (x, 0), 0)) -- && CONST_INT_P (XEXP (XEXP (x, 0), 1)) -- && CONST_INT_P (XEXP (x, 1))) -- { -- push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, -- BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, -- opnum, (enum reload_type) type); -- return x; -- } -- -- /* We wish to handle large displacements off a base register by splitting -- the addend across an add and the mem insn. This can cut the number of -- extra insns needed from 3 to 1. It is only useful for load/store of a -- single register with 12 bit offset field. */ -- if (GET_CODE (x) == PLUS -- && REG_P (XEXP (x, 0)) -- && CONST_INT_P (XEXP (x, 1)) -- && HARD_REGISTER_P (XEXP (x, 0)) -- && mode != TImode -- && mode != TFmode -- && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true)) -- { -- HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); -- HOST_WIDE_INT low = val & 0xfff; -- HOST_WIDE_INT high = val - low; -- HOST_WIDE_INT offs; -- rtx cst; -- machine_mode xmode = GET_MODE (x); -- -- /* In ILP32, xmode can be either DImode or SImode. */ -- gcc_assert (xmode == DImode || xmode == SImode); -- -- /* Reload non-zero BLKmode offsets. This is because we cannot ascertain -- BLKmode alignment. */ -- if (GET_MODE_SIZE (mode) == 0) -- return NULL_RTX; -- -- offs = low % GET_MODE_SIZE (mode); -- -- /* Align misaligned offset by adjusting high part to compensate. */ -- if (offs != 0) -- { -- if (aarch64_uimm12_shift (high + offs)) -- { -- /* Align down. */ -- low = low - offs; -- high = high + offs; -- } -- else -- { -- /* Align up. */ -- offs = GET_MODE_SIZE (mode) - offs; -- low = low + offs; -- high = high + (low & 0x1000) - offs; -- low &= 0xfff; -- } -- } -- -- /* Check for overflow. */ -- if (high + low != val) -- return NULL_RTX; -- -- cst = GEN_INT (high); -- if (!aarch64_uimm12_shift (high)) -- cst = force_const_mem (xmode, cst); -- -- /* Reload high part into base reg, leaving the low part -- in the mem instruction. 
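Both the new legitimize_address_displacement hook above and this removed reload path split an
out-of-range offset the same way: keep a small, encodable part on the memory access and fold
the rest into the base register first. With a 4-byte access and offset 0x12344, the hook masks
with 0x3fff:

    0x12344 & ~0x3fff = 0x10000    added to the base register up front
    0x12344 &  0x3fff = 0x02344    kept as the load/store displacement

and 0x10000 + 0x2344 reassembles the original offset, with 0x2344 in range for a scaled
12-bit word displacement.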
-- Note that replacing this gen_rtx_PLUS with plus_constant is -- wrong in this case because we rely on the -- (plus (plus reg c1) c2) structure being preserved so that -- XEXP (*p, 0) in push_reload below uses the correct term. */ -- x = gen_rtx_PLUS (xmode, -- gen_rtx_PLUS (xmode, XEXP (x, 0), cst), -- GEN_INT (low)); -- -- push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, -- BASE_REG_CLASS, xmode, VOIDmode, 0, 0, -- opnum, (enum reload_type) type); -- return x; -- } -- -- return NULL_RTX; --} -- -- - /* Return the reload icode required for a constant pool in mode. */ - static enum insn_code - aarch64_constant_pool_reload_icode (machine_mode mode) -@@ -5186,7 +5233,7 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x, - if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x) - && (SCALAR_FLOAT_MODE_P (GET_MODE (x)) - || targetm.vector_mode_supported_p (GET_MODE (x))) -- && aarch64_nopcrelative_literal_loads) -+ && !aarch64_pcrelative_literal_loads) - { - sri->icode = aarch64_constant_pool_reload_icode (mode); - return NO_REGS; -@@ -5260,18 +5307,18 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to) - if (to == HARD_FRAME_POINTER_REGNUM) - { - if (from == ARG_POINTER_REGNUM) -- return cfun->machine->frame.frame_size - crtl->outgoing_args_size; -+ return cfun->machine->frame.hard_fp_offset; - - if (from == FRAME_POINTER_REGNUM) -- return (cfun->machine->frame.hard_fp_offset -- - cfun->machine->frame.saved_varargs_size); -+ return cfun->machine->frame.hard_fp_offset -+ - cfun->machine->frame.locals_offset; - } - - if (to == STACK_POINTER_REGNUM) - { - if (from == FRAME_POINTER_REGNUM) -- return (cfun->machine->frame.frame_size -- - cfun->machine->frame.saved_varargs_size); -+ return cfun->machine->frame.frame_size -+ - cfun->machine->frame.locals_offset; - } - - return cfun->machine->frame.frame_size; -@@ -5418,7 +5465,10 @@ aarch64_elf_asm_constructor (rtx symbol, int priority) - else - { - section *s; -- char buf[18]; -+ /* While priority is known to be in range [0, 65535], so 18 bytes -+ would be enough, the compiler might not know that. To avoid -+ -Wformat-truncation false positive, use a larger size. */ -+ char buf[23]; - snprintf (buf, sizeof (buf), ".init_array.%.5u", priority); - s = get_section (buf, SECTION_WRITE, NULL); - switch_to_section (s); -@@ -5435,7 +5485,10 @@ aarch64_elf_asm_destructor (rtx symbol, int priority) - else - { - section *s; -- char buf[18]; -+ /* While priority is known to be in range [0, 65535], so 18 bytes -+ would be enough, the compiler might not know that. To avoid -+ -Wformat-truncation false positive, use a larger size. */ -+ char buf[23]; - snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority); - s = get_section (buf, SECTION_WRITE, NULL); - switch_to_section (s); -@@ -5520,7 +5573,7 @@ aarch64_uxt_size (int shift, HOST_WIDE_INT mask) - static inline bool - aarch64_can_use_per_function_literal_pools_p (void) - { -- return (!aarch64_nopcrelative_literal_loads -+ return (aarch64_pcrelative_literal_loads - || aarch64_cmodel == AARCH64_CMODEL_LARGE); - } - -@@ -6139,6 +6192,19 @@ aarch64_extend_bitfield_pattern_p (rtx x) - return op; - } - -+/* Return true if the mask and a shift amount from an RTX of the form -+ (x << SHFT_AMNT) & MASK are valid to combine into a UBFIZ instruction of -+ mode MODE. See the *andim_ashift<mode>_bfiz pattern. 
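A concrete instance of the UBFIZ predicate above: for (x << 3) & 0x1f8, the mask shifted right
by 3 is 0x3f, 0x3f + 1 is a power of two, and the mask has no bits below the shift amount, so
the combination is a single ubfiz of 6 bits at position 3. A runnable check mirroring the
three conditions (plain C, not the GCC function):

    #include <stdio.h>
    #include <stdint.h>

    static int
    ubfiz_ok (uint64_t mask, unsigned shift, unsigned mode_bits)
    {
      uint64_t field = (mask >> shift) + 1;              /* contiguous mask + 1 */
      return shift < mode_bits
             && field != 0 && (field & (field - 1)) == 0 /* power of two */
             && (mask & ((1ull << shift) - 1)) == 0;     /* nothing below shift */
    }

    int main (void)
    {
      printf ("%d\n", ubfiz_ok (0x1f8, 3, 64));   /* 1: ubfiz x0, x1, 3, 6 */
      printf ("%d\n", ubfiz_ok (0x1f4, 3, 64));   /* 0: bits below the shift */
      return 0;
    }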
*/ -+ -+bool -+aarch64_mask_and_shift_for_ubfiz_p (machine_mode mode, rtx mask, rtx shft_amnt) -+{ -+ return CONST_INT_P (mask) && CONST_INT_P (shft_amnt) -+ && INTVAL (shft_amnt) < GET_MODE_BITSIZE (mode) -+ && exact_log2 ((INTVAL (mask) >> INTVAL (shft_amnt)) + 1) >= 0 -+ && (INTVAL (mask) & ((1 << INTVAL (shft_amnt)) - 1)) == 0; -+} -+ - /* Calculate the cost of calculating X, storing it in *COST. Result - is true if the total cost of the operation has now been calculated. */ - static bool -@@ -6404,10 +6470,6 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED, - /* TODO: A write to the CC flags possibly costs extra, this - needs encoding in the cost tables. */ - -- /* CC_ZESWPmode supports zero extend for free. */ -- if (mode == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND) -- op0 = XEXP (op0, 0); -- - mode = GET_MODE (op0); - /* ANDS. */ - if (GET_CODE (op0) == AND) -@@ -6717,17 +6779,31 @@ cost_plus: - - if (GET_MODE_CLASS (mode) == MODE_INT) - { -- /* We possibly get the immediate for free, this is not -- modelled. */ -- if (CONST_INT_P (op1) -- && aarch64_bitmask_imm (INTVAL (op1), mode)) -+ if (CONST_INT_P (op1)) - { -- *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed); -+ /* We have a mask + shift version of a UBFIZ -+ i.e. the *andim_ashift<mode>_bfiz pattern. */ -+ if (GET_CODE (op0) == ASHIFT -+ && aarch64_mask_and_shift_for_ubfiz_p (mode, op1, -+ XEXP (op0, 1))) -+ { -+ *cost += rtx_cost (XEXP (op0, 0), mode, -+ (enum rtx_code) code, 0, speed); -+ if (speed) -+ *cost += extra_cost->alu.bfx; - -- if (speed) -- *cost += extra_cost->alu.logical; -+ return true; -+ } -+ else if (aarch64_bitmask_imm (INTVAL (op1), mode)) -+ { -+ /* We possibly get the immediate for free, this is not -+ modelled. */ -+ *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed); -+ if (speed) -+ *cost += extra_cost->alu.logical; - -- return true; -+ return true; -+ } - } - else - { -@@ -6831,11 +6907,12 @@ cost_plus: - { - int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed); - -- if (!op_cost && speed) -- /* MOV. */ -- *cost += extra_cost->alu.extend; -- else -- /* Free, the cost is that of the SI mode operation. */ -+ /* If OP_COST is non-zero, then the cost of the zero extend -+ is effectively the cost of the inner operation. Otherwise -+ we have a MOV instruction and we take the cost from the MOV -+ itself. This is true independently of whether we are -+ optimizing for space or time. */ -+ if (op_cost) - *cost = op_cost; - - return true; -@@ -6865,8 +6942,8 @@ cost_plus: - } - else - { -- /* UXTB/UXTH. */ -- *cost += extra_cost->alu.extend; -+ /* We generate an AND instead of UXTB/UXTH. */ -+ *cost += extra_cost->alu.logical; - } - } - return false; -@@ -7349,7 +7426,8 @@ cost_plus: - break; - } - -- if (dump_file && (dump_flags & TDF_DETAILS)) -+ if (dump_file -+ && flag_aarch64_verbose_cost) - fprintf (dump_file, - "\nFailed to cost RTX. Assuming default cost.\n"); - -@@ -7365,7 +7443,8 @@ aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer, - { - bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed); - -- if (dump_file && (dump_flags & TDF_DETAILS)) -+ if (dump_file -+ && flag_aarch64_verbose_cost) - { - print_rtl_single (dump_file, x); - fprintf (dump_file, "\n%s cost: %d (%s)\n", -@@ -7445,12 +7524,12 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED, - to optimize 1.0/sqrt. 
*/ - - static bool --use_rsqrt_p (void) -+use_rsqrt_p (machine_mode mode) - { - return (!flag_trapping_math - && flag_unsafe_math_optimizations -- && ((aarch64_tune_params.extra_tuning_flags -- & AARCH64_EXTRA_TUNE_APPROX_RSQRT) -+ && ((aarch64_tune_params.approx_modes->recip_sqrt -+ & AARCH64_APPROX_MODE (mode)) - || flag_mrecip_low_precision_sqrt)); - } - -@@ -7460,89 +7539,225 @@ use_rsqrt_p (void) - static tree - aarch64_builtin_reciprocal (tree fndecl) - { -- if (!use_rsqrt_p ()) -+ machine_mode mode = TYPE_MODE (TREE_TYPE (fndecl)); -+ -+ if (!use_rsqrt_p (mode)) - return NULL_TREE; - return aarch64_builtin_rsqrt (DECL_FUNCTION_CODE (fndecl)); - } - - typedef rtx (*rsqrte_type) (rtx, rtx); - --/* Select reciprocal square root initial estimate -- insn depending on machine mode. */ -+/* Select reciprocal square root initial estimate insn depending on machine -+ mode. */ - --rsqrte_type -+static rsqrte_type - get_rsqrte_type (machine_mode mode) - { - switch (mode) - { -- case DFmode: return gen_aarch64_rsqrte_df2; -- case SFmode: return gen_aarch64_rsqrte_sf2; -- case V2DFmode: return gen_aarch64_rsqrte_v2df2; -- case V2SFmode: return gen_aarch64_rsqrte_v2sf2; -- case V4SFmode: return gen_aarch64_rsqrte_v4sf2; -+ case DFmode: return gen_aarch64_rsqrtedf; -+ case SFmode: return gen_aarch64_rsqrtesf; -+ case V2DFmode: return gen_aarch64_rsqrtev2df; -+ case V2SFmode: return gen_aarch64_rsqrtev2sf; -+ case V4SFmode: return gen_aarch64_rsqrtev4sf; - default: gcc_unreachable (); - } - } - - typedef rtx (*rsqrts_type) (rtx, rtx, rtx); - --/* Select reciprocal square root Newton-Raphson step -- insn depending on machine mode. */ -+/* Select reciprocal square root series step insn depending on machine mode. */ - --rsqrts_type -+static rsqrts_type - get_rsqrts_type (machine_mode mode) - { - switch (mode) - { -- case DFmode: return gen_aarch64_rsqrts_df3; -- case SFmode: return gen_aarch64_rsqrts_sf3; -- case V2DFmode: return gen_aarch64_rsqrts_v2df3; -- case V2SFmode: return gen_aarch64_rsqrts_v2sf3; -- case V4SFmode: return gen_aarch64_rsqrts_v4sf3; -+ case DFmode: return gen_aarch64_rsqrtsdf; -+ case SFmode: return gen_aarch64_rsqrtssf; -+ case V2DFmode: return gen_aarch64_rsqrtsv2df; -+ case V2SFmode: return gen_aarch64_rsqrtsv2sf; -+ case V4SFmode: return gen_aarch64_rsqrtsv4sf; - default: gcc_unreachable (); - } - } - --/* Emit instruction sequence to compute the reciprocal square root using the -- Newton-Raphson series. Iterate over the series twice for SF -- and thrice for DF. */ -+/* Emit instruction sequence to compute either the approximate square root -+ or its approximate reciprocal, depending on the flag RECP, and return -+ whether the sequence was emitted or not. 
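The iteration implemented below is standard Newton-Raphson on x ~ 1/sqrt(d): each step
computes x' = x * (3 - d*x*x) / 2, which is exactly the (3 - a*b)/2 that FRSQRTS returns.
A runnable scalar model (the crude starting value stands in for the FRSQRTE estimate; that
value is an assumption, not the hardware one):

    #include <stdio.h>

    int main (void)
    {
      double d = 2.0;
      double x = 0.7;                     /* stand-in for the FRSQRTE estimate */
      for (int i = 0; i < 3; i++)         /* three steps for DFmode, as below */
        x = x * (3.0 - d * x * x) / 2.0;  /* one FMUL plus one FRSQRTS */
      printf ("%.17g\n", d * x);          /* sqrt(d) == d * rsqrt(d) */
      return 0;
    }

This converges to sqrt(2). The extra masking in the function below handles the d == 0.0
input, where the reciprocal estimate is infinite and the intermediate result must be squashed
back to 0.0.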
*/ - --void --aarch64_emit_approx_rsqrt (rtx dst, rtx src) -+bool -+aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) - { -- machine_mode mode = GET_MODE (src); -- gcc_assert ( -- mode == SFmode || mode == V2SFmode || mode == V4SFmode -- || mode == DFmode || mode == V2DFmode); -+ machine_mode mode = GET_MODE (dst); -+ -+ if (GET_MODE_INNER (mode) == HFmode) -+ return false; - -- rtx xsrc = gen_reg_rtx (mode); -- emit_move_insn (xsrc, src); -- rtx x0 = gen_reg_rtx (mode); -+ machine_mode mmsk = mode_for_vector -+ (int_mode_for_mode (GET_MODE_INNER (mode)), -+ GET_MODE_NUNITS (mode)); -+ bool use_approx_sqrt_p = (!recp -+ && (flag_mlow_precision_sqrt -+ || (aarch64_tune_params.approx_modes->sqrt -+ & AARCH64_APPROX_MODE (mode)))); -+ bool use_approx_rsqrt_p = (recp -+ && (flag_mrecip_low_precision_sqrt -+ || (aarch64_tune_params.approx_modes->recip_sqrt -+ & AARCH64_APPROX_MODE (mode)))); -+ -+ if (!flag_finite_math_only -+ || flag_trapping_math -+ || !flag_unsafe_math_optimizations -+ || !(use_approx_sqrt_p || use_approx_rsqrt_p) -+ || optimize_function_for_size_p (cfun)) -+ return false; - -- emit_insn ((*get_rsqrte_type (mode)) (x0, xsrc)); -+ rtx xmsk = gen_reg_rtx (mmsk); -+ if (!recp) -+ /* When calculating the approximate square root, compare the argument with -+ 0.0 and create a mask. */ -+ emit_insn (gen_rtx_SET (xmsk, gen_rtx_NEG (mmsk, gen_rtx_EQ (mmsk, src, -+ CONST0_RTX (mode))))); - -- bool double_mode = (mode == DFmode || mode == V2DFmode); -+ /* Estimate the approximate reciprocal square root. */ -+ rtx xdst = gen_reg_rtx (mode); -+ emit_insn ((*get_rsqrte_type (mode)) (xdst, src)); - -- int iterations = double_mode ? 3 : 2; -+ /* Iterate over the series twice for SF and thrice for DF. */ -+ int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2; - -- /* Optionally iterate over the series one less time than otherwise. */ -- if (flag_mrecip_low_precision_sqrt) -+ /* Optionally iterate over the series once less for faster performance -+ while sacrificing the accuracy. */ -+ if ((recp && flag_mrecip_low_precision_sqrt) -+ || (!recp && flag_mlow_precision_sqrt)) - iterations--; - -- for (int i = 0; i < iterations; ++i) -+ /* Iterate over the series to calculate the approximate reciprocal square -+ root. */ -+ rtx x1 = gen_reg_rtx (mode); -+ while (iterations--) - { -- rtx x1 = gen_reg_rtx (mode); - rtx x2 = gen_reg_rtx (mode); -- rtx x3 = gen_reg_rtx (mode); -- emit_set_insn (x2, gen_rtx_MULT (mode, x0, x0)); -+ emit_set_insn (x2, gen_rtx_MULT (mode, xdst, xdst)); -+ -+ emit_insn ((*get_rsqrts_type (mode)) (x1, src, x2)); -+ -+ if (iterations > 0) -+ emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, x1)); -+ } -+ -+ if (!recp) -+ { -+ /* Qualify the approximate reciprocal square root when the argument is -+ 0.0 by squashing the intermediary result to 0.0. */ -+ rtx xtmp = gen_reg_rtx (mmsk); -+ emit_set_insn (xtmp, gen_rtx_AND (mmsk, gen_rtx_NOT (mmsk, xmsk), -+ gen_rtx_SUBREG (mmsk, xdst, 0))); -+ emit_move_insn (xdst, gen_rtx_SUBREG (mode, xtmp, 0)); -+ -+ /* Calculate the approximate square root. */ -+ emit_set_insn (xdst, gen_rtx_MULT (mode, xdst, src)); -+ } -+ -+ /* Finalize the approximation. */ -+ emit_set_insn (dst, gen_rtx_MULT (mode, xdst, x1)); -+ -+ return true; -+} -+ -+typedef rtx (*recpe_type) (rtx, rtx); -+ -+/* Select reciprocal initial estimate insn depending on machine mode. 
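The division expansion below uses the same scheme with FRECPE/FRECPS: the step
x' = x * (2 - d*x) squares the relative error each iteration, and (2 - a*b) is what FRECPS
computes in one instruction. A runnable scalar model (starting value again a stand-in for the
estimate):

    #include <stdio.h>

    int main (void)
    {
      double den = 3.0, num = 2.0;
      double x = 0.33;                 /* stand-in for the FRECPE estimate */
      for (int i = 0; i < 3; i++)      /* three steps for DFmode, as below */
        x = x * (2.0 - den * x);       /* one FRECPS plus one FMUL */
      printf ("%.17g\n", num * x);     /* num / den via the reciprocal */
      return 0;
    }

As in the function below, the final multiply by num is skipped when num is 1.0, since x
itself is then the result.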
*/ -+ -+static recpe_type -+get_recpe_type (machine_mode mode) -+{ -+ switch (mode) -+ { -+ case SFmode: return (gen_aarch64_frecpesf); -+ case V2SFmode: return (gen_aarch64_frecpev2sf); -+ case V4SFmode: return (gen_aarch64_frecpev4sf); -+ case DFmode: return (gen_aarch64_frecpedf); -+ case V2DFmode: return (gen_aarch64_frecpev2df); -+ default: gcc_unreachable (); -+ } -+} -+ -+typedef rtx (*recps_type) (rtx, rtx, rtx); -+ -+/* Select reciprocal series step insn depending on machine mode. */ -+ -+static recps_type -+get_recps_type (machine_mode mode) -+{ -+ switch (mode) -+ { -+ case SFmode: return (gen_aarch64_frecpssf); -+ case V2SFmode: return (gen_aarch64_frecpsv2sf); -+ case V4SFmode: return (gen_aarch64_frecpsv4sf); -+ case DFmode: return (gen_aarch64_frecpsdf); -+ case V2DFmode: return (gen_aarch64_frecpsv2df); -+ default: gcc_unreachable (); -+ } -+} -+ -+/* Emit the instruction sequence to compute the approximation for the division -+ of NUM by DEN in QUO and return whether the sequence was emitted or not. */ -+ -+bool -+aarch64_emit_approx_div (rtx quo, rtx num, rtx den) -+{ -+ machine_mode mode = GET_MODE (quo); - -- emit_insn ((*get_rsqrts_type (mode)) (x3, xsrc, x2)); -+ if (GET_MODE_INNER (mode) == HFmode) -+ return false; -+ -+ bool use_approx_division_p = (flag_mlow_precision_div -+ || (aarch64_tune_params.approx_modes->division -+ & AARCH64_APPROX_MODE (mode))); -+ -+ if (!flag_finite_math_only -+ || flag_trapping_math -+ || !flag_unsafe_math_optimizations -+ || optimize_function_for_size_p (cfun) -+ || !use_approx_division_p) -+ return false; -+ -+ /* Estimate the approximate reciprocal. */ -+ rtx xrcp = gen_reg_rtx (mode); -+ emit_insn ((*get_recpe_type (mode)) (xrcp, den)); -+ -+ /* Iterate over the series twice for SF and thrice for DF. */ -+ int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2; -+ -+ /* Optionally iterate over the series once less for faster performance, -+ while sacrificing the accuracy. */ -+ if (flag_mlow_precision_div) -+ iterations--; - -- emit_set_insn (x1, gen_rtx_MULT (mode, x0, x3)); -- x0 = x1; -+ /* Iterate over the series to calculate the approximate reciprocal. */ -+ rtx xtmp = gen_reg_rtx (mode); -+ while (iterations--) -+ { -+ emit_insn ((*get_recps_type (mode)) (xtmp, xrcp, den)); -+ -+ if (iterations > 0) -+ emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp)); -+ } -+ -+ if (num != CONST1_RTX (mode)) -+ { -+ /* As the approximate reciprocal of DEN is already calculated, only -+ calculate the approximate division when NUM is not 1.0. */ -+ rtx xnum = force_reg (mode, num); -+ emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xnum)); - } - -- emit_move_insn (dst, x0); -+ /* Finalize the approximation. */ -+ emit_set_insn (quo, gen_rtx_MULT (mode, xrcp, xtmp)); -+ return true; - } - - /* Return the number of instructions that can be issued per cycle. */ -@@ -8046,32 +8261,37 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts) - opts->x_align_functions = aarch64_tune_params.function_align; - } - -- /* If nopcrelative_literal_loads is set on the command line, this -+ /* We default to no pc-relative literal loads. */ -+ -+ aarch64_pcrelative_literal_loads = false; -+ -+ /* If -mpc-relative-literal-loads is set on the command line, this - implies that the user asked for PC relative literal loads. 
*/ -- if (opts->x_nopcrelative_literal_loads == 1) -- aarch64_nopcrelative_literal_loads = false; -+ if (opts->x_pcrelative_literal_loads == 1) -+ aarch64_pcrelative_literal_loads = true; - -- /* If it is not set on the command line, we default to no pc -- relative literal loads, unless the workaround for Cortex-A53 -- erratum 843419 is in effect. */ - /* This is PR70113. When building the Linux kernel with - CONFIG_ARM64_ERRATUM_843419, support for relocations - R_AARCH64_ADR_PREL_PG_HI21 and R_AARCH64_ADR_PREL_PG_HI21_NC is - removed from the kernel to avoid loading objects with possibly -- offending sequences. With nopcrelative_literal_loads, we would -+ offending sequences. Without -mpc-relative-literal-loads we would - generate such relocations, preventing the kernel build from - succeeding. */ -- if (opts->x_nopcrelative_literal_loads == 2 -- && !TARGET_FIX_ERR_A53_843419) -- aarch64_nopcrelative_literal_loads = true; -+ if (opts->x_pcrelative_literal_loads == 2 -+ && TARGET_FIX_ERR_A53_843419) -+ aarch64_pcrelative_literal_loads = true; - -- /* In the tiny memory model it makes no sense -- to disallow non PC relative literal pool loads -- as many other things will break anyway. */ -- if (opts->x_nopcrelative_literal_loads -- && (aarch64_cmodel == AARCH64_CMODEL_TINY -- || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)) -- aarch64_nopcrelative_literal_loads = false; -+ /* In the tiny memory model it makes no sense to disallow PC relative -+ literal pool loads. */ -+ if (aarch64_cmodel == AARCH64_CMODEL_TINY -+ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) -+ aarch64_pcrelative_literal_loads = true; -+ -+ /* When enabling the lower precision Newton series for the square root, also -+ enable it for the reciprocal square root, since the latter is an -+ intermediary step for the former. */ -+ if (flag_mlow_precision_sqrt) -+ flag_mrecip_low_precision_sqrt = true; - } - - /* 'Unpack' up the internal tuning structs and update the options -@@ -8374,9 +8594,6 @@ aarch64_override_options (void) - while processing functions with potential target attributes. */ - target_option_default_node = target_option_current_node - = build_target_option_node (&global_options); -- -- aarch64_register_fma_steering (); -- - } - - /* Implement targetm.override_options_after_change. */ -@@ -9279,15 +9496,18 @@ aarch64_classify_symbol (rtx x, rtx offset) - switch (aarch64_cmodel) - { - case AARCH64_CMODEL_TINY: -- /* When we retreive symbol + offset address, we have to make sure -+ /* When we retrieve symbol + offset address, we have to make sure - the offset does not cause overflow of the final address. But - we have no way of knowing the address of symbol at compile time - so we can't accurately say if the distance between the PC and - symbol + offset is outside the addressible range of +/-1M in the - TINY code model. So we rely on images not being greater than - 1M and cap the offset at 1M and anything beyond 1M will have to -- be loaded using an alternative mechanism. */ -- if (SYMBOL_REF_WEAK (x) -+ be loaded using an alternative mechanism. Furthermore if the -+ symbol is a weak reference to something that isn't known to -+ resolve to a symbol in this module, then force to memory. 
*/ -+ if ((SYMBOL_REF_WEAK (x) -+ && !aarch64_symbol_binds_local_p (x)) - || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575) - return SYMBOL_FORCE_TO_MEM; - return SYMBOL_TINY_ABSOLUTE; -@@ -9295,7 +9515,8 @@ aarch64_classify_symbol (rtx x, rtx offset) - case AARCH64_CMODEL_SMALL: - /* Same reasoning as the tiny code model, but the offset cap here is - 4G. */ -- if (SYMBOL_REF_WEAK (x) -+ if ((SYMBOL_REF_WEAK (x) -+ && !aarch64_symbol_binds_local_p (x)) - || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263), - HOST_WIDE_INT_C (4294967264))) - return SYMBOL_FORCE_TO_MEM; -@@ -9317,8 +9538,7 @@ aarch64_classify_symbol (rtx x, rtx offset) - /* This is alright even in PIC code as the constant - pool reference is always PC relative and within - the same translation unit. */ -- if (nopcrelative_literal_loads -- && CONSTANT_POOL_ADDRESS_P (x)) -+ if (CONSTANT_POOL_ADDRESS_P (x)) - return SYMBOL_SMALL_ABSOLUTE; - else - return SYMBOL_FORCE_TO_MEM; -@@ -9454,6 +9674,13 @@ aarch64_build_builtin_va_list (void) - FIELD_DECL, get_identifier ("__vr_offs"), - integer_type_node); - -+ /* Tell tree-stdarg pass about our internal offset fields. -+ NOTE: va_list_gpr/fpr_counter_field are only used for tree comparision -+ purpose to identify whether the code is updating va_list internal -+ offset fields through irregular way. */ -+ va_list_gpr_counter_field = f_groff; -+ va_list_fpr_counter_field = f_vroff; -+ - DECL_ARTIFICIAL (f_stack) = 1; - DECL_ARTIFICIAL (f_grtop) = 1; - DECL_ARTIFICIAL (f_vrtop) = 1; -@@ -9486,15 +9713,17 @@ aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) - tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff; - tree stack, grtop, vrtop, groff, vroff; - tree t; -- int gr_save_area_size; -- int vr_save_area_size; -+ int gr_save_area_size = cfun->va_list_gpr_size; -+ int vr_save_area_size = cfun->va_list_fpr_size; - int vr_offset; - - cum = &crtl->args.info; -- gr_save_area_size -- = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD; -- vr_save_area_size -- = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG; -+ if (cfun->va_list_gpr_size) -+ gr_save_area_size = MIN ((NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD, -+ cfun->va_list_gpr_size); -+ if (cfun->va_list_fpr_size) -+ vr_save_area_size = MIN ((NUM_FP_ARG_REGS - cum->aapcs_nvrn) -+ * UNITS_PER_VREG, cfun->va_list_fpr_size); - - if (!TARGET_FLOAT) - { -@@ -9823,7 +10052,8 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, - { - CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); - CUMULATIVE_ARGS local_cum; -- int gr_saved, vr_saved; -+ int gr_saved = cfun->va_list_gpr_size; -+ int vr_saved = cfun->va_list_fpr_size; - - /* The caller has advanced CUM up to, but not beyond, the last named - argument. Advance a local copy of CUM past the last "real" named -@@ -9831,9 +10061,14 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, - local_cum = *cum; - aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true); - -- /* Found out how many registers we need to save. */ -- gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn; -- vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn; -+ /* Found out how many registers we need to save. -+ Honor tree-stdvar analysis results. 
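Worked numbers for the sizing below: suppose the stdarg pass proves a variadic function reads
at most two GP va_arg values and no FP ones, so cfun->va_list_gpr_size == 16 and
cfun->va_list_fpr_size == 0. With one named integer argument (aapcs_ncrn == 1) the
general-register save area shrinks from seven registers to two, and the vector save area
disappears entirely:

    gr_saved = MIN (NUM_ARG_REGS - ncrn, va_list_gpr_size / UNITS_PER_WORD)
             = MIN (8 - 1, 16 / 8) = 2     only x1 and x2 are spilled
    vr_saved = 0                           no q-register spill at all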
*/ -+ if (cfun->va_list_gpr_size) -+ gr_saved = MIN (NUM_ARG_REGS - local_cum.aapcs_ncrn, -+ cfun->va_list_gpr_size / UNITS_PER_WORD); -+ if (cfun->va_list_fpr_size) -+ vr_saved = MIN (NUM_FP_ARG_REGS - local_cum.aapcs_nvrn, -+ cfun->va_list_fpr_size / UNITS_PER_VREG); - - if (!TARGET_FLOAT) - { -@@ -9861,7 +10096,7 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, - /* We can't use move_block_from_reg, because it will use - the wrong mode, storing D regs only. */ - machine_mode mode = TImode; -- int off, i; -+ int off, i, vr_start; - - /* Set OFF to the offset from virtual_incoming_args_rtx of - the first vector register. The VR save area lies below -@@ -9870,14 +10105,15 @@ aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode, - STACK_BOUNDARY / BITS_PER_UNIT); - off -= vr_saved * UNITS_PER_VREG; - -- for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i) -+ vr_start = V0_REGNUM + local_cum.aapcs_nvrn; -+ for (i = 0; i < vr_saved; ++i) - { - rtx ptr, mem; - - ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off); - mem = gen_frame_mem (mode, ptr); - set_mem_alias_set (mem, get_varargs_alias_set ()); -- aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i)); -+ aarch64_emit_move (mem, gen_rtx_REG (mode, vr_start + i)); - off += UNITS_PER_VREG; - } - } -@@ -10839,33 +11075,6 @@ aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode, - gen_rtx_REG (mode, rsrc + count - i - 1)); - } - --/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is -- one of VSTRUCT modes: OI, CI or XI. */ --int --aarch64_simd_attr_length_move (rtx_insn *insn) --{ -- machine_mode mode; -- -- extract_insn_cached (insn); -- -- if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) -- { -- mode = GET_MODE (recog_data.operand[0]); -- switch (mode) -- { -- case OImode: -- return 8; -- case CImode: -- return 12; -- case XImode: -- return 16; -- default: -- gcc_unreachable (); -- } -- } -- return 4; --} -- - /* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is - one of VSTRUCT modes: OI, CI, or XI. */ - int -@@ -10899,6 +11108,37 @@ aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed) - return true; - } - -+/* Return true if the vector misalignment factor is supported by the -+ target. */ -+static bool -+aarch64_builtin_support_vector_misalignment (machine_mode mode, -+ const_tree type, int misalignment, -+ bool is_packed) -+{ -+ if (TARGET_SIMD && STRICT_ALIGNMENT) -+ { -+ /* Return if movmisalign pattern is not supported for this mode. */ -+ if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) -+ return false; -+ -+ if (misalignment == -1) -+ { -+ /* Misalignment factor is unknown at compile time but we know -+ it's word aligned. */ -+ if (aarch64_simd_vector_alignment_reachable (type, is_packed)) -+ { -+ int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type)); -+ -+ if (element_size != 64) -+ return true; -+ } -+ return false; -+ } -+ } -+ return default_builtin_support_vector_misalignment (mode, type, misalignment, -+ is_packed); -+} -+ - /* If VALS is a vector constant that can be loaded into a register - using DUP, generate instructions to do so and return an RTX to - assign to the register. Otherwise return NULL_RTX. 
*/ -@@ -11947,12 +12187,11 @@ aarch64_output_simd_mov_immediate (rtx const_vector, - info.value = GEN_INT (0); - else - { --#define buf_size 20 -+ const unsigned int buf_size = 20; - char float_buf[buf_size] = {'\0'}; - real_to_decimal_for_mode (float_buf, - CONST_DOUBLE_REAL_VALUE (info.value), - buf_size, buf_size, 1, mode); --#undef buf_size - - if (lane_count == 1) - snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf); -@@ -12186,6 +12425,8 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) - case V4SImode: gen = gen_aarch64_trn2v4si; break; - case V2SImode: gen = gen_aarch64_trn2v2si; break; - case V2DImode: gen = gen_aarch64_trn2v2di; break; -+ case V4HFmode: gen = gen_aarch64_trn2v4hf; break; -+ case V8HFmode: gen = gen_aarch64_trn2v8hf; break; - case V4SFmode: gen = gen_aarch64_trn2v4sf; break; - case V2SFmode: gen = gen_aarch64_trn2v2sf; break; - case V2DFmode: gen = gen_aarch64_trn2v2df; break; -@@ -12204,6 +12445,8 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) - case V4SImode: gen = gen_aarch64_trn1v4si; break; - case V2SImode: gen = gen_aarch64_trn1v2si; break; - case V2DImode: gen = gen_aarch64_trn1v2di; break; -+ case V4HFmode: gen = gen_aarch64_trn1v4hf; break; -+ case V8HFmode: gen = gen_aarch64_trn1v8hf; break; - case V4SFmode: gen = gen_aarch64_trn1v4sf; break; - case V2SFmode: gen = gen_aarch64_trn1v2sf; break; - case V2DFmode: gen = gen_aarch64_trn1v2df; break; -@@ -12269,6 +12512,8 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d) - case V4SImode: gen = gen_aarch64_uzp2v4si; break; - case V2SImode: gen = gen_aarch64_uzp2v2si; break; - case V2DImode: gen = gen_aarch64_uzp2v2di; break; -+ case V4HFmode: gen = gen_aarch64_uzp2v4hf; break; -+ case V8HFmode: gen = gen_aarch64_uzp2v8hf; break; - case V4SFmode: gen = gen_aarch64_uzp2v4sf; break; - case V2SFmode: gen = gen_aarch64_uzp2v2sf; break; - case V2DFmode: gen = gen_aarch64_uzp2v2df; break; -@@ -12287,6 +12532,8 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d) - case V4SImode: gen = gen_aarch64_uzp1v4si; break; - case V2SImode: gen = gen_aarch64_uzp1v2si; break; - case V2DImode: gen = gen_aarch64_uzp1v2di; break; -+ case V4HFmode: gen = gen_aarch64_uzp1v4hf; break; -+ case V8HFmode: gen = gen_aarch64_uzp1v8hf; break; - case V4SFmode: gen = gen_aarch64_uzp1v4sf; break; - case V2SFmode: gen = gen_aarch64_uzp1v2sf; break; - case V2DFmode: gen = gen_aarch64_uzp1v2df; break; -@@ -12357,6 +12604,8 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) - case V4SImode: gen = gen_aarch64_zip2v4si; break; - case V2SImode: gen = gen_aarch64_zip2v2si; break; - case V2DImode: gen = gen_aarch64_zip2v2di; break; -+ case V4HFmode: gen = gen_aarch64_zip2v4hf; break; -+ case V8HFmode: gen = gen_aarch64_zip2v8hf; break; - case V4SFmode: gen = gen_aarch64_zip2v4sf; break; - case V2SFmode: gen = gen_aarch64_zip2v2sf; break; - case V2DFmode: gen = gen_aarch64_zip2v2df; break; -@@ -12375,6 +12624,8 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) - case V4SImode: gen = gen_aarch64_zip1v4si; break; - case V2SImode: gen = gen_aarch64_zip1v2si; break; - case V2DImode: gen = gen_aarch64_zip1v2di; break; -+ case V4HFmode: gen = gen_aarch64_zip1v4hf; break; -+ case V8HFmode: gen = gen_aarch64_zip1v8hf; break; - case V4SFmode: gen = gen_aarch64_zip1v4sf; break; - case V2SFmode: gen = gen_aarch64_zip1v2sf; break; - case V2DFmode: gen = gen_aarch64_zip1v2df; break; -@@ -12419,6 +12670,8 @@ aarch64_evpc_ext (struct expand_vec_perm_d *d) - case V8HImode: gen = gen_aarch64_extv8hi; break; - case V2SImode: gen = gen_aarch64_extv2si; break; - 
case V4SImode: gen = gen_aarch64_extv4si; break; -+ case V4HFmode: gen = gen_aarch64_extv4hf; break; -+ case V8HFmode: gen = gen_aarch64_extv8hf; break; - case V2SFmode: gen = gen_aarch64_extv2sf; break; - case V4SFmode: gen = gen_aarch64_extv4sf; break; - case V2DImode: gen = gen_aarch64_extv2di; break; -@@ -12494,6 +12747,8 @@ aarch64_evpc_rev (struct expand_vec_perm_d *d) - case V2SImode: gen = gen_aarch64_rev64v2si; break; - case V4SFmode: gen = gen_aarch64_rev64v4sf; break; - case V2SFmode: gen = gen_aarch64_rev64v2sf; break; -+ case V8HFmode: gen = gen_aarch64_rev64v8hf; break; -+ case V4HFmode: gen = gen_aarch64_rev64v4hf; break; - default: - return false; - } -@@ -12737,24 +12992,6 @@ aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, - return ret; - } - --/* Implement target hook CANNOT_CHANGE_MODE_CLASS. */ --bool --aarch64_cannot_change_mode_class (machine_mode from, -- machine_mode to, -- enum reg_class rclass) --{ -- /* We cannot allow word_mode subregs of full vector modes. -- Otherwise the middle-end will assume it's ok to store to -- (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits -- of the 128-bit register. However, after reload the subreg will -- be dropped leaving a plain DImode store. See PR67609 for a more -- detailed dicussion. In all other cases, we want to be permissive -- and return false. */ -- return (reg_classes_intersect_p (FP_REGS, rclass) -- && GET_MODE_SIZE (to) == UNITS_PER_WORD -- && GET_MODE_SIZE (from) > UNITS_PER_WORD); --} -- - rtx - aarch64_reverse_mask (enum machine_mode mode) - { -@@ -12776,7 +13013,14 @@ aarch64_reverse_mask (enum machine_mode mode) - return force_reg (V16QImode, mask); - } - --/* Implement MODES_TIEABLE_P. */ -+/* Implement MODES_TIEABLE_P. In principle we should always return true. -+ However due to issues with register allocation it is preferable to avoid -+ tieing integer scalar and FP scalar modes. Executing integer operations -+ in general registers is better than treating them as scalar vector -+ operations. This reduces latency and avoids redundant int<->FP moves. -+ So tie modes if they are either the same class, or vector modes with -+ other vector modes, vector structs or any scalar mode. -+*/ - - bool - aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2) -@@ -12787,9 +13031,12 @@ aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2) - /* We specifically want to allow elements of "structure" modes to - be tieable to the structure. This more general condition allows - other rarer situations too. */ -- if (TARGET_SIMD -- && aarch64_vector_mode_p (mode1) -- && aarch64_vector_mode_p (mode2)) -+ if (aarch64_vector_mode_p (mode1) && aarch64_vector_mode_p (mode2)) -+ return true; -+ -+ /* Also allow any scalar modes with vectors. */ -+ if (aarch64_vector_mode_supported_p (mode1) -+ || aarch64_vector_mode_supported_p (mode2)) - return true; - - return false; -@@ -12953,6 +13200,63 @@ aarch64_expand_movmem (rtx *operands) - return true; - } - -+/* Split a DImode store of a CONST_INT SRC to MEM DST as two -+ SImode stores. Handle the case when the constant has identical -+ bottom and top halves. This is beneficial when the two stores can be -+ merged into an STP and we avoid synthesising potentially expensive -+ immediates twice. Return true if such a split is possible. 
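A runnable model of the test below (plain C, not the GCC routine): the split only fires when
the two 32-bit halves of the constant are identical, because a single SImode immediate can
then feed both halves of one STP:

    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
      uint64_t val = 0x0140c0da0140c0daull;   /* the constant from the comment */
      uint32_t lo = (uint32_t) val;           /* low 32 bits */
      uint32_t hi = (uint32_t) (val >> 32);   /* high 32 bits */
      if (lo == hi)
        printf ("split: mov w1, 0x%x (+ movk); stp w1, w1, [x0]\n", lo);
      else
        printf ("keep the single DImode store\n");
      return 0;
    }

The cost check then only accepts the split when it saves at least two move-immediate
instructions, or saves anything at all when optimizing for size.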
*/ -+ -+bool -+aarch64_split_dimode_const_store (rtx dst, rtx src) -+{ -+ rtx lo = gen_lowpart (SImode, src); -+ rtx hi = gen_highpart_mode (SImode, DImode, src); -+ -+ bool size_p = optimize_function_for_size_p (cfun); -+ -+ if (!rtx_equal_p (lo, hi)) -+ return false; -+ -+ unsigned int orig_cost -+ = aarch64_internal_mov_immediate (NULL_RTX, src, false, DImode); -+ unsigned int lo_cost -+ = aarch64_internal_mov_immediate (NULL_RTX, lo, false, SImode); -+ -+ /* We want to transform: -+ MOV x1, 49370 -+ MOVK x1, 0x140, lsl 16 -+ MOVK x1, 0xc0da, lsl 32 -+ MOVK x1, 0x140, lsl 48 -+ STR x1, [x0] -+ into: -+ MOV w1, 49370 -+ MOVK w1, 0x140, lsl 16 -+ STP w1, w1, [x0] -+ So we want to perform this only when we save two instructions -+ or more. When optimizing for size, however, accept any code size -+ savings we can. */ -+ if (size_p && orig_cost <= lo_cost) -+ return false; -+ -+ if (!size_p -+ && (orig_cost <= lo_cost + 1)) -+ return false; -+ -+ rtx mem_lo = adjust_address (dst, SImode, 0); -+ if (!aarch64_mem_pair_operand (mem_lo, SImode)) -+ return false; -+ -+ rtx tmp_reg = gen_reg_rtx (SImode); -+ aarch64_expand_mov_immediate (tmp_reg, lo); -+ rtx mem_hi = aarch64_move_pointer (mem_lo, GET_MODE_SIZE (SImode)); -+ /* Don't emit an explicit store pair as this may not be always profitable. -+ Let the sched-fusion logic decide whether to merge them. */ -+ emit_move_insn (mem_lo, tmp_reg); -+ emit_move_insn (mem_hi, tmp_reg); -+ -+ return true; -+} -+ - /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ - - static unsigned HOST_WIDE_INT -@@ -13305,6 +13609,14 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) - return false; - } - -+/* Return true iff the instruction fusion described by OP is enabled. */ -+ -+bool -+aarch64_fusion_enabled_p (enum aarch64_fusion_pairs op) -+{ -+ return (aarch64_tune_params.fusible_ops & op) != 0; -+} -+ - /* If MEM is in the form of [base+offset], extract the two parts - of address and set to BASE and OFFSET, otherwise return false - after clearing BASE and OFFSET. */ -@@ -13449,6 +13761,26 @@ aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri, - return; - } - -+/* Implement the TARGET_SCHED_ADJUST_PRIORITY hook. -+ Adjust priority of sha1h instructions so they are scheduled before -+ other SHA1 instructions. */ -+ -+static int -+aarch64_sched_adjust_priority (rtx_insn *insn, int priority) -+{ -+ rtx x = PATTERN (insn); -+ -+ if (GET_CODE (x) == SET) -+ { -+ x = SET_SRC (x); -+ -+ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SHA1H) -+ return priority + 10; -+ } -+ -+ return priority; -+} -+ - /* Given OPERANDS of consecutive load/store, check if we can merge - them into ldp/stp. LOAD is true if they are load instructions. - MODE is the mode of memory operands. */ -@@ -13483,6 +13815,15 @@ aarch64_operands_ok_for_ldpstp (rtx *operands, bool load, - if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)) - return false; - -+ /* If we have SImode and slow unaligned ldp, -+ check the alignment to be at least 8 byte. */ -+ if (mode == SImode -+ && (aarch64_tune_params.extra_tuning_flags -+ & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) -+ && !optimize_size -+ && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT) -+ return false; -+ - /* Check if the addresses are in the form of [base+offset]. 
*/ - extract_base_offset_in_addr (mem_1, &base_1, &offset_1); - if (base_1 == NULL_RTX || offset_1 == NULL_RTX) -@@ -13642,6 +13983,15 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load, - return false; - } - -+ /* If we have SImode and slow unaligned ldp, -+ check the alignment to be at least 8 byte. */ -+ if (mode == SImode -+ && (aarch64_tune_params.extra_tuning_flags -+ & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) -+ && !optimize_size -+ && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT) -+ return false; -+ - if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1))) - rclass_1 = FP_REGS; - else -@@ -13877,13 +14227,13 @@ aarch64_promoted_type (const_tree t) - /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ - - static bool --aarch64_optab_supported_p (int op, machine_mode, machine_mode, -+aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode, - optimization_type opt_type) - { - switch (op) - { - case rsqrt_optab: -- return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (); -+ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1); - - default: - return true; -@@ -14017,6 +14367,10 @@ aarch64_optab_supported_p (int op, machine_mode, machine_mode, - #undef TARGET_LEGITIMATE_CONSTANT_P - #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p - -+#undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT -+#define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \ -+ aarch64_legitimize_address_displacement -+ - #undef TARGET_LIBGCC_CMP_RETURN_MODE - #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode - -@@ -14119,6 +14473,10 @@ aarch64_optab_supported_p (int op, machine_mode, machine_mode, - #undef TARGET_VECTOR_MODE_SUPPORTED_P - #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p - -+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT -+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ -+ aarch64_builtin_support_vector_misalignment -+ - #undef TARGET_ARRAY_MODE_SUPPORTED_P - #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p - -@@ -14196,6 +14554,9 @@ aarch64_optab_supported_p (int op, machine_mode, machine_mode, - #undef TARGET_CAN_USE_DOLOOP_P - #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost - -+#undef TARGET_SCHED_ADJUST_PRIORITY -+#define TARGET_SCHED_ADJUST_PRIORITY aarch64_sched_adjust_priority -+ - #undef TARGET_SCHED_MACRO_FUSION_P - #define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p - -@@ -14220,6 +14581,9 @@ aarch64_optab_supported_p (int op, machine_mode, machine_mode, - #undef TARGET_OPTAB_SUPPORTED_P - #define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p - -+#undef TARGET_OMIT_STRUCT_RETURN_REG -+#define TARGET_OMIT_STRUCT_RETURN_REG true -+ - struct gcc_target targetm = TARGET_INITIALIZER; - - #include "gt-aarch64.h" ---- a/src/gcc/config/aarch64/aarch64.h -+++ b/src/gcc/config/aarch64/aarch64.h -@@ -132,9 +132,14 @@ extern unsigned aarch64_architecture_version; - #define AARCH64_FL_FP (1 << 1) /* Has FP. */ - #define AARCH64_FL_CRYPTO (1 << 2) /* Has crypto. */ - #define AARCH64_FL_CRC (1 << 3) /* Has CRC. */ --/* ARMv8.1 architecture extensions. */ -+/* ARMv8.1-A architecture extensions. */ - #define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */ --#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1 extensions. */ -+#define AARCH64_FL_V8_1 (1 << 5) /* Has ARMv8.1-A extensions. */ -+/* ARMv8.2-A architecture extensions. */ -+#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */ -+#define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. 
*/
-+/* ARMv8.3-A architecture extensions. */
-+#define AARCH64_FL_V8_3 (1 << 10) /* Has ARMv8.3-A features. */
- 
- /* Has FP and SIMD. */
- #define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
-@@ -146,6 +151,10 @@ extern unsigned aarch64_architecture_version;
- #define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
- #define AARCH64_FL_FOR_ARCH8_1 \
- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC | AARCH64_FL_V8_1)
-+#define AARCH64_FL_FOR_ARCH8_2 \
-+ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
-+#define AARCH64_FL_FOR_ARCH8_3 \
-+ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3)
- 
- /* Macros to test ISA flags. */
- 
-@@ -155,6 +164,9 @@ extern unsigned aarch64_architecture_version;
- #define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
- #define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
- #define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_V8_1)
-+#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2)
-+#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
-+#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
- 
- /* Crypto is an optional extension to AdvSIMD. */
- #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
-@@ -165,6 +177,13 @@ extern unsigned aarch64_architecture_version;
- /* Atomic instructions that can be enabled through the +lse extension. */
- #define TARGET_LSE (AARCH64_ISA_LSE)
- 
-+/* ARMv8.2-A FP16 support that can be enabled through the +fp16 extension. */
-+#define TARGET_FP_F16INST (TARGET_FLOAT && AARCH64_ISA_F16)
-+#define TARGET_SIMD_F16INST (TARGET_SIMD && AARCH64_ISA_F16)
-+
-+/* ARMv8.3-A features. */
-+#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)
-+
- /* Make sure this is always defined so we don't have to check for ifdefs
- but rather use normal ifs. */
- #ifndef TARGET_FIX_ERR_A53_835769_DEFAULT
-@@ -193,7 +212,7 @@ extern unsigned aarch64_architecture_version;
- ((aarch64_fix_a53_err843419 == 2) \
- ? TARGET_FIX_ERR_A53_843419_DEFAULT : aarch64_fix_a53_err843419)
- 
--/* ARMv8.1 Adv.SIMD support. */
-+/* ARMv8.1-A Adv.SIMD support. */
- #define TARGET_SIMD_RDMA (TARGET_SIMD && AARCH64_ISA_RDMA)
- 
- /* Standard register usage. */
-@@ -539,11 +558,14 @@ struct GTY (()) aarch64_frame
- STACK_BOUNDARY. */
- HOST_WIDE_INT saved_varargs_size;
- 
-+ /* The size of the saved callee-save int/FP registers. */
-+
- HOST_WIDE_INT saved_regs_size;
-- /* Padding if needed after all the callee save registers have
-- been saved. */
-- HOST_WIDE_INT padding0;
-- HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */
-+
-+ /* Offset from the base of the frame (incoming SP) to the
-+ top of the locals area. This value is always a multiple of
-+ STACK_BOUNDARY. */
-+ HOST_WIDE_INT locals_offset;
- 
- /* Offset from the base of the frame (incoming SP) to the
- hard_frame_pointer. This value is always a multiple of
-@@ -553,12 +575,25 @@ struct GTY (()) aarch64_frame
- /* The size of the frame. This value is the offset from base of the
- * frame (incoming SP) to the stack_pointer. This value is always
- * a multiple of STACK_BOUNDARY. */
-+ HOST_WIDE_INT frame_size;
-+
-+ /* The size of the initial stack adjustment before saving callee-saves. */
-+ HOST_WIDE_INT initial_adjust;
-+
-+ /* The writeback value when pushing callee-save registers.
-+ It is zero when no push is used. */
-+ HOST_WIDE_INT callee_adjust;
-+
-+ /* The offset from SP to the callee-save registers after initial_adjust.
-+ It may be non-zero if no push is used (i.e. callee_adjust == 0). 
*/ -+ HOST_WIDE_INT callee_offset; -+ -+ /* The size of the stack adjustment after saving callee-saves. */ -+ HOST_WIDE_INT final_adjust; - - unsigned wb_candidate1; - unsigned wb_candidate2; - -- HOST_WIDE_INT frame_size; -- - bool laid_out; - }; - -@@ -652,21 +687,6 @@ typedef struct - - #define CONSTANT_ADDRESS_P(X) aarch64_constant_address_p(X) - --/* Try a machine-dependent way of reloading an illegitimate address -- operand. If we find one, push the reload and jump to WIN. This -- macro is used in only one place: `find_reloads_address' in reload.c. */ -- --#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ --do { \ -- rtx new_x = aarch64_legitimize_reload_address (&(X), MODE, OPNUM, TYPE, \ -- IND_L); \ -- if (new_x) \ -- { \ -- X = new_x; \ -- goto WIN; \ -- } \ --} while (0) -- - #define REGNO_OK_FOR_BASE_P(REGNO) \ - aarch64_regno_ok_for_base_p (REGNO, true) - -@@ -722,7 +742,12 @@ do { \ - #define USE_STORE_PRE_INCREMENT(MODE) 0 - #define USE_STORE_PRE_DECREMENT(MODE) 0 - --/* ?? #define WORD_REGISTER_OPERATIONS */ -+/* WORD_REGISTER_OPERATIONS does not hold for AArch64. -+ The assigned word_mode is DImode but operations narrower than SImode -+ behave as 32-bit operations if using the W-form of the registers rather -+ than as word_mode (64-bit) operations as WORD_REGISTER_OPERATIONS -+ expects. */ -+#define WORD_REGISTER_OPERATIONS 0 - - /* Define if loading from memory in MODE, an integral mode narrower than - BITS_PER_WORD will either zero-extend or sign-extend. The value of this -@@ -842,10 +867,7 @@ do { \ - extern void __aarch64_sync_cache_range (void *, void *); \ - __aarch64_sync_cache_range (beg, end) - --#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ -- aarch64_cannot_change_mode_class (FROM, TO, CLASS) -- --#define SHIFT_COUNT_TRUNCATED !TARGET_SIMD -+#define SHIFT_COUNT_TRUNCATED (!TARGET_SIMD) - - /* Choose appropriate mode for caller saves, so we do the minimum - required size of load/store. */ ---- a/src/gcc/config/aarch64/aarch64.md -+++ b/src/gcc/config/aarch64/aarch64.md -@@ -75,6 +75,8 @@ - UNSPEC_CRC32H - UNSPEC_CRC32W - UNSPEC_CRC32X -+ UNSPEC_FCVTZS -+ UNSPEC_FCVTZU - UNSPEC_URECPE - UNSPEC_FRECPE - UNSPEC_FRECPS -@@ -105,6 +107,7 @@ - UNSPEC_NOP - UNSPEC_PRLG_STK - UNSPEC_RBIT -+ UNSPEC_SCVTF - UNSPEC_SISD_NEG - UNSPEC_SISD_SSHL - UNSPEC_SISD_USHL -@@ -122,6 +125,7 @@ - UNSPEC_TLSLE24 - UNSPEC_TLSLE32 - UNSPEC_TLSLE48 -+ UNSPEC_UCVTF - UNSPEC_USHL_2S - UNSPEC_VSTRUCTDUMMY - UNSPEC_SP_SET -@@ -837,13 +841,6 @@ - || aarch64_is_noplt_call_p (callee))) - XEXP (operands[0], 0) = force_reg (Pmode, callee); - -- /* FIXME: This is a band-aid. Need to analyze why expand_expr_addr_expr -- is generating an SImode symbol reference. See PR 64971. */ -- if (TARGET_ILP32 -- && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF -- && GET_MODE (XEXP (operands[0], 0)) == SImode) -- XEXP (operands[0], 0) = convert_memory_address (Pmode, -- XEXP (operands[0], 0)); - if (operands[2] == NULL_RTX) - operands[2] = const0_rtx; - -@@ -875,14 +872,6 @@ - || aarch64_is_noplt_call_p (callee))) - XEXP (operands[1], 0) = force_reg (Pmode, callee); - -- /* FIXME: This is a band-aid. Need to analyze why expand_expr_addr_expr -- is generating an SImode symbol reference. See PR 64971. 
*/ -- if (TARGET_ILP32 -- && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF -- && GET_MODE (XEXP (operands[1], 0)) == SImode) -- XEXP (operands[1], 0) = convert_memory_address (Pmode, -- XEXP (operands[1], 0)); -- - if (operands[3] == NULL_RTX) - operands[3] = const0_rtx; - -@@ -1003,6 +992,11 @@ - (match_operand:GPI 1 "general_operand" ""))] - "" - " -+ if (MEM_P (operands[0]) && CONST_INT_P (operands[1]) -+ && <MODE>mode == DImode -+ && aarch64_split_dimode_const_store (operands[0], operands[1])) -+ DONE; -+ - if (GET_CODE (operands[0]) == MEM && operands[1] != const0_rtx) - operands[1] = force_reg (<MODE>mode, operands[1]); - -@@ -1160,11 +1154,12 @@ - ) - - (define_insn "*movhf_aarch64" -- [(set (match_operand:HF 0 "nonimmediate_operand" "=w, ?r,w,w,m,r,m ,r") -- (match_operand:HF 1 "general_operand" "?rY, w,w,m,w,m,rY,r"))] -+ [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r") -+ (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], HFmode) - || aarch64_reg_or_fp_zero (operands[1], HFmode))" - "@ -+ movi\\t%0.4h, #0 - mov\\t%0.h[0], %w1 - umov\\t%w0, %1.h[0] - mov\\t%0.h[0], %1.h[0] -@@ -1173,18 +1168,18 @@ - ldrh\\t%w0, %1 - strh\\t%w1, %0 - mov\\t%w0, %w1" -- [(set_attr "type" "neon_from_gp,neon_to_gp,neon_move,\ -+ [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\ - f_loads,f_stores,load1,store1,mov_reg") -- (set_attr "simd" "yes,yes,yes,*,*,*,*,*") -- (set_attr "fp" "*,*,*,yes,yes,*,*,*")] -+ (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")] - ) - - (define_insn "*movsf_aarch64" -- [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") -- (match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] -+ [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") -+ (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], SFmode) - || aarch64_reg_or_fp_zero (operands[1], SFmode))" - "@ -+ movi\\t%0.2s, #0 - fmov\\t%s0, %w1 - fmov\\t%w0, %s1 - fmov\\t%s0, %s1 -@@ -1194,16 +1189,18 @@ - ldr\\t%w0, %1 - str\\t%w1, %0 - mov\\t%w0, %w1" -- [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ -- f_loads,f_stores,load1,store1,mov_reg")] -+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\ -+ f_loads,f_stores,load1,store1,mov_reg") -+ (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] - ) - - (define_insn "*movdf_aarch64" -- [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r") -- (match_operand:DF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))] -+ [(set (match_operand:DF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w,m,r,m ,r") -+ (match_operand:DF 1 "general_operand" "Y ,?rY, w,w,Ufc,m,w,m,rY,r"))] - "TARGET_FLOAT && (register_operand (operands[0], DFmode) - || aarch64_reg_or_fp_zero (operands[1], DFmode))" - "@ -+ movi\\t%d0, #0 - fmov\\t%d0, %x1 - fmov\\t%x0, %d1 - fmov\\t%d0, %d1 -@@ -1213,8 +1210,9 @@ - ldr\\t%x0, %1 - str\\t%x1, %0 - mov\\t%x0, %x1" -- [(set_attr "type" "f_mcr,f_mrc,fmov,fconstd,\ -- f_loadd,f_stored,load1,store1,mov_reg")] -+ [(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\ -+ f_loadd,f_stored,load1,store1,mov_reg") -+ (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")] - ) - - (define_insn "*movtf_aarch64" -@@ -1239,7 +1237,6 @@ - [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\ - f_loadd,f_stored,load2,store2,store2") - (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4") -- (set_attr "fp" "*,*,yes,yes,*,yes,yes,yes,*,*,*") - (set_attr "simd" 
"yes,*,*,*,yes,*,*,*,*,*,*")] - ) - -@@ -1552,10 +1549,10 @@ - (zero_extend:GPI (match_operand:SHORT 1 "nonimmediate_operand" "r,m,m")))] - "" - "@ -- uxt<SHORT:size>\t%<GPI:w>0, %w1 -+ and\t%<GPI:w>0, %<GPI:w>1, <SHORT:short_mask> - ldr<SHORT:size>\t%w0, %1 - ldr\t%<SHORT:size>0, %1" -- [(set_attr "type" "extend,load1,load1")] -+ [(set_attr "type" "logic_imm,load1,load1")] - ) - - (define_expand "<optab>qihi2" -@@ -1564,16 +1561,26 @@ - "" - ) - --(define_insn "*<optab>qihi2_aarch64" -+(define_insn "*extendqihi2_aarch64" - [(set (match_operand:HI 0 "register_operand" "=r,r") -- (ANY_EXTEND:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] -+ (sign_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] - "" - "@ -- <su>xtb\t%w0, %w1 -- <ldrxt>b\t%w0, %1" -+ sxtb\t%w0, %w1 -+ ldrsb\t%w0, %1" - [(set_attr "type" "extend,load1")] - ) - -+(define_insn "*zero_extendqihi2_aarch64" -+ [(set (match_operand:HI 0 "register_operand" "=r,r") -+ (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "r,m")))] -+ "" -+ "@ -+ and\t%w0, %w1, 255 -+ ldrb\t%w0, %1" -+ [(set_attr "type" "logic_imm,load1")] -+) -+ - ;; ------------------------------------------------------------------- - ;; Simple arithmetic - ;; ------------------------------------------------------------------- -@@ -1585,25 +1592,16 @@ - (match_operand:GPI 2 "aarch64_pluslong_operand" "")))] - "" - { -- if (aarch64_pluslong_strict_immedate (operands[2], <MODE>mode)) -- { -- /* Give CSE the opportunity to share this constant across additions. */ -- if (!cse_not_expected && can_create_pseudo_p ()) -- operands[2] = force_reg (<MODE>mode, operands[2]); -- -- /* Split will refuse to operate on a modification to the stack pointer. -- Aid the prologue and epilogue expanders by splitting this now. */ -- else if (reload_completed && operands[0] == stack_pointer_rtx) -- { -- HOST_WIDE_INT i = INTVAL (operands[2]); -- HOST_WIDE_INT s = (i >= 0 ? i & 0xfff : -(-i & 0xfff)); -- emit_insn (gen_rtx_SET (operands[0], -- gen_rtx_PLUS (<MODE>mode, operands[1], -- GEN_INT (i - s)))); -- operands[1] = operands[0]; -- operands[2] = GEN_INT (s); -- } -- } -+ /* If operands[1] is a subreg extract the inner RTX. */ -+ rtx op1 = REG_P (operands[1]) ? operands[1] : SUBREG_REG (operands[1]); -+ -+ /* If the constant is too large for a single instruction and isn't frame -+ based, split off the immediate so it is available for CSE. 
*/
-+ if (!aarch64_plus_immediate (operands[2], <MODE>mode)
-+ && can_create_pseudo_p ()
-+ && (!REG_P (op1)
-+ || !REGNO_PTR_FRAME_P (REGNO (op1))))
-+ operands[2] = force_reg (<MODE>mode, operands[2]);
- })
- 
- (define_insn "*add<mode>3_aarch64"
-@@ -1765,7 +1763,7 @@
- "aarch64_zero_extend_const_eq (<DWI>mode, operands[2],
- <MODE>mode, operands[1])"
- "@
-- cmn\\t%<w>0, %<w>1
-+ cmn\\t%<w>0, %1
- cmp\\t%<w>0, #%n1"
- [(set_attr "type" "alus_imm")]
- )
-@@ -1797,11 +1795,11 @@
- "aarch64_zero_extend_const_eq (<DWI>mode, operands[3],
- <MODE>mode, operands[2])"
- "@
-- adds\\t%<w>0, %<w>1, %<w>2
-+ adds\\t%<w>0, %<w>1, %2
- subs\\t%<w>0, %<w>1, #%n2"
- [(set_attr "type" "alus_imm")]
- )
-- 
-+ 
- (define_insn "add<mode>3_compareC"
- [(set (reg:CC_C CC_REGNUM)
- (ne:CC_C
-@@ -3404,7 +3402,9 @@
- (LOGICAL:SI (match_operand:SI 1 "register_operand" "%r,r")
- (match_operand:SI 2 "aarch64_logical_operand" "r,K"))))]
- ""
-- "<logical>\\t%w0, %w1, %w2"
-+ "@
-+ <logical>\\t%w0, %w1, %w2
-+ <logical>\\t%w0, %w1, %2"
- [(set_attr "type" "logic_reg,logic_imm")]
- )
- 
-@@ -3417,7 +3417,9 @@
- (set (match_operand:GPI 0 "register_operand" "=r,r")
- (and:GPI (match_dup 1) (match_dup 2)))]
- ""
-- "ands\\t%<w>0, %<w>1, %<w>2"
-+ "@
-+ ands\\t%<w>0, %<w>1, %<w>2
-+ ands\\t%<w>0, %<w>1, %2"
- [(set_attr "type" "logics_reg,logics_imm")]
- )
- 
-@@ -3431,7 +3433,9 @@
- (set (match_operand:DI 0 "register_operand" "=r,r")
- (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
- ""
-- "ands\\t%w0, %w1, %w2"
-+ "@
-+ ands\\t%w0, %w1, %w2
-+ ands\\t%w0, %w1, %2"
- [(set_attr "type" "logics_reg,logics_imm")]
- )
- 
-@@ -3741,6 +3745,39 @@
- }
- )
- 
-+;; Pop count can be done via the "CNT" instruction in AdvSIMD.
-+;;
-+;; MOV v.1d, x0
-+;; CNT v1.8b, v.8b
-+;; ADDV b2, v1.8b
-+;; MOV w0, v2.b[0]
-+
-+(define_expand "popcount<mode>2"
-+ [(match_operand:GPI 0 "register_operand")
-+ (match_operand:GPI 1 "register_operand")]
-+ "TARGET_SIMD"
-+{
-+ rtx v = gen_reg_rtx (V8QImode);
-+ rtx v1 = gen_reg_rtx (V8QImode);
-+ rtx r = gen_reg_rtx (QImode);
-+ rtx in = operands[1];
-+ rtx out = operands[0];
-+ if(<MODE>mode == SImode)
-+ {
-+ rtx tmp;
-+ tmp = gen_reg_rtx (DImode);
-+ /* If we have SImode, zero extend to DImode, pop count does
-+ not change if we have extra zeros. */
-+ emit_insn (gen_zero_extendsidi2 (tmp, in));
-+ in = tmp;
-+ }
-+ emit_move_insn (v, gen_lowpart (V8QImode, in));
-+ emit_insn (gen_popcountv8qi2 (v1, v));
-+ emit_insn (gen_reduc_plus_scal_v8qi (r, v1));
-+ emit_insn (gen_zero_extendqi<mode>2 (out, r));
-+ DONE;
-+})
-+
- (define_insn "clrsb<mode>2"
- [(set (match_operand:GPI 0 "register_operand" "=r")
- (clrsb:GPI (match_operand:GPI 1 "register_operand" "r")))]
-@@ -3757,16 +3794,23 @@
- [(set_attr "type" "rbit")]
- )
- 
---(define_expand "ctz<mode>2"
-- [(match_operand:GPI 0 "register_operand")
-- (match_operand:GPI 1 "register_operand")]
-+;; Split after reload into RBIT + CLZ. Since RBIT is represented as an UNSPEC
-+;; it is unlikely to fold with any other operation, so keep this as a CTZ
-+;; expression and split after reload to enable scheduling them apart if
-+;; needed. 
-+ -+(define_insn_and_split "ctz<mode>2" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (ctz:GPI (match_operand:GPI 1 "register_operand" "r")))] - "" -- { -- emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); -- emit_insn (gen_clz<mode>2 (operands[0], operands[0])); -- DONE; -- } --) -+ "#" -+ "reload_completed" -+ [(const_int 0)] -+ " -+ emit_insn (gen_rbit<mode>2 (operands[0], operands[1])); -+ emit_insn (gen_clz<mode>2 (operands[0], operands[0])); -+ DONE; -+") - - (define_insn "*and<mode>_compare0" - [(set (reg:CC_NZ CC_REGNUM) -@@ -3778,6 +3822,18 @@ - [(set_attr "type" "alus_imm")] - ) - -+(define_insn "*ands<mode>_compare0" -+ [(set (reg:CC_NZ CC_REGNUM) -+ (compare:CC_NZ -+ (zero_extend:GPI (match_operand:SHORT 1 "register_operand" "r")) -+ (const_int 0))) -+ (set (match_operand:GPI 0 "register_operand" "=r") -+ (zero_extend:GPI (match_dup 1)))] -+ "" -+ "ands\\t%<GPI:w>0, %<GPI:w>1, <short_mask>" -+ [(set_attr "type" "alus_imm")] -+) -+ - (define_insn "*and<mode>3nr_compare0" - [(set (reg:CC_NZ CC_REGNUM) - (compare:CC_NZ -@@ -3785,7 +3841,9 @@ - (match_operand:GPI 1 "aarch64_logical_operand" "r,<lconst>")) - (const_int 0)))] - "" -- "tst\\t%<w>0, %<w>1" -+ "@ -+ tst\\t%<w>0, %<w>1 -+ tst\\t%<w>0, %1" - [(set_attr "type" "logics_reg,logics_imm")] - ) - -@@ -3851,22 +3909,16 @@ - (define_expand "ashl<mode>3" - [(set (match_operand:SHORT 0 "register_operand") - (ashift:SHORT (match_operand:SHORT 1 "register_operand") -- (match_operand:QI 2 "nonmemory_operand")))] -+ (match_operand:QI 2 "const_int_operand")))] - "" - { -- if (CONST_INT_P (operands[2])) -- { -- operands[2] = GEN_INT (INTVAL (operands[2]) -- & (GET_MODE_BITSIZE (<MODE>mode) - 1)); -+ operands[2] = GEN_INT (INTVAL (operands[2]) & GET_MODE_MASK (<MODE>mode)); - -- if (operands[2] == const0_rtx) -- { -- emit_insn (gen_mov<mode> (operands[0], operands[1])); -- DONE; -- } -+ if (operands[2] == const0_rtx) -+ { -+ emit_insn (gen_mov<mode> (operands[0], operands[1])); -+ DONE; - } -- else -- FAIL; - } - ) - -@@ -3915,33 +3967,35 @@ - - ;; Logical left shift using SISD or Integer instruction - (define_insn "*aarch64_ashl_sisd_or_int_<mode>3" -- [(set (match_operand:GPI 0 "register_operand" "=r,w,w") -- (ashift:GPI -- (match_operand:GPI 1 "register_operand" "r,w,w") -- (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>,Us<cmode>,w")))] -+ [(set (match_operand:GPI 0 "register_operand" "=r,r,w,w") -+ (ashift:GPI -+ (match_operand:GPI 1 "register_operand" "r,r,w,w") -+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,r,Us<cmode>,w")))] - "" - "@ -+ lsl\t%<w>0, %<w>1, %2 - lsl\t%<w>0, %<w>1, %<w>2 - shl\t%<rtn>0<vas>, %<rtn>1<vas>, %2 - ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>" -- [(set_attr "simd" "no,yes,yes") -- (set_attr "type" "shift_reg,neon_shift_imm<q>, neon_shift_reg<q>")] -+ [(set_attr "simd" "no,no,yes,yes") -+ (set_attr "type" "bfx,shift_reg,neon_shift_imm<q>, neon_shift_reg<q>")] - ) - - ;; Logical right shift using SISD or Integer instruction - (define_insn "*aarch64_lshr_sisd_or_int_<mode>3" -- [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w") -- (lshiftrt:GPI -- (match_operand:GPI 1 "register_operand" "r,w,w,w") -- (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>,Us<cmode>,w,0")))] -+ [(set (match_operand:GPI 0 "register_operand" "=r,r,w,&w,&w") -+ (lshiftrt:GPI -+ (match_operand:GPI 1 "register_operand" "r,r,w,w,w") -+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,r,Us<cmode>,w,0")))] - "" - "@ -+ lsr\t%<w>0, %<w>1, 
%2 - lsr\t%<w>0, %<w>1, %<w>2 - ushr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 - # - #" -- [(set_attr "simd" "no,yes,yes,yes") -- (set_attr "type" "shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")] -+ [(set_attr "simd" "no,no,yes,yes,yes") -+ (set_attr "type" "bfx,shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")] - ) - - (define_split -@@ -3976,18 +4030,19 @@ - - ;; Arithmetic right shift using SISD or Integer instruction - (define_insn "*aarch64_ashr_sisd_or_int_<mode>3" -- [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w") -+ [(set (match_operand:GPI 0 "register_operand" "=r,r,w,&w,&w") - (ashiftrt:GPI -- (match_operand:GPI 1 "register_operand" "r,w,w,w") -- (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "rUs<cmode>,Us<cmode>,w,0")))] -+ (match_operand:GPI 1 "register_operand" "r,r,w,w,w") -+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,r,Us<cmode>,w,0")))] - "" - "@ -+ asr\t%<w>0, %<w>1, %2 - asr\t%<w>0, %<w>1, %<w>2 - sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2 - # - #" -- [(set_attr "simd" "no,yes,yes,yes") -- (set_attr "type" "shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")] -+ [(set_attr "simd" "no,no,yes,yes,yes") -+ (set_attr "type" "bfx,shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")] - ) - - (define_split -@@ -4079,21 +4134,25 @@ - [(set (match_operand:GPI 0 "register_operand" "=r,r") - (rotatert:GPI - (match_operand:GPI 1 "register_operand" "r,r") -- (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "r,Us<cmode>")))] -+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,r")))] - "" -- "ror\\t%<w>0, %<w>1, %<w>2" -- [(set_attr "type" "shift_reg, rotate_imm")] -+ "@ -+ ror\\t%<w>0, %<w>1, %2 -+ ror\\t%<w>0, %<w>1, %<w>2" -+ [(set_attr "type" "rotate_imm,shift_reg")] - ) - - ;; zero_extend version of above - (define_insn "*<optab>si3_insn_uxtw" -- [(set (match_operand:DI 0 "register_operand" "=r") -+ [(set (match_operand:DI 0 "register_operand" "=r,r") - (zero_extend:DI (SHIFT:SI -- (match_operand:SI 1 "register_operand" "r") -- (match_operand:QI 2 "aarch64_reg_or_shift_imm_si" "rUss"))))] -+ (match_operand:SI 1 "register_operand" "r,r") -+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_si" "Uss,r"))))] - "" -- "<shift>\\t%w0, %w1, %w2" -- [(set_attr "type" "shift_reg")] -+ "@ -+ <shift>\\t%w0, %w1, %2 -+ <shift>\\t%w0, %w1, %w2" -+ [(set_attr "type" "bfx,shift_reg")] - ) - - (define_insn "*<optab><mode>3_insn" -@@ -4105,7 +4164,7 @@ - operands[3] = GEN_INT (<sizen> - UINTVAL (operands[2])); - return "<bfshift>\t%w0, %w1, %2, %3"; - } -- [(set_attr "type" "bfm")] -+ [(set_attr "type" "bfx")] - ) - - (define_insn "*extr<mode>5_insn" -@@ -4117,7 +4176,7 @@ - "UINTVAL (operands[3]) < GET_MODE_BITSIZE (<MODE>mode) && - (UINTVAL (operands[3]) + UINTVAL (operands[4]) == GET_MODE_BITSIZE (<MODE>mode))" - "extr\\t%<w>0, %<w>1, %<w>2, %4" -- [(set_attr "type" "shift_imm")] -+ [(set_attr "type" "rotate_imm")] - ) - - ;; There are no canonicalisation rules for ashift and lshiftrt inside an ior -@@ -4132,7 +4191,7 @@ - && (UINTVAL (operands[3]) + UINTVAL (operands[4]) - == GET_MODE_BITSIZE (<MODE>mode))" - "extr\\t%<w>0, %<w>1, %<w>2, %4" -- [(set_attr "type" "shift_imm")] -+ [(set_attr "type" "rotate_imm")] - ) - - ;; zero_extend version of the above -@@ -4146,7 +4205,7 @@ - "UINTVAL (operands[3]) < 32 && - (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" - "extr\\t%w0, %w1, %w2, %4" -- [(set_attr "type" "shift_imm")] -+ [(set_attr "type" "rotate_imm")] - ) - - (define_insn 
"*extrsi5_insn_uxtw_alt" -@@ -4159,7 +4218,7 @@ - "UINTVAL (operands[3]) < 32 && - (UINTVAL (operands[3]) + UINTVAL (operands[4]) == 32)" - "extr\\t%w0, %w1, %w2, %4" -- [(set_attr "type" "shift_imm")] -+ [(set_attr "type" "rotate_imm")] - ) - - (define_insn "*ror<mode>3_insn" -@@ -4198,7 +4257,7 @@ - operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2])); - return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3"; - } -- [(set_attr "type" "bfm")] -+ [(set_attr "type" "bfx")] - ) - - (define_insn "*zero_extend<GPI:mode>_lshr<SHORT:mode>" -@@ -4211,7 +4270,7 @@ - operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2])); - return "ubfx\t%<GPI:w>0, %<GPI:w>1, %2, %3"; - } -- [(set_attr "type" "bfm")] -+ [(set_attr "type" "bfx")] - ) - - (define_insn "*extend<GPI:mode>_ashr<SHORT:mode>" -@@ -4224,7 +4283,7 @@ - operands[3] = GEN_INT (<SHORT:sizen> - UINTVAL (operands[2])); - return "sbfx\\t%<GPI:w>0, %<GPI:w>1, %2, %3"; - } -- [(set_attr "type" "bfm")] -+ [(set_attr "type" "bfx")] - ) - - ;; ------------------------------------------------------------------- -@@ -4256,7 +4315,27 @@ - "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), - 1, GET_MODE_BITSIZE (<MODE>mode) - 1)" - "<su>bfx\\t%<w>0, %<w>1, %3, %2" -- [(set_attr "type" "bfm")] -+ [(set_attr "type" "bfx")] -+) -+ -+;; When the bit position and width add up to 32 we can use a W-reg LSR -+;; instruction taking advantage of the implicit zero-extension of the X-reg. -+(define_split -+ [(set (match_operand:DI 0 "register_operand") -+ (zero_extract:DI (match_operand:DI 1 "register_operand") -+ (match_operand 2 -+ "aarch64_simd_shift_imm_offset_di") -+ (match_operand 3 -+ "aarch64_simd_shift_imm_di")))] -+ "IN_RANGE (INTVAL (operands[2]) + INTVAL (operands[3]), 1, -+ GET_MODE_BITSIZE (DImode) - 1) -+ && (INTVAL (operands[2]) + INTVAL (operands[3])) -+ == GET_MODE_BITSIZE (SImode)" -+ [(set (match_dup 0) -+ (zero_extend:DI (lshiftrt:SI (match_dup 4) (match_dup 3))))] -+ { -+ operands[4] = gen_lowpart (SImode, operands[1]); -+ } - ) - - ;; Bitfield Insert (insv) -@@ -4338,7 +4417,7 @@ - : GEN_INT (<GPI:sizen> - UINTVAL (operands[2])); - return "<su>bfiz\t%<GPI:w>0, %<GPI:w>1, %2, %3"; - } -- [(set_attr "type" "bfm")] -+ [(set_attr "type" "bfx")] - ) - - ;; XXX We should match (any_extend (ashift)) here, like (and (ashift)) below -@@ -4348,11 +4427,27 @@ - (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand 2 "const_int_operand" "n")) - (match_operand 3 "const_int_operand" "n")))] -- "(INTVAL (operands[2]) < (<GPI:sizen>)) -- && exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1) >= 0 -- && (INTVAL (operands[3]) & ((1 << INTVAL (operands[2])) - 1)) == 0" -+ "aarch64_mask_and_shift_for_ubfiz_p (<MODE>mode, operands[3], operands[2])" - "ubfiz\\t%<w>0, %<w>1, %2, %P3" -- [(set_attr "type" "bfm")] -+ [(set_attr "type" "bfx")] -+) -+ -+;; When the bit position and width of the equivalent extraction add up to 32 -+;; we can use a W-reg LSL instruction taking advantage of the implicit -+;; zero-extension of the X-reg. 
-+(define_split -+ [(set (match_operand:DI 0 "register_operand") -+ (and:DI (ashift:DI (match_operand:DI 1 "register_operand") -+ (match_operand 2 "const_int_operand")) -+ (match_operand 3 "const_int_operand")))] -+ "aarch64_mask_and_shift_for_ubfiz_p (DImode, operands[3], operands[2]) -+ && (INTVAL (operands[2]) + popcount_hwi (INTVAL (operands[3]))) -+ == GET_MODE_BITSIZE (SImode)" -+ [(set (match_dup 0) -+ (zero_extend:DI (ashift:SI (match_dup 4) (match_dup 2))))] -+ { -+ operands[4] = gen_lowpart (SImode, operands[1]); -+ } - ) - - (define_insn "bswap<mode>2" -@@ -4420,22 +4515,23 @@ - ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. - - (define_insn "<frint_pattern><mode>2" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] - FRINT))] - "TARGET_FLOAT" - "frint<frint_suffix>\\t%<s>0, %<s>1" -- [(set_attr "type" "f_rint<s>")] -+ [(set_attr "type" "f_rint<stype>")] - ) - - ;; frcvt floating-point round to integer and convert standard patterns. - ;; Expands to lbtrunc, lceil, lfloor, lround. --(define_insn "l<fcvt_pattern><su_optab><GPF:mode><GPI:mode>2" -+(define_insn "l<fcvt_pattern><su_optab><GPF_F16:mode><GPI:mode>2" - [(set (match_operand:GPI 0 "register_operand" "=r") -- (FIXUORS:GPI (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")] -- FCVT)))] -+ (FIXUORS:GPI -+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] -+ FCVT)))] - "TARGET_FLOAT" -- "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF:s>1" -+ "fcvt<frint_suffix><su>\\t%<GPI:w>0, %<GPF_F16:s>1" - [(set_attr "type" "f_cvtf2i")] - ) - -@@ -4461,23 +4557,24 @@ - ;; fma - no throw - - (define_insn "fma<mode>4" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (fma:GPF (match_operand:GPF 1 "register_operand" "w") -- (match_operand:GPF 2 "register_operand" "w") -- (match_operand:GPF 3 "register_operand" "w")))] -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (fma:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w") -+ (match_operand:GPF_F16 2 "register_operand" "w") -+ (match_operand:GPF_F16 3 "register_operand" "w")))] - "TARGET_FLOAT" - "fmadd\\t%<s>0, %<s>1, %<s>2, %<s>3" -- [(set_attr "type" "fmac<s>")] -+ [(set_attr "type" "fmac<stype>")] - ) - - (define_insn "fnma<mode>4" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (fma:GPF (neg:GPF (match_operand:GPF 1 "register_operand" "w")) -- (match_operand:GPF 2 "register_operand" "w") -- (match_operand:GPF 3 "register_operand" "w")))] -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (fma:GPF_F16 -+ (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")) -+ (match_operand:GPF_F16 2 "register_operand" "w") -+ (match_operand:GPF_F16 3 "register_operand" "w")))] - "TARGET_FLOAT" - "fmsub\\t%<s>0, %<s>1, %<s>2, %<s>3" -- [(set_attr "type" "fmac<s>")] -+ [(set_attr "type" "fmac<stype>")] - ) - - (define_insn "fms<mode>4" -@@ -4563,19 +4660,11 @@ - [(set_attr "type" "f_cvt")] - ) - --(define_insn "fix_trunc<GPF:mode><GPI:mode>2" -- [(set (match_operand:GPI 0 "register_operand" "=r") -- (fix:GPI (match_operand:GPF 1 "register_operand" "w")))] -- "TARGET_FLOAT" -- "fcvtzs\\t%<GPI:w>0, %<GPF:s>1" -- [(set_attr "type" "f_cvtf2i")] --) -- --(define_insn "fixuns_trunc<GPF:mode><GPI:mode>2" -+(define_insn "<optab>_trunc<GPF_F16:mode><GPI:mode>2" - [(set (match_operand:GPI 0 "register_operand" "=r") -- 
(unsigned_fix:GPI (match_operand:GPF 1 "register_operand" "w")))] -+ (FIXUORS:GPI (match_operand:GPF_F16 1 "register_operand" "w")))] - "TARGET_FLOAT" -- "fcvtzu\\t%<GPI:w>0, %<GPF:s>1" -+ "fcvtz<su>\t%<GPI:w>0, %<GPF_F16:s>1" - [(set_attr "type" "f_cvtf2i")] - ) - -@@ -4599,38 +4688,116 @@ - [(set_attr "type" "f_cvti2f")] - ) - -+(define_insn "<optab><mode>hf2" -+ [(set (match_operand:HF 0 "register_operand" "=w") -+ (FLOATUORS:HF (match_operand:GPI 1 "register_operand" "r")))] -+ "TARGET_FP_F16INST" -+ "<su_optab>cvtf\t%h0, %<w>1" -+ [(set_attr "type" "f_cvti2f")] -+) -+ -+;; Convert between fixed-point and floating-point (scalar modes) -+ -+(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><GPF:mode>3" -+ [(set (match_operand:<GPF:FCVT_TARGET> 0 "register_operand" "=r, w") -+ (unspec:<GPF:FCVT_TARGET> [(match_operand:GPF 1 "register_operand" "w, w") -+ (match_operand:SI 2 "immediate_operand" "i, i")] -+ FCVT_F2FIXED))] -+ "" -+ "@ -+ <FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPF:w1>0, %<GPF:s>1, #%2 -+ <FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPF:s>0, %<GPF:s>1, #%2" -+ [(set_attr "type" "f_cvtf2i, neon_fp_to_int_<GPF:Vetype>") -+ (set_attr "fp" "yes, *") -+ (set_attr "simd" "*, yes")] -+) -+ -+(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><GPI:mode>3" -+ [(set (match_operand:<GPI:FCVT_TARGET> 0 "register_operand" "=w, w") -+ (unspec:<GPI:FCVT_TARGET> [(match_operand:GPI 1 "register_operand" "r, w") -+ (match_operand:SI 2 "immediate_operand" "i, i")] -+ FCVT_FIXED2F))] -+ "" -+ "@ -+ <FCVT_FIXED2F:fcvt_fixed_insn>\t%<GPI:v>0, %<GPI:w>1, #%2 -+ <FCVT_FIXED2F:fcvt_fixed_insn>\t%<GPI:v>0, %<GPI:v>1, #%2" -+ [(set_attr "type" "f_cvti2f, neon_int_to_fp_<GPI:Vetype>") -+ (set_attr "fp" "yes, *") -+ (set_attr "simd" "*, yes")] -+) -+ -+(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf<mode>3" -+ [(set (match_operand:GPI 0 "register_operand" "=r") -+ (unspec:GPI [(match_operand:HF 1 "register_operand" "w") -+ (match_operand:SI 2 "immediate_operand" "i")] -+ FCVT_F2FIXED))] -+ "TARGET_FP_F16INST" -+ "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<GPI:w>0, %h1, #%2" -+ [(set_attr "type" "f_cvtf2i")] -+) -+ -+(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><mode>hf3" -+ [(set (match_operand:HF 0 "register_operand" "=w") -+ (unspec:HF [(match_operand:GPI 1 "register_operand" "r") -+ (match_operand:SI 2 "immediate_operand" "i")] -+ FCVT_FIXED2F))] -+ "TARGET_FP_F16INST" -+ "<FCVT_FIXED2F:fcvt_fixed_insn>\t%h0, %<GPI:w>1, #%2" -+ [(set_attr "type" "f_cvti2f")] -+) -+ -+(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn>hf3" -+ [(set (match_operand:HI 0 "register_operand" "=w") -+ (unspec:HI [(match_operand:HF 1 "register_operand" "w") -+ (match_operand:SI 2 "immediate_operand" "i")] -+ FCVT_F2FIXED))] -+ "TARGET_SIMD" -+ "<FCVT_F2FIXED:fcvt_fixed_insn>\t%h0, %h1, #%2" -+ [(set_attr "type" "neon_fp_to_int_s")] -+) -+ -+(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn>hi3" -+ [(set (match_operand:HF 0 "register_operand" "=w") -+ (unspec:HF [(match_operand:HI 1 "register_operand" "w") -+ (match_operand:SI 2 "immediate_operand" "i")] -+ FCVT_FIXED2F))] -+ "TARGET_SIMD" -+ "<FCVT_FIXED2F:fcvt_fixed_insn>\t%h0, %h1, #%2" -+ [(set_attr "type" "neon_int_to_fp_s")] -+) -+ - ;; ------------------------------------------------------------------- - ;; Floating-point arithmetic - ;; ------------------------------------------------------------------- - - (define_insn "add<mode>3" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (plus:GPF -- (match_operand:GPF 1 "register_operand" "w") -- (match_operand:GPF 2 "register_operand" "w")))] -+ [(set 
(match_operand:GPF_F16 0 "register_operand" "=w") -+ (plus:GPF_F16 -+ (match_operand:GPF_F16 1 "register_operand" "w") -+ (match_operand:GPF_F16 2 "register_operand" "w")))] - "TARGET_FLOAT" - "fadd\\t%<s>0, %<s>1, %<s>2" -- [(set_attr "type" "fadd<s>")] -+ [(set_attr "type" "fadd<stype>")] - ) - - (define_insn "sub<mode>3" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (minus:GPF -- (match_operand:GPF 1 "register_operand" "w") -- (match_operand:GPF 2 "register_operand" "w")))] -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (minus:GPF_F16 -+ (match_operand:GPF_F16 1 "register_operand" "w") -+ (match_operand:GPF_F16 2 "register_operand" "w")))] - "TARGET_FLOAT" - "fsub\\t%<s>0, %<s>1, %<s>2" -- [(set_attr "type" "fadd<s>")] -+ [(set_attr "type" "fadd<stype>")] - ) - - (define_insn "mul<mode>3" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (mult:GPF -- (match_operand:GPF 1 "register_operand" "w") -- (match_operand:GPF 2 "register_operand" "w")))] -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (mult:GPF_F16 -+ (match_operand:GPF_F16 1 "register_operand" "w") -+ (match_operand:GPF_F16 2 "register_operand" "w")))] - "TARGET_FLOAT" - "fmul\\t%<s>0, %<s>1, %<s>2" -- [(set_attr "type" "fmul<s>")] -+ [(set_attr "type" "fmul<stype>")] - ) - - (define_insn "*fnmul<mode>3" -@@ -4653,38 +4820,58 @@ - [(set_attr "type" "fmul<s>")] - ) - --(define_insn "div<mode>3" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (div:GPF -- (match_operand:GPF 1 "register_operand" "w") -- (match_operand:GPF 2 "register_operand" "w")))] -+(define_expand "div<mode>3" -+ [(set (match_operand:GPF_F16 0 "register_operand") -+ (div:GPF_F16 (match_operand:GPF_F16 1 "general_operand") -+ (match_operand:GPF_F16 2 "register_operand")))] -+ "TARGET_SIMD" -+{ -+ if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) -+ DONE; -+ -+ operands[1] = force_reg (<MODE>mode, operands[1]); -+}) -+ -+(define_insn "*div<mode>3" -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (div:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w") -+ (match_operand:GPF_F16 2 "register_operand" "w")))] - "TARGET_FLOAT" - "fdiv\\t%<s>0, %<s>1, %<s>2" -- [(set_attr "type" "fdiv<s>")] -+ [(set_attr "type" "fdiv<stype>")] - ) - - (define_insn "neg<mode>2" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (neg:GPF (match_operand:GPF 1 "register_operand" "w")))] -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (neg:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] - "TARGET_FLOAT" - "fneg\\t%<s>0, %<s>1" -- [(set_attr "type" "ffarith<s>")] -+ [(set_attr "type" "ffarith<stype>")] - ) - --(define_insn "sqrt<mode>2" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (sqrt:GPF (match_operand:GPF 1 "register_operand" "w")))] -+(define_expand "sqrt<mode>2" -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] -+ "TARGET_FLOAT" -+{ -+ if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) -+ DONE; -+}) -+ -+(define_insn "*sqrt<mode>2" -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (sqrt:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] - "TARGET_FLOAT" - "fsqrt\\t%<s>0, %<s>1" -- [(set_attr "type" "fsqrt<s>")] -+ [(set_attr "type" "fsqrt<stype>")] - ) - - (define_insn "abs<mode>2" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (abs:GPF (match_operand:GPF 1 "register_operand" "w")))] -+ [(set (match_operand:GPF_F16 0 
"register_operand" "=w") -+ (abs:GPF_F16 (match_operand:GPF_F16 1 "register_operand" "w")))] - "TARGET_FLOAT" - "fabs\\t%<s>0, %<s>1" -- [(set_attr "type" "ffarith<s>")] -+ [(set_attr "type" "ffarith<stype>")] - ) - - ;; Given that smax/smin do not specify the result when either input is NaN, -@@ -4709,15 +4896,17 @@ - [(set_attr "type" "f_minmax<s>")] - ) - --;; Scalar forms for the IEEE-754 fmax()/fmin() functions --(define_insn "<fmaxmin><mode>3" -- [(set (match_operand:GPF 0 "register_operand" "=w") -- (unspec:GPF [(match_operand:GPF 1 "register_operand" "w") -- (match_operand:GPF 2 "register_operand" "w")] -- FMAXMIN))] -+;; Scalar forms for fmax, fmin, fmaxnm, fminnm. -+;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names, -+;; which implement the IEEE fmax ()/fmin () functions. -+(define_insn "<maxmin_uns><mode>3" -+ [(set (match_operand:GPF_F16 0 "register_operand" "=w") -+ (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w") -+ (match_operand:GPF_F16 2 "register_operand" "w")] -+ FMAXMIN_UNS))] - "TARGET_FLOAT" -- "<fmaxmin_op>\\t%<s>0, %<s>1, %<s>2" -- [(set_attr "type" "f_minmax<s>")] -+ "<maxmin_uns_op>\\t%<s>0, %<s>1, %<s>2" -+ [(set_attr "type" "f_minmax<stype>")] - ) - - ;; For copysign (x, y), we want to generate: -@@ -4775,7 +4964,7 @@ - [(set (match_operand:GPF_TF 0 "register_operand" "=w") - (mem:GPF_TF (match_operand 1 "aarch64_constant_pool_symref" "S"))) - (clobber (match_operand:P 2 "register_operand" "=&r"))] -- "TARGET_FLOAT && aarch64_nopcrelative_literal_loads" -+ "TARGET_FLOAT" - { - aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0)); - emit_move_insn (operands[0], gen_rtx_MEM (<GPF_TF:MODE>mode, operands[2])); -@@ -4788,7 +4977,7 @@ - [(set (match_operand:VALL 0 "register_operand" "=w") - (mem:VALL (match_operand 1 "aarch64_constant_pool_symref" "S"))) - (clobber (match_operand:P 2 "register_operand" "=&r"))] -- "TARGET_FLOAT && aarch64_nopcrelative_literal_loads" -+ "TARGET_FLOAT" - { - aarch64_expand_mov_immediate (operands[2], XEXP (operands[1], 0)); - emit_move_insn (operands[0], gen_rtx_MEM (<VALL:MODE>mode, operands[2])); -@@ -4961,20 +5150,20 @@ - ;; The TLS ABI specifically requires that the compiler does not schedule - ;; instructions in the TLS stubs, in order to enable linker relaxation. - ;; Therefore we treat the stubs as an atomic sequence. 
--(define_expand "tlsgd_small"
-+(define_expand "tlsgd_small_<mode>"
- [(parallel [(set (match_operand 0 "register_operand" "")
- (call (mem:DI (match_dup 2)) (const_int 1)))
-- (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS)
-+ (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS)
- (clobber (reg:DI LR_REGNUM))])]
- ""
- {
- operands[2] = aarch64_tls_get_addr ();
- })
- 
---(define_insn "*tlsgd_small"
-+(define_insn "*tlsgd_small_<mode>"
- [(set (match_operand 0 "register_operand" "")
- (call (mem:DI (match_operand:DI 2 "" "")) (const_int 1)))
-- (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS)
-+ (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS)
- (clobber (reg:DI LR_REGNUM))
- ]
- ""
-@@ -5182,7 +5371,7 @@
- UNSPEC_SP_TEST))
- (clobber (match_scratch:PTR 3 "=&r"))]
- ""
-- "ldr\t%<w>3, %x1\;ldr\t%<w>0, %x2\;eor\t%<w>0, %<w>3, %<w>0"
-+ "ldr\t%<w>3, %1\;ldr\t%<w>0, %2\;eor\t%<w>0, %<w>3, %<w>0"
- [(set_attr "length" "12")
- (set_attr "type" "multiple")])
- 
---- a/src/gcc/config/aarch64/aarch64.opt
-+++ b/src/gcc/config/aarch64/aarch64.opt
-@@ -146,10 +146,28 @@ EnumValue
- Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64)
- 
- mpc-relative-literal-loads
--Target Report Save Var(nopcrelative_literal_loads) Init(2) Save
-+Target Report Save Var(pcrelative_literal_loads) Init(2) Save
- PC relative literal loads.
- 
- mlow-precision-recip-sqrt
- Common Var(flag_mrecip_low_precision_sqrt) Optimization
--When calculating the reciprocal square root approximation,
--uses one less step than otherwise, thus reducing latency and precision.
-+Enable the reciprocal square root approximation. Enabling this reduces
-+precision of reciprocal square root results to about 16 bits for
-+single precision and to 32 bits for double precision.
-+
-+mlow-precision-sqrt
-+Common Var(flag_mlow_precision_sqrt) Optimization
-+Enable the square root approximation. Enabling this reduces
-+precision of square root results to about 16 bits for
-+single precision and to 32 bits for double precision.
-+If enabled, it implies -mlow-precision-recip-sqrt.
-+
-+mlow-precision-div
-+Common Var(flag_mlow_precision_div) Optimization
-+Enable the division approximation. Enabling this reduces
-+precision of division results to about 16 bits for
-+single precision and to 32 bits for double precision.
-+
-+mverbose-cost-dump
-+Common Undocumented Var(flag_aarch64_verbose_cost)
-+Enables verbose cost model dumping in the debug dump files.
---- /dev/null
-+++ b/src/gcc/config/aarch64/arm_fp16.h
-@@ -0,0 +1,579 @@
-+/* ARM FP16 scalar intrinsics include file.
-+
-+ Copyright (C) 2016 Free Software Foundation, Inc.
-+ Contributed by ARM Ltd.
-+
-+ This file is part of GCC.
-+
-+ GCC is free software; you can redistribute it and/or modify it
-+ under the terms of the GNU General Public License as published
-+ by the Free Software Foundation; either version 3, or (at your
-+ option) any later version.
-+
-+ GCC is distributed in the hope that it will be useful, but WITHOUT
-+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-+ License for more details.
-+
-+ Under Section 7 of GPL version 3, you are granted additional
-+ permissions described in the GCC Runtime Library Exception, version
-+ 3.1, as published by the Free Software Foundation. 
-+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ <http://www.gnu.org/licenses/>. */ -+ -+#ifndef _AARCH64_FP16_H_ -+#define _AARCH64_FP16_H_ -+ -+#include <stdint.h> -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+fp16") -+ -+typedef __fp16 float16_t; -+ -+/* ARMv8.2-A FP16 one operand scalar intrinsics. */ -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vabsh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_abshf (__a); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vceqzh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_cmeqhf_uss (__a, 0.0f); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcgezh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_cmgehf_uss (__a, 0.0f); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcgtzh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_cmgthf_uss (__a, 0.0f); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vclezh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_cmlehf_uss (__a, 0.0f); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcltzh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_cmlthf_uss (__a, 0.0f); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_f16_s16 (int16_t __a) -+{ -+ return __builtin_aarch64_floathihf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_f16_s32 (int32_t __a) -+{ -+ return __builtin_aarch64_floatsihf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_f16_s64 (int64_t __a) -+{ -+ return __builtin_aarch64_floatdihf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_f16_u16 (uint16_t __a) -+{ -+ return __builtin_aarch64_floatunshihf_us (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_f16_u32 (uint32_t __a) -+{ -+ return __builtin_aarch64_floatunssihf_us (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_f16_u64 (uint64_t __a) -+{ -+ return __builtin_aarch64_floatunsdihf_us (__a); -+} -+ -+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+vcvth_s16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_fix_trunchfhi (__a); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvth_s32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_fix_trunchfsi (__a); -+} -+ -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vcvth_s64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_fix_trunchfdi (__a); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcvth_u16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_fixuns_trunchfhi_us (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvth_u32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_fixuns_trunchfsi_us (__a); -+} -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+vcvth_u64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_fixuns_trunchfdi_us (__a); -+} -+ 
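A minimal usage sketch for the scalar conversion intrinsics above, assuming a toolchain that ships this arm_fp16.h and a target built with -march=armv8.2-a+fp16 (the same target the header's GCC pragma selects); the function name and the per-call instruction notes are illustrative, not taken from the patch:

/* Round-trip through the FP16 scalar conversions defined above.
   arm_fp16.h pulls in <stdint.h> for the fixed-width types.  */
#include <arm_fp16.h>

int32_t
fp16_roundtrip (int32_t x)
{
  /* int32 -> FP16; with +fp16 this should expand to a single
     "scvtf h0, w0" via the <optab><mode>hf2 pattern added above.  */
  float16_t h = vcvth_f16_s32 (x);

  /* FP16 -> int32, rounding toward zero; should expand to
     "fcvtzs w0, h0" via the <optab>_trunc pattern added above.  */
  return vcvth_s32_f16 (h);
}

/* Assumed build line: gcc -O2 -march=armv8.2-a+fp16 -c fp16_roundtrip.c  */
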
-+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+vcvtah_s16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lroundhfhi (__a); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvtah_s32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lroundhfsi (__a); -+} -+ -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vcvtah_s64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lroundhfdi (__a); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcvtah_u16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lrounduhfhi_us (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvtah_u32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lrounduhfsi_us (__a); -+} -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+vcvtah_u64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lrounduhfdi_us (__a); -+} -+ -+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+vcvtmh_s16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfloorhfhi (__a); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvtmh_s32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfloorhfsi (__a); -+} -+ -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vcvtmh_s64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfloorhfdi (__a); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcvtmh_u16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lflooruhfhi_us (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvtmh_u32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lflooruhfsi_us (__a); -+} -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+vcvtmh_u64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lflooruhfdi_us (__a); -+} -+ -+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+vcvtnh_s16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfrintnhfhi (__a); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvtnh_s32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfrintnhfsi (__a); -+} -+ -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vcvtnh_s64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfrintnhfdi (__a); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcvtnh_u16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfrintnuhfhi_us (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvtnh_u32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfrintnuhfsi_us (__a); -+} -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+vcvtnh_u64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lfrintnuhfdi_us (__a); -+} -+ -+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+vcvtph_s16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lceilhfhi (__a); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvtph_s32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lceilhfsi (__a); -+} -+ -+__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+vcvtph_s64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lceilhfdi (__a); -+} -+ -+__extension__ 
static __inline uint16_t __attribute__ ((__always_inline__)) -+vcvtph_u16_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lceiluhfhi_us (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvtph_u32_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lceiluhfsi_us (__a); -+} -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+vcvtph_u64_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_lceiluhfdi_us (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vnegh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_neghf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrecpeh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_frecpehf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrecpxh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_frecpxhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_btrunchf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndah_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_roundhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndih_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_nearbyinthf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndmh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_floorhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndnh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_frintnhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndph_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_ceilhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndxh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_rinthf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrsqrteh_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_rsqrtehf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vsqrth_f16 (float16_t __a) -+{ -+ return __builtin_aarch64_sqrthf (__a); -+} -+ -+/* ARMv8.2-A FP16 two operands scalar intrinsics. 
*/ -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vaddh_f16 (float16_t __a, float16_t __b) -+{ -+ return __a + __b; -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vabdh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_fabdhf (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcageh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_facgehf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcagth_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_facgthf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcaleh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_faclehf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcalth_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_faclthf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vceqh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_cmeqhf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcgeh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_cmgehf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcgth_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_cmgthf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcleh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_cmlehf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vclth_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_cmlthf_uss (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_n_f16_s16 (int16_t __a, const int __b) -+{ -+ return __builtin_aarch64_scvtfhi (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_n_f16_s32 (int32_t __a, const int __b) -+{ -+ return __builtin_aarch64_scvtfsihf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_n_f16_s64 (int64_t __a, const int __b) -+{ -+ return __builtin_aarch64_scvtfdihf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_n_f16_u16 (uint16_t __a, const int __b) -+{ -+ return __builtin_aarch64_ucvtfhi_sus (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_n_f16_u32 (uint32_t __a, const int __b) -+{ -+ return __builtin_aarch64_ucvtfsihf_sus (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_n_f16_u64 (uint64_t __a, const int __b) -+{ -+ return __builtin_aarch64_ucvtfdihf_sus (__a, __b); -+} -+ -+__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+vcvth_n_s16_f16 (float16_t __a, const int __b) -+{ -+ return __builtin_aarch64_fcvtzshf (__a, __b); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvth_n_s32_f16 (float16_t __a, const int __b) -+{ -+ return __builtin_aarch64_fcvtzshfsi (__a, __b); -+} -+ -+__extension__ static __inline int64_t __attribute__ 
((__always_inline__)) -+vcvth_n_s64_f16 (float16_t __a, const int __b) -+{ -+ return __builtin_aarch64_fcvtzshfdi (__a, __b); -+} -+ -+__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+vcvth_n_u16_f16 (float16_t __a, const int __b) -+{ -+ return __builtin_aarch64_fcvtzuhf_uss (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvth_n_u32_f16 (float16_t __a, const int __b) -+{ -+ return __builtin_aarch64_fcvtzuhfsi_uss (__a, __b); -+} -+ -+__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+vcvth_n_u64_f16 (float16_t __a, const int __b) -+{ -+ return __builtin_aarch64_fcvtzuhfdi_uss (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vdivh_f16 (float16_t __a, float16_t __b) -+{ -+ return __a / __b; -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vmaxh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_fmaxhf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vmaxnmh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_fmaxhf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vminh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_fminhf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vminnmh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_fminhf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vmulh_f16 (float16_t __a, float16_t __b) -+{ -+ return __a * __b; -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vmulxh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_fmulxhf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrecpsh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_frecpshf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrsqrtsh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_aarch64_rsqrtshf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vsubh_f16 (float16_t __a, float16_t __b) -+{ -+ return __a - __b; -+} -+ -+/* ARMv8.2-A FP16 three operands scalar intrinsics. 
*/ -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vfmah_f16 (float16_t __a, float16_t __b, float16_t __c) -+{ -+ return __builtin_aarch64_fmahf (__b, __c, __a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c) -+{ -+ return __builtin_aarch64_fnmahf (__b, __c, __a); -+} -+ -+#pragma GCC pop_options -+ -+#endif ---- a/src/gcc/config/aarch64/arm_neon.h -+++ b/src/gcc/config/aarch64/arm_neon.h -@@ -58,6 +58,7 @@ typedef __Float64x2_t float64x2_t; - typedef __Poly8x16_t poly8x16_t; - typedef __Poly16x8_t poly16x8_t; - typedef __Poly64x2_t poly64x2_t; -+typedef __Poly64x1_t poly64x1_t; - typedef __Uint8x16_t uint8x16_t; - typedef __Uint16x8_t uint16x8_t; - typedef __Uint32x4_t uint32x4_t; -@@ -202,6 +203,36 @@ typedef struct poly16x8x2_t - poly16x8_t val[2]; - } poly16x8x2_t; - -+typedef struct poly64x1x2_t -+{ -+ poly64x1_t val[2]; -+} poly64x1x2_t; -+ -+typedef struct poly64x1x3_t -+{ -+ poly64x1_t val[3]; -+} poly64x1x3_t; -+ -+typedef struct poly64x1x4_t -+{ -+ poly64x1_t val[4]; -+} poly64x1x4_t; -+ -+typedef struct poly64x2x2_t -+{ -+ poly64x2_t val[2]; -+} poly64x2x2_t; -+ -+typedef struct poly64x2x3_t -+{ -+ poly64x2_t val[3]; -+} poly64x2x3_t; -+ -+typedef struct poly64x2x4_t -+{ -+ poly64x2_t val[4]; -+} poly64x2x4_t; -+ - typedef struct int8x8x3_t - { - int8x8_t val[3]; -@@ -466,6 +497,8 @@ typedef struct poly16x8x4_t - #define __aarch64_vdup_lane_any(__size, __q, __a, __b) \ - vdup##__q##_n_##__size (__aarch64_vget_lane_any (__a, __b)) - -+#define __aarch64_vdup_lane_f16(__a, __b) \ -+ __aarch64_vdup_lane_any (f16, , __a, __b) - #define __aarch64_vdup_lane_f32(__a, __b) \ - __aarch64_vdup_lane_any (f32, , __a, __b) - #define __aarch64_vdup_lane_f64(__a, __b) \ -@@ -474,6 +507,8 @@ typedef struct poly16x8x4_t - __aarch64_vdup_lane_any (p8, , __a, __b) - #define __aarch64_vdup_lane_p16(__a, __b) \ - __aarch64_vdup_lane_any (p16, , __a, __b) -+#define __aarch64_vdup_lane_p64(__a, __b) \ -+ __aarch64_vdup_lane_any (p64, , __a, __b) - #define __aarch64_vdup_lane_s8(__a, __b) \ - __aarch64_vdup_lane_any (s8, , __a, __b) - #define __aarch64_vdup_lane_s16(__a, __b) \ -@@ -492,6 +527,8 @@ typedef struct poly16x8x4_t - __aarch64_vdup_lane_any (u64, , __a, __b) - - /* __aarch64_vdup_laneq internal macros. */ -+#define __aarch64_vdup_laneq_f16(__a, __b) \ -+ __aarch64_vdup_lane_any (f16, , __a, __b) - #define __aarch64_vdup_laneq_f32(__a, __b) \ - __aarch64_vdup_lane_any (f32, , __a, __b) - #define __aarch64_vdup_laneq_f64(__a, __b) \ -@@ -500,6 +537,8 @@ typedef struct poly16x8x4_t - __aarch64_vdup_lane_any (p8, , __a, __b) - #define __aarch64_vdup_laneq_p16(__a, __b) \ - __aarch64_vdup_lane_any (p16, , __a, __b) -+#define __aarch64_vdup_laneq_p64(__a, __b) \ -+ __aarch64_vdup_lane_any (p64, , __a, __b) - #define __aarch64_vdup_laneq_s8(__a, __b) \ - __aarch64_vdup_lane_any (s8, , __a, __b) - #define __aarch64_vdup_laneq_s16(__a, __b) \ -@@ -518,6 +557,8 @@ typedef struct poly16x8x4_t - __aarch64_vdup_lane_any (u64, , __a, __b) - - /* __aarch64_vdupq_lane internal macros. 
*/ -+#define __aarch64_vdupq_lane_f16(__a, __b) \ -+ __aarch64_vdup_lane_any (f16, q, __a, __b) - #define __aarch64_vdupq_lane_f32(__a, __b) \ - __aarch64_vdup_lane_any (f32, q, __a, __b) - #define __aarch64_vdupq_lane_f64(__a, __b) \ -@@ -526,6 +567,8 @@ typedef struct poly16x8x4_t - __aarch64_vdup_lane_any (p8, q, __a, __b) - #define __aarch64_vdupq_lane_p16(__a, __b) \ - __aarch64_vdup_lane_any (p16, q, __a, __b) -+#define __aarch64_vdupq_lane_p64(__a, __b) \ -+ __aarch64_vdup_lane_any (p64, q, __a, __b) - #define __aarch64_vdupq_lane_s8(__a, __b) \ - __aarch64_vdup_lane_any (s8, q, __a, __b) - #define __aarch64_vdupq_lane_s16(__a, __b) \ -@@ -544,6 +587,8 @@ typedef struct poly16x8x4_t - __aarch64_vdup_lane_any (u64, q, __a, __b) - - /* __aarch64_vdupq_laneq internal macros. */ -+#define __aarch64_vdupq_laneq_f16(__a, __b) \ -+ __aarch64_vdup_lane_any (f16, q, __a, __b) - #define __aarch64_vdupq_laneq_f32(__a, __b) \ - __aarch64_vdup_lane_any (f32, q, __a, __b) - #define __aarch64_vdupq_laneq_f64(__a, __b) \ -@@ -552,6 +597,8 @@ typedef struct poly16x8x4_t - __aarch64_vdup_lane_any (p8, q, __a, __b) - #define __aarch64_vdupq_laneq_p16(__a, __b) \ - __aarch64_vdup_lane_any (p16, q, __a, __b) -+#define __aarch64_vdupq_laneq_p64(__a, __b) \ -+ __aarch64_vdup_lane_any (p64, q, __a, __b) - #define __aarch64_vdupq_laneq_s8(__a, __b) \ - __aarch64_vdup_lane_any (s8, q, __a, __b) - #define __aarch64_vdupq_laneq_s16(__a, __b) \ -@@ -601,535 +648,619 @@ typedef struct poly16x8x4_t - }) - - /* vadd */ --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_s8 (int8x8_t __a, int8x8_t __b) - { - return __a + __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_s16 (int16x4_t __a, int16x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_s32 (int32x2_t __a, int32x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_f32 (float32x2_t __a, float32x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_f64 (float64x1_t __a, float64x1_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) - vadd_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_s64 (int64x1_t __a, int64x1_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a + __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a + __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a + __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_f32 (float32x4_t __a, float32x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_f64 (float64x2_t __a, float64x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_s8 (int8x8_t __a, int8x8_t __b) - { - return (int16x8_t) 
__builtin_aarch64_saddlv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_s16 (int16x4_t __a, int16x4_t __b) - { - return (int32x4_t) __builtin_aarch64_saddlv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_s32 (int32x2_t __a, int32x2_t __b) - { - return (int64x2_t) __builtin_aarch64_saddlv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint16x8_t) __builtin_aarch64_uaddlv8qi ((int8x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint32x4_t) __builtin_aarch64_uaddlv4hi ((int16x4_t) __a, - (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint64x2_t) __builtin_aarch64_uaddlv2si ((int32x2_t) __a, - (int32x2_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_high_s8 (int8x16_t __a, int8x16_t __b) - { - return (int16x8_t) __builtin_aarch64_saddl2v16qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_high_s16 (int16x8_t __a, int16x8_t __b) - { - return (int32x4_t) __builtin_aarch64_saddl2v8hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_high_s32 (int32x4_t __a, int32x4_t __b) - { - return (int64x2_t) __builtin_aarch64_saddl2v4si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_high_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint16x8_t) __builtin_aarch64_uaddl2v16qi ((int8x16_t) __a, - (int8x16_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_high_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint32x4_t) __builtin_aarch64_uaddl2v8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_high_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint64x2_t) 
__builtin_aarch64_uaddl2v4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_s8 (int16x8_t __a, int8x8_t __b) - { - return (int16x8_t) __builtin_aarch64_saddwv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_s16 (int32x4_t __a, int16x4_t __b) - { - return (int32x4_t) __builtin_aarch64_saddwv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_s32 (int64x2_t __a, int32x2_t __b) - { - return (int64x2_t) __builtin_aarch64_saddwv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_u8 (uint16x8_t __a, uint8x8_t __b) - { - return (uint16x8_t) __builtin_aarch64_uaddwv8qi ((int16x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_u16 (uint32x4_t __a, uint16x4_t __b) - { - return (uint32x4_t) __builtin_aarch64_uaddwv4hi ((int32x4_t) __a, - (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_u32 (uint64x2_t __a, uint32x2_t __b) - { - return (uint64x2_t) __builtin_aarch64_uaddwv2si ((int64x2_t) __a, - (int32x2_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_high_s8 (int16x8_t __a, int8x16_t __b) - { - return (int16x8_t) __builtin_aarch64_saddw2v16qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_high_s16 (int32x4_t __a, int16x8_t __b) - { - return (int32x4_t) __builtin_aarch64_saddw2v8hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_high_s32 (int64x2_t __a, int32x4_t __b) - { - return (int64x2_t) __builtin_aarch64_saddw2v4si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_high_u8 (uint16x8_t __a, uint8x16_t __b) - { - return (uint16x8_t) __builtin_aarch64_uaddw2v16qi ((int16x8_t) __a, - (int8x16_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_high_u16 (uint32x4_t __a, uint16x8_t __b) - { - return (uint32x4_t) __builtin_aarch64_uaddw2v8hi 
((int32x4_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_high_u32 (uint64x2_t __a, uint32x4_t __b) - { - return (uint64x2_t) __builtin_aarch64_uaddw2v4si ((int64x2_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t) __builtin_aarch64_shaddv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t) __builtin_aarch64_shaddv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t) __builtin_aarch64_shaddv2si (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t) __builtin_aarch64_uhaddv8qi ((int8x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t) __builtin_aarch64_uhaddv4hi ((int16x4_t) __a, - (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t) __builtin_aarch64_uhaddv2si ((int32x2_t) __a, - (int32x2_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t) __builtin_aarch64_shaddv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t) __builtin_aarch64_shaddv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t) __builtin_aarch64_shaddv4si (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t) __builtin_aarch64_uhaddv16qi ((int8x16_t) __a, - (int8x16_t) __b); - } - --__extension__ 
static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t) __builtin_aarch64_uhaddv8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t) __builtin_aarch64_uhaddv4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t) __builtin_aarch64_srhaddv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t) __builtin_aarch64_srhaddv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t) __builtin_aarch64_srhaddv2si (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t) __builtin_aarch64_urhaddv8qi ((int8x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t) __builtin_aarch64_urhaddv4hi ((int16x4_t) __a, - (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t) __builtin_aarch64_urhaddv2si ((int32x2_t) __a, - (int32x2_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t) __builtin_aarch64_srhaddv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t) __builtin_aarch64_srhaddv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t) __builtin_aarch64_srhaddv4si (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t) __builtin_aarch64_urhaddv16qi ((int8x16_t) __a, - (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t) __builtin_aarch64_urhaddv8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t) __builtin_aarch64_urhaddv4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_s16 (int16x8_t __a, int16x8_t __b) - { - return (int8x8_t) __builtin_aarch64_addhnv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_s32 (int32x4_t __a, int32x4_t __b) - { - return (int16x4_t) __builtin_aarch64_addhnv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_s64 (int64x2_t __a, int64x2_t __b) - { - return (int32x2_t) __builtin_aarch64_addhnv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint8x8_t) __builtin_aarch64_addhnv8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint16x4_t) __builtin_aarch64_addhnv4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint32x2_t) __builtin_aarch64_addhnv2di ((int64x2_t) __a, - (int64x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_s16 (int16x8_t __a, int16x8_t __b) - { - return (int8x8_t) __builtin_aarch64_raddhnv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_s32 (int32x4_t __a, int32x4_t __b) - { - return (int16x4_t) __builtin_aarch64_raddhnv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_s64 (int64x2_t __a, int64x2_t __b) - { - return (int32x2_t) __builtin_aarch64_raddhnv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint8x8_t) __builtin_aarch64_raddhnv8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint16x4_t) __builtin_aarch64_raddhnv4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint32x2_t) __builtin_aarch64_raddhnv2di ((int64x2_t) __a, - (int64x2_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int8x16_t) __builtin_aarch64_addhn2v8hi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int16x8_t) __builtin_aarch64_addhn2v4si (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) - { - return (int32x4_t) __builtin_aarch64_addhn2v2di (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) - { - return (uint8x16_t) __builtin_aarch64_addhn2v8hi ((int8x8_t) __a, -@@ -1137,7 +1268,8 @@ vaddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) - (int16x8_t) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) - { - return (uint16x8_t) __builtin_aarch64_addhn2v4si ((int16x4_t) __a, -@@ -1145,7 +1277,8 @@ vaddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) - (int32x4_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) - { - return (uint32x4_t) __builtin_aarch64_addhn2v2di ((int32x2_t) __a, -@@ -1153,25 +1286,29 @@ vaddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, 
uint64x2_t __c) - (int64x2_t) __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int8x16_t) __builtin_aarch64_raddhn2v8hi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int16x8_t) __builtin_aarch64_raddhn2v4si (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) - { - return (int32x4_t) __builtin_aarch64_raddhn2v2di (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) - { - return (uint8x16_t) __builtin_aarch64_raddhn2v8hi ((int8x8_t) __a, -@@ -1179,7 +1316,8 @@ vraddhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) - (int16x8_t) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) - { - return (uint16x8_t) __builtin_aarch64_raddhn2v4si ((int16x4_t) __a, -@@ -1187,7 +1325,8 @@ vraddhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) - (int32x4_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) - { - return (uint32x4_t) __builtin_aarch64_raddhn2v2di ((int32x2_t) __a, -@@ -1195,1101 +1334,1280 @@ vraddhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) - (int64x2_t) __c); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdiv_f32 (float32x2_t __a, float32x2_t __b) - { - return __a / __b; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdiv_f64 (float64x1_t __a, float64x1_t __b) - { - return __a / __b; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdivq_f32 (float32x4_t __a, float32x4_t __b) - { - return __a / __b; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdivq_f64 (float64x2_t __a, float64x2_t __b) - { - return __a / __b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_s8 (int8x8_t __a, int8x8_t __b) - { - return __a * __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_s16 (int16x4_t __a, int16x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_s32 (int32x2_t __a, int32x2_t __b) - { - return __a * __b; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_f32 (float32x2_t __a, float32x2_t __b) - { - return __a * __b; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_f64 (float64x1_t __a, float64x1_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a * __b; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_p8 (poly8x8_t __a, poly8x8_t __b) - { - return (poly8x8_t) __builtin_aarch64_pmulv8qi ((int8x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a * __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a * __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_f32 (float32x4_t __a, float32x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x2_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_f64 (float64x2_t __a, float64x2_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_p8 (poly8x16_t __a, poly8x16_t __b) - { - return (poly8x16_t) __builtin_aarch64_pmulv16qi ((int8x16_t) __a, - (int8x16_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_s8 (int8x8_t __a, int8x8_t __b) - { - return __a & __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_s16 (int16x4_t __a, int16x4_t __b) - { - return __a & __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_s32 (int32x2_t __a, int32x2_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a & __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_s64 (int64x1_t __a, int64x1_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a & __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) - vandq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a & __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a & __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a & __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a & __b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_s8 (int8x8_t __a, int8x8_t __b) - { - return __a | __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_s16 (int16x4_t __a, int16x4_t __b) - { - return __a | __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_s32 (int32x2_t __a, int32x2_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a | __b; - } - --__extension__ 
static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_s64 (int64x1_t __a, int64x1_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a | __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a | __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a | __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a | __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a | __b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_s8 (int8x8_t __a, int8x8_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_s16 (int16x4_t __a, int16x4_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_s32 (int32x2_t __a, int32x2_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - veor_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_s64 (int64x1_t __a, int64x1_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbic_s8 (int8x8_t __a, int8x8_t __b) - { - return __a & ~__b; 
- } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbic_s16 (int16x4_t __a, int16x4_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbic_s32 (int32x2_t __a, int32x2_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbic_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbic_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbic_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbic_s64 (int64x1_t __a, int64x1_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbic_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbicq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbicq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbicq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbicq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbicq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbicq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern 
__inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbicq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbicq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a & ~__b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorn_s8 (int8x8_t __a, int8x8_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorn_s16 (int16x4_t __a, int16x4_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorn_s32 (int32x2_t __a, int32x2_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorn_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorn_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorn_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorn_s64 (int64x1_t __a, int64x1_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorn_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vornq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vornq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vornq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vornq_s64 (int64x2_t __a, 
int64x2_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vornq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vornq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vornq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vornq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a | ~__b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_s8 (int8x8_t __a, int8x8_t __b) - { - return __a - __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_s16 (int16x4_t __a, int16x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_s32 (int32x2_t __a, int32x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_f32 (float32x2_t __a, float32x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_f64 (float64x1_t __a, float64x1_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_s64 (int64x1_t __a, int64x1_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint64x1_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a - __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a - __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a - __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_f32 (float32x4_t __a, float32x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_f64 (float64x2_t __a, float64x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_s8 (int8x8_t __a, int8x8_t __b) - { - return (int16x8_t) __builtin_aarch64_ssublv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_s16 (int16x4_t __a, int16x4_t __b) - { - return (int32x4_t) __builtin_aarch64_ssublv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_s32 (int32x2_t __a, int32x2_t __b) - { - return (int64x2_t) __builtin_aarch64_ssublv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint16x8_t) __builtin_aarch64_usublv8qi ((int8x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint32x4_t) __builtin_aarch64_usublv4hi ((int16x4_t) __a, - (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint64x2_t) __builtin_aarch64_usublv2si ((int32x2_t) __a, - (int32x2_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_high_s8 (int8x16_t __a, int8x16_t __b) - { - return (int16x8_t) __builtin_aarch64_ssubl2v16qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_high_s16 (int16x8_t __a, int16x8_t __b) - { - return (int32x4_t) __builtin_aarch64_ssubl2v8hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_high_s32 (int32x4_t __a, int32x4_t __b) - { - return (int64x2_t) __builtin_aarch64_ssubl2v4si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_high_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint16x8_t) __builtin_aarch64_usubl2v16qi ((int8x16_t) __a, - (int8x16_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_high_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint32x4_t) __builtin_aarch64_usubl2v8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_high_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint64x2_t) __builtin_aarch64_usubl2v4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_s8 (int16x8_t __a, int8x8_t __b) - { - return (int16x8_t) __builtin_aarch64_ssubwv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_s16 (int32x4_t __a, int16x4_t __b) - { - return (int32x4_t) __builtin_aarch64_ssubwv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_s32 (int64x2_t __a, int32x2_t __b) - { - return (int64x2_t) __builtin_aarch64_ssubwv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_u8 (uint16x8_t __a, uint8x8_t __b) - { - return (uint16x8_t) __builtin_aarch64_usubwv8qi ((int16x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_u16 (uint32x4_t __a, uint16x4_t __b) - { - return (uint32x4_t) __builtin_aarch64_usubwv4hi ((int32x4_t) __a, - (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_u32 (uint64x2_t __a, uint32x2_t __b) - { - return (uint64x2_t) __builtin_aarch64_usubwv2si ((int64x2_t) __a, - (int32x2_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_high_s8 (int16x8_t __a, int8x16_t __b) - { - return (int16x8_t) __builtin_aarch64_ssubw2v16qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_high_s16 (int32x4_t __a, int16x8_t __b) - { - return (int32x4_t) __builtin_aarch64_ssubw2v8hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_high_s32 (int64x2_t __a, int32x4_t __b) - { - return (int64x2_t) __builtin_aarch64_ssubw2v4si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_high_u8 (uint16x8_t __a, uint8x16_t __b) - { - return (uint16x8_t) __builtin_aarch64_usubw2v16qi ((int16x8_t) __a, - (int8x16_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_high_u16 (uint32x4_t __a, uint16x8_t __b) - { - return (uint32x4_t) __builtin_aarch64_usubw2v8hi ((int32x4_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_high_u32 (uint64x2_t __a, uint32x4_t __b) - { - return (uint64x2_t) __builtin_aarch64_usubw2v4si ((int64x2_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline int8x8_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t) __builtin_aarch64_sqaddv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t) __builtin_aarch64_sqaddv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t) __builtin_aarch64_sqaddv2si (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_s64 (int64x1_t __a, int64x1_t __b) - { - return (int64x1_t) {__builtin_aarch64_sqadddi (__a[0], __b[0])}; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __builtin_aarch64_uqaddv8qi_uuu (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t)__builtin_aarch64_shsubv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t) __builtin_aarch64_shsubv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t) __builtin_aarch64_shsubv2si (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t) __builtin_aarch64_uhsubv8qi ((int8x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t) __builtin_aarch64_uhsubv4hi ((int16x4_t) __a, - (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t) __builtin_aarch64_uhsubv2si ((int32x2_t) __a, - (int32x2_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) - vhsubq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t) __builtin_aarch64_shsubv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t) __builtin_aarch64_shsubv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t) __builtin_aarch64_shsubv4si (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t) __builtin_aarch64_uhsubv16qi ((int8x16_t) __a, - (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t) __builtin_aarch64_uhsubv8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t) __builtin_aarch64_uhsubv4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_s16 (int16x8_t __a, int16x8_t __b) - { - return (int8x8_t) __builtin_aarch64_subhnv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_s32 (int32x4_t __a, int32x4_t __b) - { - return (int16x4_t) __builtin_aarch64_subhnv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_s64 (int64x2_t __a, int64x2_t __b) - { - return (int32x2_t) __builtin_aarch64_subhnv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint8x8_t) __builtin_aarch64_subhnv8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint16x4_t) __builtin_aarch64_subhnv4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_u64 (uint64x2_t __a, 
uint64x2_t __b) - { - return (uint32x2_t) __builtin_aarch64_subhnv2di ((int64x2_t) __a, - (int64x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_s16 (int16x8_t __a, int16x8_t __b) - { - return (int8x8_t) __builtin_aarch64_rsubhnv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_s32 (int32x4_t __a, int32x4_t __b) - { - return (int16x4_t) __builtin_aarch64_rsubhnv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_s64 (int64x2_t __a, int64x2_t __b) - { - return (int32x2_t) __builtin_aarch64_rsubhnv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint8x8_t) __builtin_aarch64_rsubhnv8hi ((int16x8_t) __a, - (int16x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint16x4_t) __builtin_aarch64_rsubhnv4si ((int32x4_t) __a, - (int32x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint32x2_t) __builtin_aarch64_rsubhnv2di ((int64x2_t) __a, - (int64x2_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int8x16_t) __builtin_aarch64_rsubhn2v8hi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int16x8_t) __builtin_aarch64_rsubhn2v4si (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) - { - return (int32x4_t) __builtin_aarch64_rsubhn2v2di (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) - { - return (uint8x16_t) __builtin_aarch64_rsubhn2v8hi ((int8x8_t) __a, -@@ -2297,7 +2615,8 @@ vrsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) - (int16x8_t) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) - { - return (uint16x8_t) __builtin_aarch64_rsubhn2v4si ((int16x4_t) __a, -@@ -2305,7 +2624,8 @@ vrsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) - (int32x4_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) - { - return (uint32x4_t) __builtin_aarch64_rsubhn2v2di ((int32x2_t) __a, -@@ -2313,25 +2633,29 @@ vrsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) - (int64x2_t) __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_high_s16 (int8x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int8x16_t) __builtin_aarch64_subhn2v8hi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_high_s32 (int16x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int16x8_t) __builtin_aarch64_subhn2v4si (__a, __b, __c);; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_high_s64 (int32x2_t __a, int64x2_t __b, int64x2_t __c) - { - return (int32x4_t) __builtin_aarch64_subhn2v2di (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) - { - return (uint8x16_t) __builtin_aarch64_subhn2v8hi ((int8x8_t) __a, -@@ -2339,7 +2663,8 @@ vsubhn_high_u16 (uint8x8_t __a, uint16x8_t __b, uint16x8_t __c) - (int16x8_t) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) - { - return (uint16x8_t) __builtin_aarch64_subhn2v4si ((int16x4_t) __a, -@@ -2347,7 +2672,8 @@ vsubhn_high_u32 (uint16x4_t __a, uint32x4_t __b, uint32x4_t __c) - (int32x4_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) - { - return (uint32x4_t) __builtin_aarch64_subhn2v2di ((int32x2_t) __a, -@@ -2355,453 +2681,542 @@ vsubhn_high_u64 (uint32x2_t __a, uint64x2_t __b, uint64x2_t __c) - (int64x2_t) __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __builtin_aarch64_uqaddv4hi_uuu (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __builtin_aarch64_uqaddv2si_uuu (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_u64 (uint64x1_t __a, uint64x1_t __b) - { - return (uint64x1_t) {__builtin_aarch64_uqadddi_uuu (__a[0], __b[0])}; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t) __builtin_aarch64_sqaddv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t) __builtin_aarch64_sqaddv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t) __builtin_aarch64_sqaddv4si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_s64 (int64x2_t __a, int64x2_t __b) - { - return (int64x2_t) __builtin_aarch64_sqaddv2di (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __builtin_aarch64_uqaddv16qi_uuu (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __builtin_aarch64_uqaddv8hi_uuu (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __builtin_aarch64_uqaddv4si_uuu (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __builtin_aarch64_uqaddv2di_uuu (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t) __builtin_aarch64_sqsubv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t) __builtin_aarch64_sqsubv4hi (__a, 
__b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t) __builtin_aarch64_sqsubv2si (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_s64 (int64x1_t __a, int64x1_t __b) - { - return (int64x1_t) {__builtin_aarch64_sqsubdi (__a[0], __b[0])}; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __builtin_aarch64_uqsubv8qi_uuu (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __builtin_aarch64_uqsubv4hi_uuu (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __builtin_aarch64_uqsubv2si_uuu (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_u64 (uint64x1_t __a, uint64x1_t __b) - { - return (uint64x1_t) {__builtin_aarch64_uqsubdi_uuu (__a[0], __b[0])}; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t) __builtin_aarch64_sqsubv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t) __builtin_aarch64_sqsubv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t) __builtin_aarch64_sqsubv4si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_s64 (int64x2_t __a, int64x2_t __b) - { - return (int64x2_t) __builtin_aarch64_sqsubv2di (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __builtin_aarch64_uqsubv16qi_uuu (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - 
vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __builtin_aarch64_uqsubv8hi_uuu (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __builtin_aarch64_uqsubv4si_uuu (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __builtin_aarch64_uqsubv2di_uuu (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqneg_s8 (int8x8_t __a) - { - return (int8x8_t) __builtin_aarch64_sqnegv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqneg_s16 (int16x4_t __a) - { - return (int16x4_t) __builtin_aarch64_sqnegv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqneg_s32 (int32x2_t __a) - { - return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqneg_s64 (int64x1_t __a) - { - return (int64x1_t) {__builtin_aarch64_sqnegdi (__a[0])}; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqnegq_s8 (int8x16_t __a) - { - return (int8x16_t) __builtin_aarch64_sqnegv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqnegq_s16 (int16x8_t __a) - { - return (int16x8_t) __builtin_aarch64_sqnegv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqnegq_s32 (int32x4_t __a) - { - return (int32x4_t) __builtin_aarch64_sqnegv4si (__a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabs_s8 (int8x8_t __a) - { - return (int8x8_t) __builtin_aarch64_sqabsv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabs_s16 (int16x4_t __a) - { - return (int16x4_t) __builtin_aarch64_sqabsv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabs_s32 (int32x2_t __a) - { - return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); - } - 
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabs_s64 (int64x1_t __a) - { - return (int64x1_t) {__builtin_aarch64_sqabsdi (__a[0])}; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabsq_s8 (int8x16_t __a) - { - return (int8x16_t) __builtin_aarch64_sqabsv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabsq_s16 (int16x8_t __a) - { - return (int16x8_t) __builtin_aarch64_sqabsv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabsq_s32 (int32x4_t __a) - { - return (int32x4_t) __builtin_aarch64_sqabsv4si (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmulh_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t) __builtin_aarch64_sqdmulhv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmulh_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t) __builtin_aarch64_sqdmulhv2si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t) __builtin_aarch64_sqdmulhv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t) __builtin_aarch64_sqdmulhv4si (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t) __builtin_aarch64_sqrdmulhv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t) __builtin_aarch64_sqrdmulhv2si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t) __builtin_aarch64_sqrdmulhv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) - { - 
return (int32x4_t) __builtin_aarch64_sqrdmulhv4si (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_s8 (uint64_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_s16 (uint64_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_s32 (uint64_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_s64 (uint64_t __a) - { - return (int64x1_t) {__a}; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_f16 (uint64_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_f32 (uint64_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_u8 (uint64_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_u16 (uint64_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_u32 (uint64_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_u64 (uint64_t __a) - { - return (uint64x1_t) {__a}; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_f64 (uint64_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_p8 (uint64_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_p16 (uint64_t __a) - { - return (poly16x4_t) __a; - } - -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcreate_p64 (uint64_t __a) 
-+{ -+ return (poly64x1_t) __a; -+} -+ - /* vget_lane */ - --__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_f16 (float16x4_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_f32 (float32x2_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_f64 (float64x1_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_p8 (poly8x8_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_p16 (poly16x4_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vget_lane_p64 (poly64x1_t __a, const int __b) -+{ -+ return __aarch64_vget_lane_any (__a, __b); -+} -+ -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_s8 (int8x8_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_s16 (int16x4_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_s32 (int32x2_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_s64 (int64x1_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_u8 (uint8x8_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_u16 (uint16x4_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__extension__ 
extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_u32 (uint32x2_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_u64 (uint64x1_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); -@@ -2809,79 +3224,99 @@ vget_lane_u64 (uint64x1_t __a, const int __b) - - /* vgetq_lane */ - --__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_f16 (float16x8_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_f32 (float32x4_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_f64 (float64x2_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_p8 (poly8x16_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_p16 (poly16x8_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vgetq_lane_p64 (poly64x2_t __a, const int __b) -+{ -+ return __aarch64_vget_lane_any (__a, __b); -+} -+ -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_s8 (int8x16_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_s16 (int16x8_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_s32 (int32x4_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_s64 (int64x2_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_u8 (uint8x16_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_u16 (uint16x8_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_u32 (uint32x4_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_u64 (uint64x2_t __a, const int __b) - { - return __aarch64_vget_lane_any (__a, __b); -@@ -2889,1953 +3324,2832 @@ vgetq_lane_u64 (uint64x2_t __a, const int __b) - - /* vreinterpret */ - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_f16 (float16x4_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_f64 (float64x1_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_s8 (int8x8_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_s16 (int16x4_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_s32 (int32x2_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_s64 (int64x1_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_f32 (float32x2_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_u8 (uint8x8_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_u16 (uint16x4_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_u32 (uint32x2_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_u64 (uint64x1_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p8_p16 (poly16x4_t __a) - { - return (poly8x8_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p8_p64 (poly64x1_t __a) -+{ -+ return (poly8x8_t) __a; -+} -+ -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_f64 (float64x2_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_s8 (int8x16_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_s16 (int16x8_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_s32 (int32x4_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_s64 (int64x2_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_f16 (float16x8_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_f32 (float32x4_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_u8 (uint8x16_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_u16 (uint16x8_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - 
vreinterpretq_p8_u32 (uint32x4_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_u64 (uint64x2_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p8_p16 (poly16x8_t __a) - { - return (poly8x16_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p8_p64 (poly64x2_t __a) -+{ -+ return (poly8x16_t) __a; -+} -+ -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p8_p128 (poly128_t __a) -+{ -+ return (poly8x16_t)__a; -+} -+ -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_f16 (float16x4_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_f64 (float64x1_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_s8 (int8x8_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_s16 (int16x4_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_s32 (int32x2_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_s64 (int64x1_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_f32 (float32x2_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_u8 (uint8x8_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_u16 (uint16x4_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_u32 (uint32x2_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_u64 (uint64x1_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_p16_p8 (poly8x8_t __a) - { - return (poly16x4_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p16_p64 (poly64x1_t __a) -+{ -+ return (poly16x4_t) __a; -+} -+ -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_f64 (float64x2_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_s8 (int8x16_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_s16 (int16x8_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_s32 (int32x4_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_s64 (int64x2_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_f16 (float16x8_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_f32 (float32x4_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_u8 (uint8x16_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_u16 (uint16x8_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_u32 (uint32x4_t __a) - { - return (poly16x8_t) __a; - } 
- --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_u64 (uint64x2_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p16_p8 (poly8x16_t __a) - { - return (poly16x8_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p16_p64 (poly64x2_t __a) -+{ -+ return (poly16x8_t) __a; -+} -+ -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p16_p128 (poly128_t __a) -+{ -+ return (poly16x8_t)__a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_f16 (float16x4_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_f64 (float64x1_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_s8 (int8x8_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_s16 (int16x4_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_s32 (int32x2_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_s64 (int64x1_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_f32 (float32x2_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_u8 (uint8x8_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_u16 (uint16x4_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_u32 (uint32x2_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_u64 (uint64x1_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_p8 (poly8x8_t __a) -+{ -+ return (poly64x1_t) __a; -+} -+ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_p64_p16 (poly16x4_t __a) -+{ -+ return (poly64x1_t)__a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_f64 (float64x2_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_s8 (int8x16_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_s16 (int16x8_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_s32 (int32x4_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_s64 (int64x2_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_f16 (float16x8_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_f32 (float32x4_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_p128 (poly128_t __a) -+{ -+ return (poly64x2_t)__a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_u8 (uint8x16_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_u16 (uint16x8_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_p16 (poly16x8_t __a) -+{ -+ return (poly64x2_t)__a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_u32 (uint32x4_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_u64 (uint64x2_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p64_p8 (poly8x16_t __a) -+{ -+ return (poly64x2_t) __a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_p8 (poly8x16_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_p16 (poly16x8_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_f16 (float16x8_t __a) -+{ -+ return (poly128_t) __a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_f32 (float32x4_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_p64 (poly64x2_t 
__a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_s64 (int64x2_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_u64 (uint64x2_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_s8 (int8x16_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_s16 (int16x8_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_s32 (int32x4_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_u8 (uint8x16_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_u16 (uint16x8_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_p128_u32 (uint32x4_t __a) -+{ -+ return (poly128_t)__a; -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_f64 (float64x1_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_s8 (int8x8_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_s16 (int16x4_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_s32 (int32x2_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_s64 (int64x1_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_f32 (float32x2_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_u8 (uint8x8_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) - vreinterpret_f16_u16 (uint16x4_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_u32 (uint32x2_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_u64 (uint64x1_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_p8 (poly8x8_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f16_p16 (poly16x4_t __a) - { - return (float16x4_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_f16_p64 (poly64x1_t __a) -+{ -+ return (float16x4_t) __a; -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_f64 (float64x2_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_s8 (int8x16_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_s16 (int16x8_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_s32 (int32x4_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_s64 (int64x2_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_f32 (float32x4_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_u8 (uint8x16_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_u16 (uint16x8_t __a) - { - return 
(float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_u32 (uint32x4_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_u64 (uint64x2_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_p8 (poly8x16_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_f16_p128 (poly128_t __a) -+{ -+ return (float16x8_t) __a; -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f16_p16 (poly16x8_t __a) - { - return (float16x8_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_f16_p64 (poly64x2_t __a) -+{ -+ return (float16x8_t) __a; -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_f16 (float16x4_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_f64 (float64x1_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_s8 (int8x8_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_s16 (int16x4_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_s32 (int32x2_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_s64 (int64x1_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_u8 (uint8x8_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_u16 (uint16x4_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_u32 (uint32x2_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_u64 (uint64x1_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_p8 (poly8x8_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f32_p16 (poly16x4_t __a) - { - return (float32x2_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_f32_p64 (poly64x1_t __a) -+{ -+ return (float32x2_t) __a; -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_f16 (float16x8_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_f64 (float64x2_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_s8 (int8x16_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_s16 (int16x8_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_s32 (int32x4_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_s64 (int64x2_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_u8 (uint8x16_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_u16 (uint16x8_t __a) 
- { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_u32 (uint32x4_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_u64 (uint64x2_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_p8 (poly8x16_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_p16 (poly16x8_t __a) - { - return (float32x4_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_f32_p64 (poly64x2_t __a) -+{ -+ return (float32x4_t) __a; -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_f32_p128 (poly128_t __a) -+{ -+ return (float32x4_t)__a; -+} -+ -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_f16 (float16x4_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_f32 (float32x2_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_p8 (poly8x8_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_p16 (poly16x4_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_f64_p64 (poly64x1_t __a) -+{ -+ return (float64x1_t) __a; -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_s8 (int8x8_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_s16 (int16x4_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_s32 
(int32x2_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_s64 (int64x1_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_u8 (uint8x8_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_u16 (uint16x4_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_u32 (uint32x2_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x1_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_f64_u64 (uint64x1_t __a) - { - return (float64x1_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_f16 (float16x8_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_f32 (float32x4_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_p8 (poly8x16_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_p16 (poly16x8_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_f64_p64 (poly64x2_t __a) -+{ -+ return (float64x2_t) __a; -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_s8 (int8x16_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_s16 (int16x8_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_s32 (int32x4_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t 
__attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_s64 (int64x2_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_u8 (uint8x16_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_u16 (uint16x8_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_u32 (uint32x4_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline float64x2_t __attribute__((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f64_u64 (uint64x2_t __a) - { - return (float64x2_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_f16 (float16x4_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_f64 (float64x1_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_s8 (int8x8_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_s16 (int16x4_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_s32 (int32x2_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_f32 (float32x2_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_u8 (uint8x8_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_u16 (uint16x4_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline 
int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_u32 (uint32x2_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_u64 (uint64x1_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_p8 (poly8x8_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s64_p16 (poly16x4_t __a) - { - return (int64x1_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_s64_p64 (poly64x1_t __a) -+{ -+ return (int64x1_t) __a; -+} -+ -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_f64 (float64x2_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_s8 (int8x16_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_s16 (int16x8_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_s32 (int32x4_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_f16 (float16x8_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_f32 (float32x4_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_u8 (uint8x16_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_u16 (uint16x8_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_u32 (uint32x4_t __a) - { - return (int64x2_t) __a; - } - 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_u64 (uint64x2_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_p8 (poly8x16_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_p16 (poly16x8_t __a) - { - return (int64x2_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s64_p64 (poly64x2_t __a) -+{ -+ return (int64x2_t) __a; -+} -+ -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s64_p128 (poly128_t __a) -+{ -+ return (int64x2_t)__a; -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_f16 (float16x4_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_f64 (float64x1_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_s8 (int8x8_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_s16 (int16x4_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_s32 (int32x2_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_s64 (int64x1_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_f32 (float32x2_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_u8 (uint8x8_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_u16 (uint16x4_t 
__a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_u32 (uint32x2_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_p8 (poly8x8_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u64_p16 (poly16x4_t __a) - { - return (uint64x1_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_u64_p64 (poly64x1_t __a) -+{ -+ return (uint64x1_t) __a; -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_f64 (float64x2_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_s8 (int8x16_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_s16 (int16x8_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_s32 (int32x4_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_s64 (int64x2_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_f16 (float16x8_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_f32 (float32x4_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_u8 (uint8x16_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_u16 (uint16x8_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_u32 (uint32x4_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_p8 (poly8x16_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_p16 (poly16x8_t __a) - { - return (uint64x2_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u64_p64 (poly64x2_t __a) -+{ -+ return (uint64x2_t) __a; -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u64_p128 (poly128_t __a) -+{ -+ return (uint64x2_t)__a; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_f16 (float16x4_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_f64 (float64x1_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_s16 (int16x4_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_s32 (int32x2_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_s64 (int64x1_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_f32 (float32x2_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_u8 (uint8x8_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_u16 (uint16x4_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_u32 (uint32x2_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_u64 (uint64x1_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_p8 (poly8x8_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s8_p16 (poly16x4_t __a) - { - return (int8x8_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_s8_p64 (poly64x1_t __a) -+{ -+ return (int8x8_t) __a; -+} -+ -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_f64 (float64x2_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_s16 (int16x8_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_s32 (int32x4_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_s64 (int64x2_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_f16 (float16x8_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_f32 (float32x4_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_u8 (uint8x16_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_u16 (uint16x8_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_u32 (uint32x4_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_u64 (uint64x2_t __a) - { - return (int8x16_t) __a; - } - 
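# The hunks above and below apply one mechanical change to every intrinsic
# in the AArch64 arm_neon.h: `static __inline` definitions become `extern
# __inline` with the `__gnu_inline__` and `__artificial__` attributes.  A
# minimal sketch of the pattern outside arm_neon.h (plain C; the my_cast_*
# names are hypothetical stand-ins for the real vreinterpret_* intrinsics,
# not part of the patch):

/* Old style: internal linkage, one copy of the function per
   translation unit.  */
__extension__ static __inline int __attribute__ ((__always_inline__))
my_cast_old (unsigned x)
{
  return (int) x;
}

/* New style: GNU inline semantics.  With __gnu_inline__ the body is
   used only for inlining and no out-of-line symbol is ever emitted,
   regardless of -std=; __artificial__ makes debuggers treat the
   wrapper as a single unit at the call site, like a builtin.  */
__extension__ extern __inline int
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
my_cast_new (unsigned x)
{
  return (int) x;
}

int
main (void)
{
  /* Both variants inline to the same code; the difference is linkage
     and debug-info behaviour, not the generated instructions.  */
  return my_cast_old (1u) - my_cast_new (1u);
}
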
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_p8 (poly8x16_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_p16 (poly16x8_t __a) - { - return (int8x16_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s8_p64 (poly64x2_t __a) -+{ -+ return (int8x16_t) __a; -+} -+ -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s8_p128 (poly128_t __a) -+{ -+ return (int8x16_t)__a; -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_f16 (float16x4_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_f64 (float64x1_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_s8 (int8x8_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_s32 (int32x2_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_s64 (int64x1_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_f32 (float32x2_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_u8 (uint8x8_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_u16 (uint16x4_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_u32 (uint32x2_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_u64 (uint64x1_t __a) - { - return (int16x4_t) 
__a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_p8 (poly8x8_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s16_p16 (poly16x4_t __a) - { - return (int16x4_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_s16_p64 (poly64x1_t __a) -+{ -+ return (int16x4_t) __a; -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_f64 (float64x2_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_s8 (int8x16_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_s32 (int32x4_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_s64 (int64x2_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_f16 (float16x8_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_f32 (float32x4_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_u8 (uint8x16_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_u16 (uint16x8_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_u32 (uint32x4_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_u64 (uint64x2_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_p8 (poly8x16_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_p16 (poly16x8_t __a) - { - return (int16x8_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s16_p64 (poly64x2_t __a) -+{ -+ return (int16x8_t) __a; -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s16_p128 (poly128_t __a) -+{ -+ return (int16x8_t)__a; -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_f16 (float16x4_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_f64 (float64x1_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_s8 (int8x8_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_s16 (int16x4_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_s64 (int64x1_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_f32 (float32x2_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_u8 (uint8x8_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_u16 (uint16x4_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_u32 (uint32x2_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_u64 (uint64x1_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_p8 (poly8x8_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_s32_p16 (poly16x4_t __a) - { - return (int32x2_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_s32_p64 (poly64x1_t __a) -+{ -+ return (int32x2_t) __a; -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_f64 (float64x2_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_s8 (int8x16_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_s16 (int16x8_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_s64 (int64x2_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_f16 (float16x8_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_f32 (float32x4_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_u8 (uint8x16_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_u16 (uint16x8_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_u32 (uint32x4_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_u64 (uint64x2_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_p8 (poly8x16_t __a) - { - return (int32x4_t) __a; - } - --__extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_p16 (poly16x8_t __a) - { - return (int32x4_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s32_p64 (poly64x2_t __a) -+{ -+ return (int32x4_t) __a; -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_s32_p128 (poly128_t __a) -+{ -+ return (int32x4_t)__a; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_f16 (float16x4_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_f64 (float64x1_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_s8 (int8x8_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_s16 (int16x4_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_s32 (int32x2_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_s64 (int64x1_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_f32 (float32x2_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_u16 (uint16x4_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_u32 (uint32x2_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_u64 (uint64x1_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_p8 (poly8x8_t __a) - { - return (uint8x8_t) __a; - } - 
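# Besides the attribute change, these hunks thread new poly64/poly128
# variants through the vreinterpret family (vreinterpret_u64_p64,
# vreinterpretq_s8_p128, and so on).  All vreinterpret intrinsics are pure
# bit-pattern casts that generate no instructions.  A usage sketch, under
# the assumption of an AArch64 compiler whose arm_neon.h carries these
# additions and has the poly64 types enabled (e.g. -march=armv8-a+crypto):

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  /* Build a poly64x1_t, then view the same 64 bits as a uint64x1_t via
     one of the intrinsics added by this patch.  The reinterpret itself
     compiles to nothing; only the type changes.  */
  poly64x1_t p = vcreate_p64 (0x0123456789abcdefULL);
  uint64x1_t u = vreinterpret_u64_p64 (p);
  printf ("%llx\n", (unsigned long long) vget_lane_u64 (u, 0));
  return 0;
}
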
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u8_p16 (poly16x4_t __a) - { - return (uint8x8_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_u8_p64 (poly64x1_t __a) -+{ -+ return (uint8x8_t) __a; -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_f64 (float64x2_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_s8 (int8x16_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_s16 (int16x8_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_s32 (int32x4_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_s64 (int64x2_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_f16 (float16x8_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_f32 (float32x4_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_u16 (uint16x8_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_u32 (uint32x4_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_u64 (uint64x2_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_p8 (poly8x16_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_p16 (poly16x8_t __a) - { - return (uint8x16_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u8_p64 (poly64x2_t __a) -+{ -+ return (uint8x16_t) __a; -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u8_p128 (poly128_t __a) -+{ -+ return (uint8x16_t)__a; -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_f16 (float16x4_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_f64 (float64x1_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_s8 (int8x8_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_s16 (int16x4_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_s32 (int32x2_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_s64 (int64x1_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_f32 (float32x2_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_u8 (uint8x8_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_u32 (uint32x2_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_u64 (uint64x1_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_p8 (poly8x8_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u16_p16 (poly16x4_t __a) - { - return (uint16x4_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_u16_p64 (poly64x1_t __a) -+{ -+ return (uint16x4_t) __a; -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_f64 (float64x2_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_s8 (int8x16_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_s16 (int16x8_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_s32 (int32x4_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_s64 (int64x2_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_f16 (float16x8_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_f32 (float32x4_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_u8 (uint8x16_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_u32 (uint32x4_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_u64 (uint64x2_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_p8 (poly8x16_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - 
vreinterpretq_u16_p16 (poly16x8_t __a) - { - return (uint16x8_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u16_p64 (poly64x2_t __a) -+{ -+ return (uint16x8_t) __a; -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u16_p128 (poly128_t __a) -+{ -+ return (uint16x8_t)__a; -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_f16 (float16x4_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_f64 (float64x1_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_s8 (int8x8_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_s16 (int16x4_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_s32 (int32x2_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_s64 (int64x1_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_f32 (float32x2_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_u8 (uint8x8_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_u16 (uint16x4_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_u64 (uint64x1_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_p8 (poly8x8_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpret_u32_p16 (poly16x4_t __a) - { - return (uint32x2_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpret_u32_p64 (poly64x1_t __a) -+{ -+ return (uint32x2_t) __a; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_f64 (float64x2_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_s8 (int8x16_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_s16 (int16x8_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_s32 (int32x4_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_s64 (int64x2_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_f16 (float16x8_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_f32 (float32x4_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_u8 (uint8x16_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_u16 (uint16x8_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_u64 (uint64x2_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_p8 (poly8x16_t __a) - { - return (uint32x4_t) __a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_p16 (poly16x8_t __a) - { - return (uint32x4_t) __a; - 
} - -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u32_p64 (poly64x2_t __a) -+{ -+ return (uint32x4_t) __a; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vreinterpretq_u32_p128 (poly128_t __a) -+{ -+ return (uint32x4_t)__a; -+} -+ - /* vset_lane */ - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_f16 (float16_t __elem, float16x4_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_f32 (float32_t __elem, float32x2_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_f64 (float64_t __elem, float64x1_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_p8 (poly8_t __elem, poly8x8_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_p16 (poly16_t __elem, poly16x4_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vset_lane_p64 (poly64_t __elem, poly64x1_t __vec, const int __index) -+{ -+ return __aarch64_vset_lane_any (__elem, __vec, __index); -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_s8 (int8_t __elem, int8x8_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_s16 (int16_t __elem, int16x4_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_s32 (int32_t __elem, int32x2_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_s64 (int64_t __elem, int64x1_t __vec, 
const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_u8 (uint8_t __elem, uint8x8_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_u16 (uint16_t __elem, uint16x4_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_u32 (uint32_t __elem, uint32x2_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); -@@ -4843,79 +6157,99 @@ vset_lane_u64 (uint64_t __elem, uint64x1_t __vec, const int __index) - - /* vsetq_lane */ - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_f16 (float16_t __elem, float16x8_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_f32 (float32_t __elem, float32x4_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_f64 (float64_t __elem, float64x2_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_p8 (poly8_t __elem, poly8x16_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_p16 (poly16_t __elem, poly16x8_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsetq_lane_p64 (poly64_t __elem, poly64x2_t __vec, const int __index) -+{ -+ return __aarch64_vset_lane_any (__elem, __vec, __index); -+} -+ -+__extension__ extern 
__inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_s8 (int8_t __elem, int8x16_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_s16 (int16_t __elem, int16x8_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_s32 (int32_t __elem, int32x4_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_s64 (int64_t __elem, int64x2_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_u8 (uint8_t __elem, uint8x16_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_u16 (uint16_t __elem, uint16x8_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_u32 (uint32_t __elem, uint32x4_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index) - { - return __aarch64_vset_lane_any (__elem, __vec, __index); -@@ -4926,79 +6260,99 @@ vsetq_lane_u64 (uint64_t __elem, uint64x2_t __vec, const int __index) - uint64x1_t lo = vcreate_u64 (vgetq_lane_u64 (tmp, 0)); \ - return vreinterpret_##__TYPE##_u64 (lo); - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_f16 (float16x8_t __a) - { - __GET_LOW (f16); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_f32 (float32x4_t __a) - { - __GET_LOW (f32); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_f64 (float64x2_t __a) - { - return (float64x1_t) {vgetq_lane_f64 (__a, 0)}; - } - --__extension__ 
static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_p8 (poly8x16_t __a) - { - __GET_LOW (p8); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_p16 (poly16x8_t __a) - { - __GET_LOW (p16); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vget_low_p64 (poly64x2_t __a) -+{ -+ __GET_LOW (p64); -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_s8 (int8x16_t __a) - { - __GET_LOW (s8); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_s16 (int16x8_t __a) - { - __GET_LOW (s16); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_s32 (int32x4_t __a) - { - __GET_LOW (s32); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_s64 (int64x2_t __a) - { - __GET_LOW (s64); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_u8 (uint8x16_t __a) - { - __GET_LOW (u8); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_u16 (uint16x8_t __a) - { - __GET_LOW (u16); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_u32 (uint32x4_t __a) - { - __GET_LOW (u32); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_u64 (uint64x2_t __a) - { - return vcreate_u64 (vgetq_lane_u64 (__a, 0)); -@@ -5011,73 +6365,92 @@ vget_low_u64 (uint64x2_t __a) - uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ - return vreinterpret_##__TYPE##_u64 (hi); - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_f16 (float16x8_t __a) - { - __GET_HIGH (f16); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_f32 (float32x4_t __a) - { - __GET_HIGH (f32); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) - vget_high_f64 (float64x2_t __a) - { - __GET_HIGH (f64); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_p8 (poly8x16_t __a) - { - __GET_HIGH (p8); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_p16 (poly16x8_t __a) - { - __GET_HIGH (p16); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vget_high_p64 (poly64x2_t __a) -+{ -+ __GET_HIGH (p64); -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_s8 (int8x16_t __a) - { - __GET_HIGH (s8); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_s16 (int16x8_t __a) - { - __GET_HIGH (s16); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_s32 (int32x4_t __a) - { - __GET_HIGH (s32); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_s64 (int64x2_t __a) - { - __GET_HIGH (s64); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_u8 (uint8x16_t __a) - { - __GET_HIGH (u8); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_u16 (uint16x8_t __a) - { - __GET_HIGH (u16); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_u32 (uint32x4_t __a) - { - __GET_HIGH (u32); -@@ -5085,98 +6458,120 @@ vget_high_u32 (uint32x4_t __a) - - #undef __GET_HIGH - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_u64 (uint64x2_t __a) - { - return vcreate_u64 (vgetq_lane_u64 (__a, 1)); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x16_t) __builtin_aarch64_combinev8qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x8_t) __builtin_aarch64_combinev4hi (__a, __b); - } - --__extension__ static __inline int32x4_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x4_t) __builtin_aarch64_combinev2si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_s64 (int64x1_t __a, int64x1_t __b) - { - return __builtin_aarch64_combinedi (__a[0], __b[0]); - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_f16 (float16x4_t __a, float16x4_t __b) - { - return __builtin_aarch64_combinev4hf (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_f32 (float32x2_t __a, float32x2_t __b) - { - return (float32x4_t) __builtin_aarch64_combinev2sf (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, - (int16x4_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x4_t) __builtin_aarch64_combinev2si ((int32x2_t) __a, - (int32x2_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_u64 (uint64x1_t __a, uint64x1_t __b) - { - return (uint64x2_t) __builtin_aarch64_combinedi (__a[0], __b[0]); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_f64 (float64x1_t __a, float64x1_t __b) - { - return __builtin_aarch64_combinedf (__a[0], __b[0]); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_p8 (poly8x8_t __a, poly8x8_t __b) - { - return (poly8x16_t) __builtin_aarch64_combinev8qi ((int8x8_t) __a, - (int8x8_t) __b); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_p16 (poly16x4_t __a, poly16x4_t __b) - { - return (poly16x8_t) __builtin_aarch64_combinev4hi ((int16x4_t) __a, - (int16x4_t) __b); - } - -+__extension__ extern __inline poly64x2_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcombine_p64 (poly64x1_t __a, poly64x1_t __b) -+{ -+ return (poly64x2_t) __builtin_aarch64_combinedi_ppp (__a[0], __b[0]); -+} -+ - /* Start of temporary inline asm implementations. */ - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c) - { - int8x8_t result; -@@ -5187,7 +6582,8 @@ vaba_s8 (int8x8_t a, int8x8_t b, int8x8_t c) - return result; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c) - { - int16x4_t result; -@@ -5198,7 +6594,8 @@ vaba_s16 (int16x4_t a, int16x4_t b, int16x4_t c) - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c) - { - int32x2_t result; -@@ -5209,7 +6606,8 @@ vaba_s32 (int32x2_t a, int32x2_t b, int32x2_t c) - return result; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) - { - uint8x8_t result; -@@ -5220,7 +6618,8 @@ vaba_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) - return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) - { - uint16x4_t result; -@@ -5231,7 +6630,8 @@ vaba_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) - return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) - { - uint32x2_t result; -@@ -5242,7 +6642,8 @@ vaba_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) - { - int16x8_t result; -@@ -5253,7 +6654,8 @@ vabal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) - { - int32x4_t result; -@@ -5264,7 +6666,8 @@ vabal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) - { - int64x2_t result; -@@ -5275,7 +6678,8 @@ vabal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) - return result; - } - --__extension__ 
static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) - { - uint16x8_t result; -@@ -5286,7 +6690,8 @@ vabal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) - { - uint32x4_t result; -@@ -5297,7 +6702,8 @@ vabal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) - { - uint64x2_t result; -@@ -5308,7 +6714,8 @@ vabal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) - { - int16x8_t result; -@@ -5319,7 +6726,8 @@ vabal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) - { - int32x4_t result; -@@ -5330,7 +6738,8 @@ vabal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) - { - int64x2_t result; -@@ -5341,7 +6750,8 @@ vabal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) - { - uint16x8_t result; -@@ -5352,7 +6762,8 @@ vabal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) - { - uint32x4_t result; -@@ -5363,7 +6774,8 @@ vabal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) - { - uint64x2_t result; -@@ -5374,7 +6786,8 @@ vabal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) - return result; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) - 
{ - int8x16_t result; -@@ -5385,7 +6798,8 @@ vabaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) - { - int16x8_t result; -@@ -5396,7 +6810,8 @@ vabaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) - { - int32x4_t result; -@@ -5407,7 +6822,8 @@ vabaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) - return result; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) - { - uint8x16_t result; -@@ -5418,7 +6834,8 @@ vabaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) - { - uint16x8_t result; -@@ -5429,7 +6846,8 @@ vabaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) - { - uint32x4_t result; -@@ -5440,18 +6858,8 @@ vabaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) - return result; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vabd_f32 (float32x2_t a, float32x2_t b) --{ -- float32x2_t result; -- __asm__ ("fabd %0.2s, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabd_s8 (int8x8_t a, int8x8_t b) - { - int8x8_t result; -@@ -5462,7 +6870,8 @@ vabd_s8 (int8x8_t a, int8x8_t b) - return result; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabd_s16 (int16x4_t a, int16x4_t b) - { - int16x4_t result; -@@ -5473,7 +6882,8 @@ vabd_s16 (int16x4_t a, int16x4_t b) - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabd_s32 (int32x2_t a, int32x2_t b) - { - int32x2_t result; -@@ -5484,7 +6894,8 @@ vabd_s32 (int32x2_t a, int32x2_t b) - return result; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabd_u8 (uint8x8_t a, uint8x8_t b) - { - uint8x8_t result; -@@ -5495,7 +6906,8 @@ vabd_u8 (uint8x8_t a, uint8x8_t b) - return result; - } - 
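/* Illustrative sketch, not part of the patch hunks above or below.  The
   conversion those hunks perform is mechanical: every intrinsic moves from
   `static __inline' to the GNU inlining idiom shown here.  With
   __gnu_inline__, `extern __inline' keeps GNU89 inline semantics in every
   -std= mode: the body is used only for inlining and no out-of-line
   definition is ever emitted, while __artificial__ asks debuggers to treat
   the expansion as a single unit.  The helper name is hypothetical; the
   intrinsics it calls (vabd_u8, vadd_u8) are real.  */
#include <arm_neon.h>

__extension__ extern __inline uint8x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
my_vabd2_u8 (uint8x8_t a, uint8x8_t b)
{
  /* Same declaration idiom as the converted intrinsics above.  */
  return vadd_u8 (vabd_u8 (a, b), vabd_u8 (a, b));
}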
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabd_u16 (uint16x4_t a, uint16x4_t b) - { - uint16x4_t result; -@@ -5506,7 +6918,8 @@ vabd_u16 (uint16x4_t a, uint16x4_t b) - return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabd_u32 (uint32x2_t a, uint32x2_t b) - { - uint32x2_t result; -@@ -5517,18 +6930,8 @@ vabd_u32 (uint32x2_t a, uint32x2_t b) - return result; - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vabdd_f64 (float64_t a, float64_t b) --{ -- float64_t result; -- __asm__ ("fabd %d0, %d1, %d2" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_high_s8 (int8x16_t a, int8x16_t b) - { - int16x8_t result; -@@ -5539,7 +6942,8 @@ vabdl_high_s8 (int8x16_t a, int8x16_t b) - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_high_s16 (int16x8_t a, int16x8_t b) - { - int32x4_t result; -@@ -5550,7 +6954,8 @@ vabdl_high_s16 (int16x8_t a, int16x8_t b) - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_high_s32 (int32x4_t a, int32x4_t b) - { - int64x2_t result; -@@ -5561,7 +6966,8 @@ vabdl_high_s32 (int32x4_t a, int32x4_t b) - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_high_u8 (uint8x16_t a, uint8x16_t b) - { - uint16x8_t result; -@@ -5572,7 +6978,8 @@ vabdl_high_u8 (uint8x16_t a, uint8x16_t b) - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_high_u16 (uint16x8_t a, uint16x8_t b) - { - uint32x4_t result; -@@ -5583,7 +6990,8 @@ vabdl_high_u16 (uint16x8_t a, uint16x8_t b) - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_high_u32 (uint32x4_t a, uint32x4_t b) - { - uint64x2_t result; -@@ -5594,7 +7002,8 @@ vabdl_high_u32 (uint32x4_t a, uint32x4_t b) - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_s8 (int8x8_t a, int8x8_t b) - { - int16x8_t result; -@@ -5605,7 +7014,8 @@ vabdl_s8 (int8x8_t a, int8x8_t b) - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) - vabdl_s16 (int16x4_t a, int16x4_t b) - { - int32x4_t result; -@@ -5616,7 +7026,8 @@ vabdl_s16 (int16x4_t a, int16x4_t b) - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_s32 (int32x2_t a, int32x2_t b) - { - int64x2_t result; -@@ -5627,7 +7038,8 @@ vabdl_s32 (int32x2_t a, int32x2_t b) - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_u8 (uint8x8_t a, uint8x8_t b) - { - uint16x8_t result; -@@ -5638,7 +7050,8 @@ vabdl_u8 (uint8x8_t a, uint8x8_t b) - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_u16 (uint16x4_t a, uint16x4_t b) - { - uint32x4_t result; -@@ -5649,7 +7062,8 @@ vabdl_u16 (uint16x4_t a, uint16x4_t b) - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdl_u32 (uint32x2_t a, uint32x2_t b) - { - uint64x2_t result; -@@ -5660,29 +7074,8 @@ vabdl_u32 (uint32x2_t a, uint32x2_t b) - return result; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vabdq_f32 (float32x4_t a, float32x4_t b) --{ -- float32x4_t result; -- __asm__ ("fabd %0.4s, %1.4s, %2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vabdq_f64 (float64x2_t a, float64x2_t b) --{ -- float64x2_t result; -- __asm__ ("fabd %0.2d, %1.2d, %2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdq_s8 (int8x16_t a, int8x16_t b) - { - int8x16_t result; -@@ -5693,7 +7086,8 @@ vabdq_s8 (int8x16_t a, int8x16_t b) - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdq_s16 (int16x8_t a, int16x8_t b) - { - int16x8_t result; -@@ -5704,7 +7098,8 @@ vabdq_s16 (int16x8_t a, int16x8_t b) - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdq_s32 (int32x4_t a, int32x4_t b) - { - int32x4_t result; -@@ -5715,7 +7110,8 @@ vabdq_s32 (int32x4_t a, int32x4_t b) - return result; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdq_u8 (uint8x16_t a, uint8x16_t b) - { - uint8x16_t result; -@@ -5726,7 +7122,8 @@ vabdq_u8 (uint8x16_t a, uint8x16_t b) - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdq_u16 (uint16x8_t a, uint16x8_t b) - { - uint16x8_t result; -@@ -5737,7 +7134,8 @@ vabdq_u16 (uint16x8_t a, uint16x8_t b) - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabdq_u32 (uint32x4_t a, uint32x4_t b) - { - uint32x4_t result; -@@ -5748,18 +7146,8 @@ vabdq_u32 (uint32x4_t a, uint32x4_t b) - return result; - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vabds_f32 (float32_t a, float32_t b) --{ -- float32_t result; -- __asm__ ("fabd %s0, %s1, %s2" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlv_s8 (int8x8_t a) - { - int16_t result; -@@ -5770,7 +7158,8 @@ vaddlv_s8 (int8x8_t a) - return result; - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlv_s16 (int16x4_t a) - { - int32_t result; -@@ -5781,7 +7170,8 @@ vaddlv_s16 (int16x4_t a) - return result; - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlv_u8 (uint8x8_t a) - { - uint16_t result; -@@ -5792,7 +7182,8 @@ vaddlv_u8 (uint8x8_t a) - return result; - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlv_u16 (uint16x4_t a) - { - uint32_t result; -@@ -5803,7 +7194,8 @@ vaddlv_u16 (uint16x4_t a) - return result; - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlvq_s8 (int8x16_t a) - { - int16_t result; -@@ -5814,7 +7206,8 @@ vaddlvq_s8 (int8x16_t a) - return result; - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlvq_s16 (int16x8_t a) - { - int32_t result; -@@ -5825,7 +7218,8 @@ vaddlvq_s16 (int16x8_t a) - return result; - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlvq_s32 (int32x4_t a) - { - int64_t result; -@@ -5836,7 +7230,8 @@ vaddlvq_s32 (int32x4_t a) - return result; - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlvq_u8 (uint8x16_t a) - { - uint16_t result; -@@ -5847,7 +7242,8 @@ vaddlvq_u8 (uint8x16_t a) - return result; - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlvq_u16 (uint16x8_t a) - { - uint32_t result; -@@ -5858,7 +7254,8 @@ 
vaddlvq_u16 (uint16x8_t a) - return result; - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddlvq_u32 (uint32x4_t a) - { - uint64_t result; -@@ -5869,18584 +7266,23100 @@ vaddlvq_u32 (uint32x4_t a) - return result; - } - --#define vcopyq_lane_f32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- float32x4_t c_ = (c); \ -- float32x4_t a_ = (a); \ -- float32x4_t result; \ -- __asm__ ("ins %0.s[%2], %3.s[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtx_f32_f64 (float64x2_t a) -+{ -+ float32x2_t result; -+ __asm__ ("fcvtxn %0.2s,%1.2d" -+ : "=w"(result) -+ : "w"(a) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b) -+{ -+ float32x4_t result; -+ __asm__ ("fcvtxn2 %0.4s,%1.2d" -+ : "=w"(result) -+ : "w" (b), "0"(a) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtxd_f32_f64 (float64_t a) -+{ -+ float32_t result; -+ __asm__ ("fcvtxn %s0,%d1" -+ : "=w"(result) -+ : "w"(a) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) -+{ -+ float32x2_t result; -+ float32x2_t t1; -+ __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" -+ : "=w"(result), "=w"(t1) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) -+{ -+ int16x4_t result; -+ __asm__ ("mla %0.4h,%2.4h,%3.h[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "x"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) -+{ -+ int32x2_t result; -+ __asm__ ("mla %0.2s,%2.2s,%3.s[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) -+{ -+ uint16x4_t result; -+ __asm__ ("mla %0.4h,%2.4h,%3.h[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "x"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) -+{ -+ uint32x2_t result; -+ __asm__ ("mla %0.2s,%2.2s,%3.s[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) -+{ -+ int8x8_t result; -+ __asm__ ("mla %0.8b, %2.8b, %3.8b" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ 
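/* Illustrative sketch, not part of the patch.  The asm-based intrinsics in
   these hunks all share one tied-operand pattern for accumulating
   instructions: binding the accumulator with the "0" constraint forces it
   into the same register as output %0, matching MLA's read-modify-write
   semantics, while "w" selects any SIMD/FP register and "x" (used for
   16-bit lane operands) restricts the register to v0-v15.  A hypothetical
   wrapper showing the pattern, mirroring vmlaq_s32 above: */
#include <arm_neon.h>

static int32x4_t
my_mla_s32 (int32x4_t acc, int32x4_t b, int32x4_t c)
{
  int32x4_t result;
  __asm__ ("mla %0.4s, %2.4s, %3.4s"
           : "=w" (result)
           : "0" (acc), "w" (b), "w" (c)
           : /* No clobbers */);
  return result;
}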
-+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) -+{ -+ int16x4_t result; -+ __asm__ ("mla %0.4h, %2.4h, %3.4h" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) -+{ -+ int32x2_t result; -+ __asm__ ("mla %0.2s, %2.2s, %3.2s" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) -+{ -+ uint8x8_t result; -+ __asm__ ("mla %0.8b, %2.8b, %3.8b" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) -+{ -+ uint16x4_t result; -+ __asm__ ("mla %0.4h, %2.4h, %3.4h" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) -+{ -+ uint32x2_t result; -+ __asm__ ("mla %0.2s, %2.2s, %3.2s" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcopyq_lane_f64(a, b, c, d) \ -+#define vmlal_high_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ -- float64x2_t c_ = (c); \ -- float64x2_t a_ = (a); \ -- float64x2_t result; \ -- __asm__ ("ins %0.d[%2], %3.d[%4]" \ -+ int16x4_t c_ = (c); \ -+ int16x8_t b_ = (b); \ -+ int32x4_t a_ = (a); \ -+ int32x4_t result; \ -+ __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcopyq_lane_p8(a, b, c, d) \ -+#define vmlal_high_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ -- poly8x16_t c_ = (c); \ -- poly8x16_t a_ = (a); \ -- poly8x16_t result; \ -- __asm__ ("ins %0.b[%2], %3.b[%4]" \ -+ int32x2_t c_ = (c); \ -+ int32x4_t b_ = (b); \ -+ int64x2_t a_ = (a); \ -+ int64x2_t result; \ -+ __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcopyq_lane_p16(a, b, c, d) \ -+#define vmlal_high_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ -- poly16x8_t c_ = (c); \ -- poly16x8_t a_ = (a); \ -- poly16x8_t result; \ -- __asm__ ("ins %0.h[%2], %3.h[%4]" \ -+ uint16x4_t c_ = (c); \ -+ uint16x8_t b_ = (b); \ -+ uint32x4_t a_ = (a); \ -+ uint32x4_t result; \ -+ __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcopyq_lane_s8(a, b, c, d) \ -+#define vmlal_high_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ -- int8x16_t c_ = (c); \ -- int8x16_t a_ = (a); \ -- int8x16_t result; \ -- __asm__ ("ins %0.b[%2], %3.b[%4]" \ -+ uint32x2_t c_ = (c); \ -+ uint32x4_t b_ = (b); \ -+ uint64x2_t a_ = (a); \ -+ uint64x2_t result; \ -+ __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ -- : 
"0"(a_), "i"(b), "w"(c_), "i"(d) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcopyq_lane_s16(a, b, c, d) \ -+#define vmlal_high_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ -- int16x8_t a_ = (a); \ -- int16x8_t result; \ -- __asm__ ("ins %0.h[%2], %3.h[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vcopyq_lane_s32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- int32x4_t c_ = (c); \ -+ int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ -- __asm__ ("ins %0.s[%2], %3.s[%4]" \ -+ __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcopyq_lane_s64(a, b, c, d) \ -+#define vmlal_high_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ -- int64x2_t c_ = (c); \ -+ int32x4_t c_ = (c); \ -+ int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ -- __asm__ ("ins %0.d[%2], %3.d[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vcopyq_lane_u8(a, b, c, d) \ -- __extension__ \ -- ({ \ -- uint8x16_t c_ = (c); \ -- uint8x16_t a_ = (a); \ -- uint8x16_t result; \ -- __asm__ ("ins %0.b[%2], %3.b[%4]" \ -+ __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcopyq_lane_u16(a, b, c, d) \ -+#define vmlal_high_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ -- uint16x8_t a_ = (a); \ -- uint16x8_t result; \ -- __asm__ ("ins %0.h[%2], %3.h[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vcopyq_lane_u32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- uint32x4_t c_ = (c); \ -+ uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ -- __asm__ ("ins %0.s[%2], %3.s[%4]" \ -+ __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcopyq_lane_u64(a, b, c, d) \ -+#define vmlal_high_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ -- uint64x2_t c_ = (c); \ -+ uint32x4_t c_ = (c); \ -+ uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -- __asm__ ("ins %0.d[%2], %3.d[%4]" \ -+ __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ -- : "0"(a_), "i"(b), "w"(c_), "i"(d) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcvt_n_f32_s32(a, b) \ -- __extension__ \ -- ({ \ -- int32x2_t a_ = (a); \ -- float32x2_t result; \ -- __asm__ ("scvtf %0.2s, %1.2s, #%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) -+{ -+ int32x4_t result; -+ __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "x"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvt_n_f32_u32(a, b) \ -- __extension__ \ -- ({ \ -- uint32x2_t a_ = (a); \ -- float32x2_t result; \ -- __asm__ ("ucvtf %0.2s, %1.2s, #%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ 
-- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) -+{ -+ int64x2_t result; -+ __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvt_n_s32_f32(a, b) \ -- __extension__ \ -- ({ \ -- float32x2_t a_ = (a); \ -- int32x2_t result; \ -- __asm__ ("fcvtzs %0.2s, %1.2s, #%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) -+{ -+ uint32x4_t result; -+ __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "x"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvt_n_u32_f32(a, b) \ -- __extension__ \ -- ({ \ -- float32x2_t a_ = (a); \ -- uint32x2_t result; \ -- __asm__ ("fcvtzu %0.2s, %1.2s, #%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) -+{ -+ uint64x2_t result; -+ __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvtd_n_f64_s64(a, b) \ -- __extension__ \ -- ({ \ -- int64_t a_ = (a); \ -- float64_t result; \ -- __asm__ ("scvtf %d0,%d1,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) -+{ -+ int16x8_t result; -+ __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvtd_n_f64_u64(a, b) \ -- __extension__ \ -- ({ \ -- uint64_t a_ = (a); \ -- float64_t result; \ -- __asm__ ("ucvtf %d0,%d1,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) -+{ -+ int32x4_t result; -+ __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvtd_n_s64_f64(a, b) \ -- __extension__ \ -- ({ \ -- float64_t a_ = (a); \ -- int64_t result; \ -- __asm__ ("fcvtzs %d0,%d1,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) -+{ -+ int64x2_t result; -+ __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvtd_n_u64_f64(a, b) \ -- __extension__ \ -- ({ \ -- float64_t a_ = (a); \ -- uint64_t result; \ -- __asm__ ("fcvtzu %d0,%d1,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) -+{ -+ uint16x8_t result; -+ __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) -+{ -+ uint32x4_t result; -+ __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) -+{ -+ uint64x2_t result; -+ __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvtq_n_f32_s32(a, b) \ -+#define vmlal_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ -+ int16x4_t c_ = (c); \ -+ int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ -- float32x4_t result; \ -- __asm__ ("scvtf %0.4s, %1.4s, #%2" \ -+ int32x4_t result; \ -+ __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ - : "=w"(result) \ -- : "w"(a_), "i"(b) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcvtq_n_f32_u32(a, b) \ -+#define vmlal_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ -- uint32x4_t a_ = (a); \ -- float32x4_t result; \ -- __asm__ ("ucvtf %0.4s, %1.4s, #%2" \ -+ int32x2_t c_ = (c); \ -+ int32x2_t b_ = (b); \ -+ int64x2_t a_ = (a); \ -+ int64x2_t result; \ -+ __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \ - : "=w"(result) \ -- : "w"(a_), "i"(b) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcvtq_n_f64_s64(a, b) \ -+#define vmlal_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ -- int64x2_t a_ = (a); \ -- float64x2_t result; \ -- __asm__ ("scvtf %0.2d, %1.2d, #%2" \ -+ uint16x4_t c_ = (c); \ -+ uint16x4_t b_ = (b); \ -+ uint32x4_t a_ = (a); \ -+ uint32x4_t result; \ -+ __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ - : "=w"(result) \ -- : "w"(a_), "i"(b) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcvtq_n_f64_u64(a, b) \ -+#define vmlal_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ -+ uint32x2_t c_ = (c); \ -+ uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ -- float64x2_t result; \ -- __asm__ ("ucvtf %0.2d, %1.2d, #%2" \ -+ uint64x2_t result; \ -+ __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ -- : "w"(a_), "i"(b) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcvtq_n_s32_f32(a, b) \ -+#define vmlal_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ -- float32x4_t a_ = (a); \ -+ int16x8_t c_ = (c); \ -+ int16x4_t b_ = (b); \ -+ int32x4_t a_ = (a); \ - int32x4_t result; \ -- __asm__ ("fcvtzs %0.4s, %1.4s, #%2" \ -+ __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ -- : "w"(a_), "i"(b) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcvtq_n_s64_f64(a, b) \ -+#define vmlal_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ -- float64x2_t a_ = (a); \ -+ int32x4_t c_ = (c); \ -+ int32x2_t b_ = (b); \ -+ int64x2_t a_ = (a); \ - int64x2_t result; \ -- __asm__ ("fcvtzs %0.2d, %1.2d, #%2" \ -+ __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ -- : "w"(a_), "i"(b) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - 
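/* Illustrative sketch, not part of the patch.  Note that the lane variants
   in these hunks stay as statement-expression macros rather than becoming
   extern __inline functions: the lane number feeds the "i" asm constraint,
   so it must remain an integer constant expression at the point of
   expansion.  Hypothetical usage of the vmlal_lane_s16 macro defined
   above: */
#include <arm_neon.h>

int32x4_t
widen_mla_lane1 (int32x4_t acc, int16x4_t b, int16x4_t c)
{
  return vmlal_lane_s16 (acc, b, c, 1);  /* lane index is a literal */
}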
--#define vcvtq_n_u32_f32(a, b) \ -+#define vmlal_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ -- float32x4_t a_ = (a); \ -+ uint16x8_t c_ = (c); \ -+ uint16x4_t b_ = (b); \ -+ uint32x4_t a_ = (a); \ - uint32x4_t result; \ -- __asm__ ("fcvtzu %0.4s, %1.4s, #%2" \ -+ __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ -- : "w"(a_), "i"(b) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcvtq_n_u64_f64(a, b) \ -+#define vmlal_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ -- float64x2_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("fcvtzu %0.2d, %1.2d, #%2" \ -+ uint32x4_t c_ = (c); \ -+ uint32x2_t b_ = (b); \ -+ uint64x2_t a_ = (a); \ -+ uint64x2_t result; \ -+ __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ -- : "w"(a_), "i"(b) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vcvts_n_f32_s32(a, b) \ -- __extension__ \ -- ({ \ -- int32_t a_ = (a); \ -- float32_t result; \ -- __asm__ ("scvtf %s0,%s1,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) -+{ -+ int32x4_t result; -+ __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "x"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvts_n_f32_u32(a, b) \ -- __extension__ \ -- ({ \ -- uint32_t a_ = (a); \ -- float32_t result; \ -- __asm__ ("ucvtf %s0,%s1,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) -+{ -+ int64x2_t result; -+ __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvts_n_s32_f32(a, b) \ -- __extension__ \ -- ({ \ -- float32_t a_ = (a); \ -- int32_t result; \ -- __asm__ ("fcvtzs %s0,%s1,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) -+{ -+ uint32x4_t result; -+ __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "x"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vcvts_n_u32_f32(a, b) \ -- __extension__ \ -- ({ \ -- float32_t a_ = (a); \ -- uint32_t result; \ -- __asm__ ("fcvtzu %s0,%s1,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) -+{ -+ uint64x2_t result; -+ __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vcvtx_f32_f64 (float64x2_t a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) - { -- float32x2_t result; -- __asm__ ("fcvtxn %0.2s,%1.2d" -+ int16x8_t result; -+ __asm__ ("smlal %0.8h,%2.8b,%3.8b" - : "=w"(result) -- : "w"(a) 
-+ : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vcvtx_high_f32_f64 (float32x2_t a, float64x2_t b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) - { -- float32x4_t result; -- __asm__ ("fcvtxn2 %0.4s,%1.2d" -+ int32x4_t result; -+ __asm__ ("smlal %0.4s,%2.4h,%3.4h" - : "=w"(result) -- : "w" (b), "0"(a) -+ : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vcvtxd_f32_f64 (float64_t a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) - { -- float32_t result; -- __asm__ ("fcvtxn %s0,%d1" -+ int64x2_t result; -+ __asm__ ("smlal %0.2d,%2.2s,%3.2s" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmla_n_f32 (float32x2_t a, float32x2_t b, float32_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) - { -- float32x2_t result; -- float32x2_t t1; -- __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fadd %0.2s, %0.2s, %1.2s" -+ uint16x8_t result; -+ __asm__ ("umlal %0.8h,%2.8b,%3.8b" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) -+{ -+ uint32x4_t result; -+ __asm__ ("umlal %0.4s,%2.4h,%3.4h" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) -+{ -+ uint64x2_t result; -+ __asm__ ("umlal %0.2d,%2.2s,%3.2s" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) -+{ -+ float32x4_t result; -+ float32x4_t t1; -+ __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmla_n_s16 (int16x4_t a, int16x4_t b, int16_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) - { -- int16x4_t result; -- __asm__ ("mla %0.4h,%2.4h,%3.h[0]" -+ int16x8_t result; -+ __asm__ ("mla %0.8h,%2.8h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmla_n_s32 (int32x2_t a, int32x2_t b, int32_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) - { -- int32x2_t result; -- __asm__ ("mla 
%0.2s,%2.2s,%3.s[0]" -+ int32x4_t result; -+ __asm__ ("mla %0.4s,%2.4s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmla_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) - { -- uint16x4_t result; -- __asm__ ("mla %0.4h,%2.4h,%3.h[0]" -+ uint16x8_t result; -+ __asm__ ("mla %0.8h,%2.8h,%3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmla_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) - { -- uint32x2_t result; -- __asm__ ("mla %0.2s,%2.2s,%3.s[0]" -+ uint32x4_t result; -+ __asm__ ("mla %0.4s,%2.4s,%3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vmla_s8 (int8x8_t a, int8x8_t b, int8x8_t c) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) - { -- int8x8_t result; -- __asm__ ("mla %0.8b, %2.8b, %3.8b" -+ int8x16_t result; -+ __asm__ ("mla %0.16b, %2.16b, %3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmla_s16 (int16x4_t a, int16x4_t b, int16x4_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) - { -- int16x4_t result; -- __asm__ ("mla %0.4h, %2.4h, %3.4h" -+ int16x8_t result; -+ __asm__ ("mla %0.8h, %2.8h, %3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmla_s32 (int32x2_t a, int32x2_t b, int32x2_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) - { -- int32x2_t result; -- __asm__ ("mla %0.2s, %2.2s, %3.2s" -+ int32x4_t result; -+ __asm__ ("mla %0.4s, %2.4s, %3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vmla_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) - { -- uint8x8_t result; -- __asm__ ("mla %0.8b, %2.8b, %3.8b" -+ uint8x16_t result; -+ __asm__ ("mla %0.16b, %2.16b, %3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmla_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) - { -- uint16x4_t 
result; -- __asm__ ("mla %0.4h, %2.4h, %3.4h" -+ uint16x8_t result; -+ __asm__ ("mla %0.8h, %2.8h, %3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmla_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) - { -- uint32x2_t result; -- __asm__ ("mla %0.2s, %2.2s, %3.2s" -+ uint32x4_t result; -+ __asm__ ("mla %0.4s, %2.4s, %3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --#define vmlal_high_lane_s16(a, b, c, d) \ -- __extension__ \ -- ({ \ -- int16x4_t c_ = (c); \ -- int16x8_t b_ = (b); \ -- int32x4_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmlal_high_lane_s32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- int32x2_t c_ = (c); \ -- int32x4_t b_ = (b); \ -- int64x2_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) -+{ -+ float32x2_t result; -+ float32x2_t t1; -+ __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" -+ : "=w"(result), "=w"(t1) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vmlal_high_lane_u16(a, b, c, d) \ -- __extension__ \ -- ({ \ -- uint16x4_t c_ = (c); \ -- uint16x8_t b_ = (b); \ -- uint32x4_t a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) -+{ -+ int16x4_t result; -+ __asm__ ("mls %0.4h, %2.4h, %3.h[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "x"(c) -+ : /* No clobbers */); -+ return result; -+} - --#define vmlal_high_lane_u32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- uint32x2_t c_ = (c); \ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) -+{ -+ int32x2_t result; -+ __asm__ ("mls %0.2s, %2.2s, %3.s[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) -+{ -+ uint16x4_t result; -+ __asm__ ("mls %0.4h, %2.4h, %3.h[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "x"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) -+{ -+ uint32x2_t result; -+ __asm__ ("mls %0.2s, %2.2s, %3.s[0]" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) -+{ -+ int8x8_t result; -+ __asm__ ("mls %0.8b,%2.8b,%3.8b" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) -+{ -+ int16x4_t result; -+ __asm__ ("mls %0.4h,%2.4h,%3.4h" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) -+{ -+ int32x2_t result; -+ __asm__ ("mls %0.2s,%2.2s,%3.2s" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) -+{ -+ uint8x8_t result; -+ __asm__ ("mls %0.8b,%2.8b,%3.8b" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) -+{ -+ uint16x4_t result; -+ __asm__ ("mls %0.4h,%2.4h,%3.4h" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) -+{ -+ uint32x2_t result; -+ __asm__ ("mls %0.2s,%2.2s,%3.2s" -+ : "=w"(result) -+ : "0"(a), "w"(b), "w"(c) -+ : /* No clobbers */); -+ return result; -+} -+ -+#define vmlsl_high_lane_s16(a, b, c, d) \ -+ __extension__ \ -+ ({ \ -+ int16x4_t c_ = (c); \ -+ int16x8_t b_ = (b); \ -+ int32x4_t a_ = (a); \ -+ int32x4_t result; \ -+ __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmlsl_high_lane_s32(a, b, c, d) \ -+ __extension__ \ -+ ({ \ -+ int32x2_t c_ = (c); \ -+ int32x4_t b_ = (b); \ -+ int64x2_t a_ = (a); \ -+ int64x2_t result; \ -+ __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmlsl_high_lane_u16(a, b, c, d) \ -+ __extension__ \ -+ ({ \ -+ uint16x4_t c_ = (c); \ -+ uint16x8_t b_ = (b); \ -+ uint32x4_t a_ = (a); \ -+ uint32x4_t result; \ -+ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmlsl_high_lane_u32(a, b, c, d) \ -+ __extension__ \ -+ ({ \ -+ uint32x2_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -- __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ -+ __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_high_laneq_s16(a, b, c, d) \ -+#define vmlsl_high_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x8_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ -- __asm__ ("smlal2 %0.4s, %2.8h, %3.h[%4]" \ -+ __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : 
/* No clobbers */); \ - result; \ - }) - --#define vmlal_high_laneq_s32(a, b, c, d) \ -+#define vmlsl_high_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x4_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ -- __asm__ ("smlal2 %0.2d, %2.4s, %3.s[%4]" \ -+ __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_high_laneq_u16(a, b, c, d) \ -+#define vmlsl_high_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x8_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ -- __asm__ ("umlal2 %0.4s, %2.8h, %3.h[%4]" \ -+ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_high_laneq_u32(a, b, c, d) \ -+#define vmlsl_high_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x4_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -- __asm__ ("umlal2 %0.2d, %2.4s, %3.s[%4]" \ -+ __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlal_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) - { - int32x4_t result; -- __asm__ ("smlal2 %0.4s,%2.8h,%3.h[0]" -+ __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmlal_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) - { - int64x2_t result; -- __asm__ ("smlal2 %0.2d,%2.4s,%3.s[0]" -+ __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlal_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) - { - uint32x4_t result; -- __asm__ ("umlal2 %0.4s,%2.8h,%3.h[0]" -+ __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmlal_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) - { - uint64x2_t result; -- __asm__ ("umlal2 %0.2d,%2.4s,%3.s[0]" -+ __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlal_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) - { - int16x8_t result; -- __asm__ ("smlal2 %0.8h,%2.16b,%3.16b" -+ __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlal_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) - { - int32x4_t result; -- __asm__ ("smlal2 %0.4s,%2.8h,%3.8h" -+ __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmlal_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) - { - int64x2_t result; -- __asm__ ("smlal2 %0.2d,%2.4s,%3.4s" -+ __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlal_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) - { - uint16x8_t result; -- __asm__ ("umlal2 %0.8h,%2.16b,%3.16b" -+ __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlal_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) - { - uint32x4_t result; -- __asm__ ("umlal2 %0.4s,%2.8h,%3.8h" -+ __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmlal_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) - { - uint64x2_t result; -- __asm__ ("umlal2 %0.2d,%2.4s,%3.4s" -+ __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --#define vmlal_lane_s16(a, b, c, d) \ -+#define vmlsl_lane_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x4_t c_ = (c); \ - int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ -- __asm__ ("smlal %0.4s,%2.4h,%3.h[%4]" \ -+ __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_lane_s32(a, b, c, d) \ -+#define vmlsl_lane_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x2_t c_ = (c); \ - int32x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ -- __asm__ ("smlal %0.2d,%2.2s,%3.s[%4]" \ -+ __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define 
vmlal_lane_u16(a, b, c, d) \ -+#define vmlsl_lane_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x4_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ -- __asm__ ("umlal %0.4s,%2.4h,%3.h[%4]" \ -+ __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_lane_u32(a, b, c, d) \ -+#define vmlsl_lane_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x2_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -- __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ -+ __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_laneq_s16(a, b, c, d) \ -+#define vmlsl_laneq_s16(a, b, c, d) \ - __extension__ \ - ({ \ - int16x8_t c_ = (c); \ - int16x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ -- __asm__ ("smlal %0.4s, %2.4h, %3.h[%4]" \ -+ __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_laneq_s32(a, b, c, d) \ -+#define vmlsl_laneq_s32(a, b, c, d) \ - __extension__ \ - ({ \ - int32x4_t c_ = (c); \ - int32x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ -- __asm__ ("smlal %0.2d, %2.2s, %3.s[%4]" \ -+ __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_laneq_u16(a, b, c, d) \ -+#define vmlsl_laneq_u16(a, b, c, d) \ - __extension__ \ - ({ \ - uint16x8_t c_ = (c); \ - uint16x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ -- __asm__ ("umlal %0.4s, %2.4h, %3.h[%4]" \ -+ __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlal_laneq_u32(a, b, c, d) \ -+#define vmlsl_laneq_u32(a, b, c, d) \ - __extension__ \ - ({ \ - uint32x4_t c_ = (c); \ - uint32x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ -- __asm__ ("umlal %0.2d, %2.2s, %3.s[%4]" \ -+ __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ - : "=w"(result) \ - : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ - : /* No clobbers */); \ - result; \ - }) - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlal_n_s16 (int32x4_t a, int16x4_t b, int16_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) - { - int32x4_t result; -- __asm__ ("smlal %0.4s,%2.4h,%3.h[0]" -+ __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmlal_n_s32 (int64x2_t a, int32x2_t b, int32_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) - { - int64x2_t result; -- __asm__ ("smlal %0.2d,%2.2s,%3.s[0]" -+ __asm__ ("smlsl %0.2d, %2.2s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlal_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) - { - uint32x4_t result; -- __asm__ ("umlal %0.4s,%2.4h,%3.h[0]" -+ __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmlal_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) - { - uint64x2_t result; -- __asm__ ("umlal %0.2d,%2.2s,%3.s[0]" -+ __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlal_s8 (int16x8_t a, int8x8_t b, int8x8_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) - { - int16x8_t result; -- __asm__ ("smlal %0.8h,%2.8b,%3.8b" -+ __asm__ ("smlsl %0.8h, %2.8b, %3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlal_s16 (int32x4_t a, int16x4_t b, int16x4_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) - { - int32x4_t result; -- __asm__ ("smlal %0.4s,%2.4h,%3.4h" -+ __asm__ ("smlsl %0.4s, %2.4h, %3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmlal_s32 (int64x2_t a, int32x2_t b, int32x2_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) - { - int64x2_t result; -- __asm__ ("smlal %0.2d,%2.2s,%3.2s" -+ __asm__ ("smlsl %0.2d, %2.2s, %3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlal_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) - { - uint16x8_t result; -- __asm__ ("umlal %0.8h,%2.8b,%3.8b" -+ __asm__ ("umlsl %0.8h, %2.8b, %3.8b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlal_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) - { - uint32x4_t result; -- __asm__ ("umlal %0.4s,%2.4h,%3.4h" -+ __asm__ ("umlsl %0.4s, %2.4h, %3.4h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmlal_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) - { - uint64x2_t result; 
-- __asm__ ("umlal %0.2d,%2.2s,%3.2s" -+ __asm__ ("umlsl %0.2d, %2.2s, %3.2s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmlaq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) - { - float32x4_t result; - float32x4_t t1; -- __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fadd %0.4s, %0.4s, %1.4s" -+ __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" - : "=w"(result), "=w"(t1) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlaq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) - { - int16x8_t result; -- __asm__ ("mla %0.8h,%2.8h,%3.h[0]" -+ __asm__ ("mls %0.8h, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlaq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) - { - int32x4_t result; -- __asm__ ("mla %0.4s,%2.4s,%3.s[0]" -+ __asm__ ("mls %0.4s, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlaq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) - { - uint16x8_t result; -- __asm__ ("mla %0.8h,%2.8h,%3.h[0]" -+ __asm__ ("mls %0.8h, %2.8h, %3.h[0]" - : "=w"(result) - : "0"(a), "w"(b), "x"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlaq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) - { - uint32x4_t result; -- __asm__ ("mla %0.4s,%2.4s,%3.s[0]" -+ __asm__ ("mls %0.4s, %2.4s, %3.s[0]" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vmlaq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) - { - int8x16_t result; -- __asm__ ("mla %0.16b, %2.16b, %3.16b" -+ __asm__ ("mls %0.16b,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlaq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) - { - int16x8_t result; -- __asm__ ("mla %0.8h, 
%2.8h, %3.8h" -+ __asm__ ("mls %0.8h,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlaq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) - { - int32x4_t result; -- __asm__ ("mla %0.4s, %2.4s, %3.4s" -+ __asm__ ("mls %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vmlaq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) - { - uint8x16_t result; -- __asm__ ("mla %0.16b, %2.16b, %3.16b" -+ __asm__ ("mls %0.16b,%2.16b,%3.16b" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlaq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) - { - uint16x8_t result; -- __asm__ ("mla %0.8h, %2.8h, %3.8h" -+ __asm__ ("mls %0.8h,%2.8h,%3.8h" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlaq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) - { - uint32x4_t result; -- __asm__ ("mla %0.4s, %2.4s, %3.4s" -+ __asm__ ("mls %0.4s,%2.4s,%3.4s" - : "=w"(result) - : "0"(a), "w"(b), "w"(c) - : /* No clobbers */); - return result; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmls_n_f32 (float32x2_t a, float32x2_t b, float32_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_high_s8 (int8x16_t a) - { -- float32x2_t result; -- float32x2_t t1; -- __asm__ ("fmul %1.2s, %3.2s, %4.s[0]; fsub %0.2s, %0.2s, %1.2s" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "w"(c) -+ int16x8_t result; -+ __asm__ ("sshll2 %0.8h,%1.16b,#0" -+ : "=w"(result) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmls_n_s16 (int16x4_t a, int16x4_t b, int16_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_high_s16 (int16x8_t a) - { -- int16x4_t result; -- __asm__ ("mls %0.4h, %2.4h, %3.h[0]" -+ int32x4_t result; -+ __asm__ ("sshll2 %0.4s,%1.8h,#0" - : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmls_n_s32 (int32x2_t a, int32x2_t b, int32_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_high_s32 (int32x4_t a) - { -- int32x2_t result; -- __asm__ ("mls %0.2s, %2.2s, %3.s[0]" -+ int64x2_t result; -+ __asm__ 
("sshll2 %0.2d,%1.4s,#0" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmls_n_u16 (uint16x4_t a, uint16x4_t b, uint16_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_high_u8 (uint8x16_t a) - { -- uint16x4_t result; -- __asm__ ("mls %0.4h, %2.4h, %3.h[0]" -+ uint16x8_t result; -+ __asm__ ("ushll2 %0.8h,%1.16b,#0" - : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmls_n_u32 (uint32x2_t a, uint32x2_t b, uint32_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_high_u16 (uint16x8_t a) - { -- uint32x2_t result; -- __asm__ ("mls %0.2s, %2.2s, %3.s[0]" -+ uint32x4_t result; -+ __asm__ ("ushll2 %0.4s,%1.8h,#0" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vmls_s8 (int8x8_t a, int8x8_t b, int8x8_t c) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_high_u32 (uint32x4_t a) - { -- int8x8_t result; -- __asm__ ("mls %0.8b,%2.8b,%3.8b" -+ uint64x2_t result; -+ __asm__ ("ushll2 %0.2d,%1.4s,#0" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmls_s16 (int16x4_t a, int16x4_t b, int16x4_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_s8 (int8x8_t a) - { -- int16x4_t result; -- __asm__ ("mls %0.4h,%2.4h,%3.4h" -+ int16x8_t result; -+ __asm__ ("sshll %0.8h,%1.8b,#0" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmls_s32 (int32x2_t a, int32x2_t b, int32x2_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_s16 (int16x4_t a) - { -- int32x2_t result; -- __asm__ ("mls %0.2s,%2.2s,%3.2s" -+ int32x4_t result; -+ __asm__ ("sshll %0.4s,%1.4h,#0" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vmls_u8 (uint8x8_t a, uint8x8_t b, uint8x8_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_s32 (int32x2_t a) - { -- uint8x8_t result; -- __asm__ ("mls %0.8b,%2.8b,%3.8b" -+ int64x2_t result; -+ __asm__ ("sshll %0.2d,%1.2s,#0" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmls_u16 (uint16x4_t a, uint16x4_t b, uint16x4_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_u8 (uint8x8_t a) - { -- uint16x4_t result; -- __asm__ ("mls %0.4h,%2.4h,%3.4h" -+ uint16x8_t result; -+ __asm__ ("ushll %0.8h,%1.8b,#0" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No 
clobbers */); - return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmls_u32 (uint32x2_t a, uint32x2_t b, uint32x2_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_u16 (uint16x4_t a) - { -- uint32x2_t result; -- __asm__ ("mls %0.2s,%2.2s,%3.2s" -+ uint32x4_t result; -+ __asm__ ("ushll %0.4s,%1.4h,#0" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --#define vmlsl_high_lane_s16(a, b, c, d) \ -- __extension__ \ -- ({ \ -- int16x4_t c_ = (c); \ -- int16x8_t b_ = (b); \ -- int32x4_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovl_u32 (uint32x2_t a) -+{ -+ uint64x2_t result; -+ __asm__ ("ushll %0.2d,%1.2s,#0" -+ : "=w"(result) -+ : "w"(a) -+ : /* No clobbers */); -+ return result; -+} - --#define vmlsl_high_lane_s32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- int32x2_t c_ = (c); \ -- int32x4_t b_ = (b); \ -- int64x2_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_high_s16 (int8x8_t a, int16x8_t b) -+{ -+ int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("xtn2 %0.16b,%1.8h" -+ : "+w"(result) -+ : "w"(b) -+ : /* No clobbers */); -+ return result; -+} - --#define vmlsl_high_lane_u16(a, b, c, d) \ -- __extension__ \ -- ({ \ -- uint16x4_t c_ = (c); \ -- uint16x8_t b_ = (b); \ -- uint32x4_t a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_high_s32 (int16x4_t a, int32x4_t b) -+{ -+ int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("xtn2 %0.8h,%1.4s" -+ : "+w"(result) -+ : "w"(b) -+ : /* No clobbers */); -+ return result; -+} - --#define vmlsl_high_lane_u32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- uint32x2_t c_ = (c); \ -- uint32x4_t b_ = (b); \ -- uint64x2_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_high_s64 (int32x2_t a, int64x2_t b) -+{ -+ int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("xtn2 %0.4s,%1.2d" -+ : "+w"(result) -+ : "w"(b) -+ : /* No clobbers */); -+ return result; -+} - --#define vmlsl_high_laneq_s16(a, b, c, d) \ -- __extension__ \ -- ({ \ -- int16x8_t c_ = (c); \ -- int16x8_t b_ = (b); \ -- int32x4_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint8x16_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_high_u16 (uint8x8_t a, uint16x8_t b) -+{ -+ uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("xtn2 %0.16b,%1.8h" -+ : "+w"(result) -+ : "w"(b) -+ : /* No clobbers */); -+ return result; -+} - --#define vmlsl_high_laneq_s32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- int32x4_t c_ = (c); \ -- int32x4_t b_ = (b); \ -- int64x2_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmlsl_high_laneq_u16(a, b, c, d) \ -- __extension__ \ -- ({ \ -- uint16x8_t c_ = (c); \ -- uint16x8_t b_ = (b); \ -- uint32x4_t a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmlsl_high_laneq_u32(a, b, c, d) \ -- __extension__ \ -- ({ \ -- uint32x4_t c_ = (c); \ -- uint32x4_t b_ = (b); \ -- uint64x2_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[%4]" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlsl_high_n_s16 (int32x4_t a, int16x8_t b, int16_t c) --{ -- int32x4_t result; -- __asm__ ("smlsl2 %0.4s, %2.8h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmlsl_high_n_s32 (int64x2_t a, int32x4_t b, int32_t c) --{ -- int64x2_t result; -- __asm__ ("smlsl2 %0.2d, %2.4s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlsl_high_n_u16 (uint32x4_t a, uint16x8_t b, uint16_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_high_u32 (uint16x4_t a, uint32x4_t b) - { -- uint32x4_t result; -- __asm__ ("umlsl2 %0.4s, %2.8h, %3.h[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("xtn2 %0.8h,%1.4s" -+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmlsl_high_n_u32 (uint64x2_t a, uint32x4_t b, uint32_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_high_u64 (uint32x2_t a, uint64x2_t b) - { -- uint64x2_t result; -- __asm__ ("umlsl2 %0.2d, %2.4s, %3.s[0]" -- : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("xtn2 %0.4s,%1.2d" -+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlsl_high_s8 (int16x8_t a, int8x16_t b, int8x16_t c) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_s16 (int16x8_t a) - { -- int16x8_t result; -- __asm__ ("smlsl2 %0.8h,%2.16b,%3.16b" -+ int8x8_t result; -+ __asm__ ("xtn %0.8b,%1.8h" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - 
return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlsl_high_s16 (int32x4_t a, int16x8_t b, int16x8_t c) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_s32 (int32x4_t a) - { -- int32x4_t result; -- __asm__ ("smlsl2 %0.4s,%2.8h,%3.8h" -+ int16x4_t result; -+ __asm__ ("xtn %0.4h,%1.4s" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmlsl_high_s32 (int64x2_t a, int32x4_t b, int32x4_t c) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_s64 (int64x2_t a) - { -- int64x2_t result; -- __asm__ ("smlsl2 %0.2d,%2.4s,%3.4s" -+ int32x2_t result; -+ __asm__ ("xtn %0.2s,%1.2d" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlsl_high_u8 (uint16x8_t a, uint8x16_t b, uint8x16_t c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_u16 (uint16x8_t a) - { -- uint16x8_t result; -- __asm__ ("umlsl2 %0.8h,%2.16b,%3.16b" -+ uint8x8_t result; -+ __asm__ ("xtn %0.8b,%1.8h" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlsl_high_u16 (uint32x4_t a, uint16x8_t b, uint16x8_t c) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_u32 (uint32x4_t a) - { -- uint32x4_t result; -- __asm__ ("umlsl2 %0.4s,%2.8h,%3.8h" -+ uint16x4_t result; -+ __asm__ ("xtn %0.4h,%1.4s" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmlsl_high_u32 (uint64x2_t a, uint32x4_t b, uint32x4_t c) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovn_u64 (uint64x2_t a) - { -- uint64x2_t result; -- __asm__ ("umlsl2 %0.2d,%2.4s,%3.4s" -+ uint32x2_t result; -+ __asm__ ("xtn %0.2s,%1.2d" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --#define vmlsl_lane_s16(a, b, c, d) \ -+#define vmull_high_lane_s16(a, b, c) \ - __extension__ \ - ({ \ -- int16x4_t c_ = (c); \ - int16x4_t b_ = (b); \ -- int32x4_t a_ = (a); \ -+ int16x8_t a_ = (a); \ - int32x4_t result; \ -- __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ -+ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -+ : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlsl_lane_s32(a, b, c, d) \ -+#define vmull_high_lane_s32(a, b, c) \ - __extension__ \ - ({ \ -- int32x2_t c_ = (c); \ - int32x2_t b_ = (b); \ -- int64x2_t a_ = (a); \ -+ int32x4_t a_ = (a); \ - int64x2_t result; \ -- __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ -+ __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -+ : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlsl_lane_u16(a, b, c, d) \ -+#define vmull_high_lane_u16(a, b, c) \ - __extension__ \ - ({ \ -- uint16x4_t c_ = (c); \ - 
uint16x4_t b_ = (b); \ -- uint32x4_t a_ = (a); \ -+ uint16x8_t a_ = (a); \ - uint32x4_t result; \ -- __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ -+ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -+ : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlsl_lane_u32(a, b, c, d) \ -+#define vmull_high_lane_u32(a, b, c) \ - __extension__ \ - ({ \ -- uint32x2_t c_ = (c); \ - uint32x2_t b_ = (b); \ -- uint64x2_t a_ = (a); \ -+ uint32x4_t a_ = (a); \ - uint64x2_t result; \ -- __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ -+ __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -+ : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlsl_laneq_s16(a, b, c, d) \ -+#define vmull_high_laneq_s16(a, b, c) \ - __extension__ \ - ({ \ -- int16x8_t c_ = (c); \ -- int16x4_t b_ = (b); \ -- int32x4_t a_ = (a); \ -+ int16x8_t b_ = (b); \ -+ int16x8_t a_ = (a); \ - int32x4_t result; \ -- __asm__ ("smlsl %0.4s, %2.4h, %3.h[%4]" \ -+ __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -+ : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlsl_laneq_s32(a, b, c, d) \ -+#define vmull_high_laneq_s32(a, b, c) \ - __extension__ \ - ({ \ -- int32x4_t c_ = (c); \ -- int32x2_t b_ = (b); \ -- int64x2_t a_ = (a); \ -+ int32x4_t b_ = (b); \ -+ int32x4_t a_ = (a); \ - int64x2_t result; \ -- __asm__ ("smlsl %0.2d, %2.2s, %3.s[%4]" \ -+ __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -+ : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlsl_laneq_u16(a, b, c, d) \ -+#define vmull_high_laneq_u16(a, b, c) \ - __extension__ \ - ({ \ -- uint16x8_t c_ = (c); \ -- uint16x4_t b_ = (b); \ -- uint32x4_t a_ = (a); \ -+ uint16x8_t b_ = (b); \ -+ uint16x8_t a_ = (a); \ - uint32x4_t result; \ -- __asm__ ("umlsl %0.4s, %2.4h, %3.h[%4]" \ -+ __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ - : "=w"(result) \ -- : "0"(a_), "w"(b_), "x"(c_), "i"(d) \ -+ : "w"(a_), "x"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmlsl_laneq_u32(a, b, c, d) \ -+#define vmull_high_laneq_u32(a, b, c) \ - __extension__ \ - ({ \ -- uint32x4_t c_ = (c); \ -- uint32x2_t b_ = (b); \ -- uint64x2_t a_ = (a); \ -+ uint32x4_t b_ = (b); \ -+ uint32x4_t a_ = (a); \ - uint64x2_t result; \ -- __asm__ ("umlsl %0.2d, %2.2s, %3.s[%4]" \ -+ __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ - : "=w"(result) \ -- : "0"(a_), "w"(b_), "w"(c_), "i"(d) \ -+ : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlsl_n_s16 (int32x4_t a, int16x4_t b, int16_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_n_s16 (int16x8_t a, int16_t b) - { - int32x4_t result; -- __asm__ ("smlsl %0.4s, %2.4h, %3.h[0]" -+ __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" - : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "w"(a), "x"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmlsl_n_s32 (int64x2_t a, int32x2_t b, int32_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_n_s32 (int32x4_t a, int32_t b) - { - int64x2_t result; -- __asm__ 
("smlsl %0.2d, %2.2s, %3.s[0]" -+ __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlsl_n_u16 (uint32x4_t a, uint16x4_t b, uint16_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_n_u16 (uint16x8_t a, uint16_t b) - { - uint32x4_t result; -- __asm__ ("umlsl %0.4s, %2.4h, %3.h[0]" -+ __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" - : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "w"(a), "x"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmlsl_n_u32 (uint64x2_t a, uint32x2_t b, uint32_t c) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_n_u32 (uint32x4_t a, uint32_t b) - { - uint64x2_t result; -- __asm__ ("umlsl %0.2d, %2.2s, %3.s[0]" -+ __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlsl_s8 (int16x8_t a, int8x8_t b, int8x8_t c) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_p8 (poly8x16_t a, poly8x16_t b) - { -- int16x8_t result; -- __asm__ ("smlsl %0.8h, %2.8b, %3.8b" -+ poly16x8_t result; -+ __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlsl_s16 (int32x4_t a, int16x4_t b, int16x4_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_s8 (int8x16_t a, int8x16_t b) - { -- int32x4_t result; -- __asm__ ("smlsl %0.4s, %2.4h, %3.4h" -+ int16x8_t result; -+ __asm__ ("smull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmlsl_s32 (int64x2_t a, int32x2_t b, int32x2_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_s16 (int16x8_t a, int16x8_t b) - { -- int64x2_t result; -- __asm__ ("smlsl %0.2d, %2.2s, %3.2s" -+ int32x4_t result; -+ __asm__ ("smull2 %0.4s,%1.8h,%2.8h" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlsl_u8 (uint16x8_t a, uint8x8_t b, uint8x8_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_s32 (int32x4_t a, int32x4_t b) - { -- uint16x8_t result; -- __asm__ ("umlsl %0.8h, %2.8b, %3.8b" -+ int64x2_t result; -+ __asm__ ("smull2 %0.2d,%1.4s,%2.4s" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlsl_u16 (uint32x4_t a, uint16x4_t b, uint16x4_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vmull_high_u8 (uint8x16_t a, uint8x16_t b) - { -- uint32x4_t result; -- __asm__ ("umlsl %0.4s, %2.4h, %3.4h" -+ uint16x8_t result; -+ __asm__ ("umull2 %0.8h,%1.16b,%2.16b" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmlsl_u32 (uint64x2_t a, uint32x2_t b, uint32x2_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_u16 (uint16x8_t a, uint16x8_t b) - { -- uint64x2_t result; -- __asm__ ("umlsl %0.2d, %2.2s, %3.2s" -+ uint32x4_t result; -+ __asm__ ("umull2 %0.4s,%1.8h,%2.8h" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmlsq_n_f32 (float32x4_t a, float32x4_t b, float32_t c) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_u32 (uint32x4_t a, uint32x4_t b) - { -- float32x4_t result; -- float32x4_t t1; -- __asm__ ("fmul %1.4s, %3.4s, %4.s[0]; fsub %0.4s, %0.4s, %1.4s" -- : "=w"(result), "=w"(t1) -- : "0"(a), "w"(b), "w"(c) -+ uint64x2_t result; -+ __asm__ ("umull2 %0.2d,%1.4s,%2.4s" -+ : "=w"(result) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlsq_n_s16 (int16x8_t a, int16x8_t b, int16_t c) -+#define vmull_lane_s16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int16x4_t b_ = (b); \ -+ int16x4_t a_ = (a); \ -+ int32x4_t result; \ -+ __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ -+ : "=w"(result) \ -+ : "w"(a_), "x"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmull_lane_s32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int32x2_t b_ = (b); \ -+ int32x2_t a_ = (a); \ -+ int64x2_t result; \ -+ __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ -+ : "=w"(result) \ -+ : "w"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmull_lane_u16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint16x4_t b_ = (b); \ -+ uint16x4_t a_ = (a); \ -+ uint32x4_t result; \ -+ __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ -+ : "=w"(result) \ -+ : "w"(a_), "x"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmull_lane_u32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint32x2_t b_ = (b); \ -+ uint32x2_t a_ = (a); \ -+ uint64x2_t result; \ -+ __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ -+ : "=w"(result) \ -+ : "w"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmull_laneq_s16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int16x8_t b_ = (b); \ -+ int16x4_t a_ = (a); \ -+ int32x4_t result; \ -+ __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ -+ : "=w"(result) \ -+ : "w"(a_), "x"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmull_laneq_s32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int32x4_t b_ = (b); \ -+ int32x2_t a_ = (a); \ -+ int64x2_t result; \ -+ __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ -+ : "=w"(result) \ -+ : "w"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vmull_laneq_u16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint16x8_t b_ = (b); \ -+ uint16x4_t a_ = (a); \ -+ uint32x4_t result; \ -+ __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ -+ : "=w"(result) \ -+ : "w"(a_), "x"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define 
vmull_laneq_u32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint32x4_t b_ = (b); \ -+ uint32x2_t a_ = (a); \ -+ uint64x2_t result; \ -+ __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ -+ : "=w"(result) \ -+ : "w"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_n_s16 (int16x4_t a, int16_t b) - { -- int16x8_t result; -- __asm__ ("mls %0.8h, %2.8h, %3.h[0]" -+ int32x4_t result; -+ __asm__ ("smull %0.4s,%1.4h,%2.h[0]" - : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "w"(a), "x"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlsq_n_s32 (int32x4_t a, int32x4_t b, int32_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_n_s32 (int32x2_t a, int32_t b) - { -- int32x4_t result; -- __asm__ ("mls %0.4s, %2.4s, %3.s[0]" -+ int64x2_t result; -+ __asm__ ("smull %0.2d,%1.2s,%2.s[0]" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlsq_n_u16 (uint16x8_t a, uint16x8_t b, uint16_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_n_u16 (uint16x4_t a, uint16_t b) - { -- uint16x8_t result; -- __asm__ ("mls %0.8h, %2.8h, %3.h[0]" -+ uint32x4_t result; -+ __asm__ ("umull %0.4s,%1.4h,%2.h[0]" - : "=w"(result) -- : "0"(a), "w"(b), "x"(c) -+ : "w"(a), "x"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlsq_n_u32 (uint32x4_t a, uint32x4_t b, uint32_t c) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_n_u32 (uint32x2_t a, uint32_t b) - { -- uint32x4_t result; -- __asm__ ("mls %0.4s, %2.4s, %3.s[0]" -+ uint64x2_t result; -+ __asm__ ("umull %0.2d,%1.2s,%2.s[0]" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vmlsq_s8 (int8x16_t a, int8x16_t b, int8x16_t c) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_p8 (poly8x8_t a, poly8x8_t b) - { -- int8x16_t result; -- __asm__ ("mls %0.16b,%2.16b,%3.16b" -+ poly16x8_t result; -+ __asm__ ("pmull %0.8h, %1.8b, %2.8b" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlsq_s16 (int16x8_t a, int16x8_t b, int16x8_t c) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_s8 (int8x8_t a, int8x8_t b) - { - int16x8_t result; -- __asm__ ("mls %0.8h,%2.8h,%3.8h" -+ __asm__ ("smull %0.8h, %1.8b, %2.8b" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlsq_s32 (int32x4_t a, int32x4_t b, int32x4_t c) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_s16 (int16x4_t a, int16x4_t b) - { - int32x4_t result; 
-- __asm__ ("mls %0.4s,%2.4s,%3.4s" -+ __asm__ ("smull %0.4s, %1.4h, %2.4h" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vmlsq_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_s32 (int32x2_t a, int32x2_t b) - { -- uint8x16_t result; -- __asm__ ("mls %0.16b,%2.16b,%3.16b" -+ int64x2_t result; -+ __asm__ ("smull %0.2d, %1.2s, %2.2s" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlsq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_u8 (uint8x8_t a, uint8x8_t b) - { - uint16x8_t result; -- __asm__ ("mls %0.8h,%2.8h,%3.8h" -+ __asm__ ("umull %0.8h, %1.8b, %2.8b" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlsq_u32 (uint32x4_t a, uint32x4_t b, uint32x4_t c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_u16 (uint16x4_t a, uint16x4_t b) - { - uint32x4_t result; -- __asm__ ("mls %0.4s,%2.4s,%3.4s" -+ __asm__ ("umull %0.4s, %1.4h, %2.4h" - : "=w"(result) -- : "0"(a), "w"(b), "w"(c) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmovl_high_s8 (int8x16_t a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_u32 (uint32x2_t a, uint32x2_t b) - { -- int16x8_t result; -- __asm__ ("sshll2 %0.8h,%1.16b,#0" -+ uint64x2_t result; -+ __asm__ ("umull %0.2d, %1.2s, %2.2s" - : "=w"(result) -- : "w"(a) -+ : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmovl_high_s16 (int16x8_t a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadal_s8 (int16x4_t a, int8x8_t b) - { -- int32x4_t result; -- __asm__ ("sshll2 %0.4s,%1.8h,#0" -+ int16x4_t result; -+ __asm__ ("sadalp %0.4h,%2.8b" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmovl_high_s32 (int32x4_t a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadal_s16 (int32x2_t a, int16x4_t b) - { -- int64x2_t result; -- __asm__ ("sshll2 %0.2d,%1.4s,#0" -+ int32x2_t result; -+ __asm__ ("sadalp %0.2s,%2.4h" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmovl_high_u8 (uint8x16_t a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadal_s32 (int64x1_t a, int32x2_t b) - { -- uint16x8_t result; -- __asm__ ("ushll2 %0.8h,%1.16b,#0" -+ int64x1_t result; -+ __asm__ ("sadalp %0.1d,%2.2s" - : "=w"(result) -- : "w"(a) -+ : "0"(a), 
"w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmovl_high_u16 (uint16x8_t a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadal_u8 (uint16x4_t a, uint8x8_t b) - { -- uint32x4_t result; -- __asm__ ("ushll2 %0.4s,%1.8h,#0" -+ uint16x4_t result; -+ __asm__ ("uadalp %0.4h,%2.8b" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmovl_high_u32 (uint32x4_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadal_u16 (uint32x2_t a, uint16x4_t b) - { -- uint64x2_t result; -- __asm__ ("ushll2 %0.2d,%1.4s,#0" -+ uint32x2_t result; -+ __asm__ ("uadalp %0.2s,%2.4h" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmovl_s8 (int8x8_t a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadal_u32 (uint64x1_t a, uint32x2_t b) -+{ -+ uint64x1_t result; -+ __asm__ ("uadalp %0.1d,%2.2s" -+ : "=w"(result) -+ : "0"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadalq_s8 (int16x8_t a, int8x16_t b) - { - int16x8_t result; -- __asm__ ("sshll %0.8h,%1.8b,#0" -+ __asm__ ("sadalp %0.8h,%2.16b" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmovl_s16 (int16x4_t a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadalq_s16 (int32x4_t a, int16x8_t b) - { - int32x4_t result; -- __asm__ ("sshll %0.4s,%1.4h,#0" -+ __asm__ ("sadalp %0.4s,%2.8h" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmovl_s32 (int32x2_t a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadalq_s32 (int64x2_t a, int32x4_t b) - { - int64x2_t result; -- __asm__ ("sshll %0.2d,%1.2s,#0" -+ __asm__ ("sadalp %0.2d,%2.4s" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmovl_u8 (uint8x8_t a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadalq_u8 (uint16x8_t a, uint8x16_t b) - { - uint16x8_t result; -- __asm__ ("ushll %0.8h,%1.8b,#0" -+ __asm__ ("uadalp %0.8h,%2.16b" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmovl_u16 (uint16x4_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadalq_u16 (uint32x4_t a, uint16x8_t b) - { - uint32x4_t result; -- __asm__ ("ushll %0.4s,%1.4h,#0" -+ __asm__ ("uadalp %0.4s,%2.8h" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmovl_u32 (uint32x2_t a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadalq_u32 (uint64x2_t a, uint32x4_t b) - { - uint64x2_t result; -- __asm__ ("ushll %0.2d,%1.2s,#0" -+ __asm__ ("uadalp %0.2d,%2.4s" - : "=w"(result) -- : "w"(a) -+ : "0"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vmovn_high_s16 (int8x8_t a, int16x8_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddl_s8 (int8x8_t a) - { -- int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("xtn2 %0.16b,%1.8h" -- : "+w"(result) -- : "w"(b) -+ int16x4_t result; -+ __asm__ ("saddlp %0.4h,%1.8b" -+ : "=w"(result) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmovn_high_s32 (int16x4_t a, int32x4_t b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddl_s16 (int16x4_t a) - { -- int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("xtn2 %0.8h,%1.4s" -- : "+w"(result) -- : "w"(b) -+ int32x2_t result; -+ __asm__ ("saddlp %0.2s,%1.4h" -+ : "=w"(result) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmovn_high_s64 (int32x2_t a, int64x2_t b) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddl_s32 (int32x2_t a) - { -- int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("xtn2 %0.4s,%1.2d" -- : "+w"(result) -- : "w"(b) -+ int64x1_t result; -+ __asm__ ("saddlp %0.1d,%1.2s" -+ : "=w"(result) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vmovn_high_u16 (uint8x8_t a, uint16x8_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddl_u8 (uint8x8_t a) - { -- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("xtn2 %0.16b,%1.8h" -- : "+w"(result) -- : "w"(b) -+ uint16x4_t result; -+ __asm__ ("uaddlp %0.4h,%1.8b" -+ : "=w"(result) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmovn_high_u32 (uint16x4_t a, uint32x4_t b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddl_u16 (uint16x4_t a) - { -- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("xtn2 %0.8h,%1.4s" -- : "+w"(result) -- : "w"(b) -+ uint32x2_t result; -+ __asm__ ("uaddlp %0.2s,%1.4h" -+ : "=w"(result) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmovn_high_u64 (uint32x2_t a, uint64x2_t b) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddl_u32 (uint32x2_t a) - { -- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("xtn2 %0.4s,%1.2d" -- : "+w"(result) -- : "w"(b) 
-+ uint64x1_t result; -+ __asm__ ("uaddlp %0.1d,%1.2s" -+ : "=w"(result) -+ : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vmovn_s16 (int16x8_t a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddlq_s8 (int8x16_t a) - { -- int8x8_t result; -- __asm__ ("xtn %0.8b,%1.8h" -+ int16x8_t result; -+ __asm__ ("saddlp %0.8h,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmovn_s32 (int32x4_t a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddlq_s16 (int16x8_t a) - { -- int16x4_t result; -- __asm__ ("xtn %0.4h,%1.4s" -+ int32x4_t result; -+ __asm__ ("saddlp %0.4s,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmovn_s64 (int64x2_t a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddlq_s32 (int32x4_t a) - { -- int32x2_t result; -- __asm__ ("xtn %0.2s,%1.2d" -+ int64x2_t result; -+ __asm__ ("saddlp %0.2d,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vmovn_u16 (uint16x8_t a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddlq_u8 (uint8x16_t a) - { -- uint8x8_t result; -- __asm__ ("xtn %0.8b,%1.8h" -+ uint16x8_t result; -+ __asm__ ("uaddlp %0.8h,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmovn_u32 (uint32x4_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddlq_u16 (uint16x8_t a) - { -- uint16x4_t result; -- __asm__ ("xtn %0.4h,%1.4s" -+ uint32x4_t result; -+ __asm__ ("uaddlp %0.4s,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmovn_u64 (uint64x2_t a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddlq_u32 (uint32x4_t a) - { -- uint32x2_t result; -- __asm__ ("xtn %0.2s,%1.2d" -+ uint64x2_t result; -+ __asm__ ("uaddlp %0.2d,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmul_n_f32 (float32x2_t a, float32_t b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_s8 (int8x16_t a, int8x16_t b) - { -- float32x2_t result; -- __asm__ ("fmul %0.2s,%1.2s,%2.s[0]" -+ int8x16_t result; -+ __asm__ ("addp %0.16b,%1.16b,%2.16b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_s16 (int16x8_t a, int16x8_t b) -+{ -+ int16x8_t result; -+ __asm__ ("addp %0.8h,%1.8h,%2.8h" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int32x4_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_s32 (int32x4_t a, int32x4_t b) -+{ -+ int32x4_t result; -+ __asm__ ("addp %0.4s,%1.4s,%2.4s" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_s64 (int64x2_t a, int64x2_t b) -+{ -+ int64x2_t result; -+ __asm__ ("addp %0.2d,%1.2d,%2.2d" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_u8 (uint8x16_t a, uint8x16_t b) -+{ -+ uint8x16_t result; -+ __asm__ ("addp %0.16b,%1.16b,%2.16b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_u16 (uint16x8_t a, uint16x8_t b) -+{ -+ uint16x8_t result; -+ __asm__ ("addp %0.8h,%1.8h,%2.8h" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_u32 (uint32x4_t a, uint32x4_t b) -+{ -+ uint32x4_t result; -+ __asm__ ("addp %0.4s,%1.4s,%2.4s" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_u64 (uint64x2_t a, uint64x2_t b) -+{ -+ uint64x2_t result; -+ __asm__ ("addp %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmul_n_s16 (int16x4_t a, int16_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulh_n_s16 (int16x4_t a, int16_t b) - { - int16x4_t result; -- __asm__ ("mul %0.4h,%1.4h,%2.h[0]" -+ __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmul_n_s32 (int32x2_t a, int32_t b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulh_n_s32 (int32x2_t a, int32_t b) - { - int32x2_t result; -- __asm__ ("mul %0.2s,%1.2s,%2.s[0]" -+ __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmul_n_u16 (uint16x4_t a, uint16_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhq_n_s16 (int16x8_t a, int16_t b) - { -- uint16x4_t result; -- __asm__ ("mul %0.4h,%1.4h,%2.h[0]" -+ int16x8_t result; -+ __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" - : "=w"(result) - : "w"(a), "x"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmul_n_u32 (uint32x2_t a, uint32_t b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhq_n_s32 (int32x4_t a, int32_t b) - { -- uint32x2_t result; -- __asm__ ("mul %0.2s,%1.2s,%2.s[0]" -+ int32x4_t result; -+ __asm__ 
("sqdmulh %0.4s,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --#define vmull_high_lane_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x4_t b_ = (b); \ -- int16x8_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "x"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_high_s16 (int8x8_t a, int16x8_t b) -+{ -+ int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("sqxtn2 %0.16b, %1.8h" -+ : "+w"(result) -+ : "w"(b) -+ : /* No clobbers */); -+ return result; -+} - --#define vmull_high_lane_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x2_t b_ = (b); \ -- int32x4_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmull_high_lane_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x4_t b_ = (b); \ -- uint16x8_t a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "x"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmull_high_lane_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x2_t b_ = (b); \ -- uint32x4_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmull_high_laneq_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int16x8_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("smull2 %0.4s, %1.8h, %2.h[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "x"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmull_high_laneq_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int32x4_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("smull2 %0.2d, %1.4s, %2.s[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmull_high_laneq_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint16x8_t a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("umull2 %0.4s, %1.8h, %2.h[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "x"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vmull_high_laneq_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint32x4_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("umull2 %0.2d, %1.4s, %2.s[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_high_s32 (int16x4_t a, int32x4_t b) -+{ -+ int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("sqxtn2 %0.8h, %1.4s" -+ : "+w"(result) -+ : "w"(b) -+ : /* No clobbers */); -+ return result; -+} - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmull_high_n_s16 (int16x8_t a, int16_t b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_high_s64 (int32x2_t a, int64x2_t b) - { -- int32x4_t result; -- __asm__ ("smull2 %0.4s,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ int32x4_t result = vcombine_s32 
(a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("sqxtn2 %0.4s, %1.2d" -+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmull_high_n_s32 (int32x4_t a, int32_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) - { -- int64x2_t result; -- __asm__ ("smull2 %0.2d,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("uqxtn2 %0.16b, %1.8h" -+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmull_high_n_u16 (uint16x8_t a, uint16_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) - { -- uint32x4_t result; -- __asm__ ("umull2 %0.4s,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -+ uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("uqxtn2 %0.8h, %1.4s" -+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmull_high_n_u32 (uint32x4_t a, uint32_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) - { -- uint64x2_t result; -- __asm__ ("umull2 %0.2d,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -+ uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("uqxtn2 %0.4s, %1.2d" -+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vmull_high_p8 (poly8x16_t a, poly8x16_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovun_high_s16 (uint8x8_t a, int16x8_t b) - { -- poly16x8_t result; -- __asm__ ("pmull2 %0.8h,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("sqxtun2 %0.16b, %1.8h" -+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmull_high_s8 (int8x16_t a, int8x16_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovun_high_s32 (uint16x4_t a, int32x4_t b) - { -- int16x8_t result; -- __asm__ ("smull2 %0.8h,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -+ uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("sqxtun2 %0.8h, %1.4s" -+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmull_high_s16 (int16x8_t a, int16x8_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovun_high_s64 (uint32x2_t a, int64x2_t b) - { -- int32x4_t result; -- __asm__ ("smull2 %0.4s,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -+ uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("sqxtun2 %0.4s, %1.2d" 
-+ : "+w"(result) -+ : "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmull_high_s32 (int32x4_t a, int32x4_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulh_n_s16 (int16x4_t a, int16_t b) - { -- int64x2_t result; -- __asm__ ("smull2 %0.2d,%1.4s,%2.4s" -+ int16x4_t result; -+ __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" - : "=w"(result) -- : "w"(a), "w"(b) -+ : "w"(a), "x"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmull_high_u8 (uint8x16_t a, uint8x16_t b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulh_n_s32 (int32x2_t a, int32_t b) - { -- uint16x8_t result; -- __asm__ ("umull2 %0.8h,%1.16b,%2.16b" -+ int32x2_t result; -+ __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmull_high_u16 (uint16x8_t a, uint16x8_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhq_n_s16 (int16x8_t a, int16_t b) - { -- uint32x4_t result; -- __asm__ ("umull2 %0.4s,%1.8h,%2.8h" -+ int16x8_t result; -+ __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" - : "=w"(result) -- : "w"(a), "w"(b) -+ : "w"(a), "x"(b) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmull_high_u32 (uint32x4_t a, uint32x4_t b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhq_n_s32 (int32x4_t a, int32_t b) - { -- uint64x2_t result; -- __asm__ ("umull2 %0.2d,%1.4s,%2.4s" -+ int32x4_t result; -+ __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; - } - --#define vmull_lane_s16(a, b, c) \ -+#define vqrshrn_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ -- int16x4_t b_ = (b); \ -+ int16x8_t b_ = (b); \ -+ int8x8_t a_ = (a); \ -+ int8x16_t result = vcombine_s8 \ -+ (a_, vcreate_s8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vqrshrn_high_n_s32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int32x4_t b_ = (b); \ - int16x4_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("smull %0.4s,%1.4h,%2.h[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "x"(b_), "i"(c) \ -+ int16x8_t result = vcombine_s16 \ -+ (a_, vcreate_s16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmull_lane_s32(a, b, c) \ -+#define vqrshrn_high_n_s64(a, b, c) \ - __extension__ \ - ({ \ -- int32x2_t b_ = (b); \ -+ int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("smull %0.2d,%1.2s,%2.s[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -+ int32x4_t result = vcombine_s32 \ -+ (a_, vcreate_s32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmull_lane_u16(a, b, c) \ -+#define vqrshrn_high_n_u16(a, b, c) \ - __extension__ \ - 
({ \ -- uint16x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("umull %0.4s,%1.4h,%2.h[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "x"(b_), "i"(c) \ -+ uint16x8_t b_ = (b); \ -+ uint8x8_t a_ = (a); \ -+ uint8x16_t result = vcombine_u8 \ -+ (a_, vcreate_u8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmull_lane_u32(a, b, c) \ -+#define vqrshrn_high_n_u32(a, b, c) \ - __extension__ \ - ({ \ -- uint32x2_t b_ = (b); \ -+ uint32x4_t b_ = (b); \ -+ uint16x4_t a_ = (a); \ -+ uint16x8_t result = vcombine_u16 \ -+ (a_, vcreate_u16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vqrshrn_high_n_u64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint64x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -+ uint32x4_t result = vcombine_u32 \ -+ (a_, vcreate_u32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmull_laneq_s16(a, b, c) \ -+#define vqrshrun_high_n_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ -- int16x4_t a_ = (a); \ -- int32x4_t result; \ -- __asm__ ("smull %0.4s, %1.4h, %2.h[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "x"(b_), "i"(c) \ -+ uint8x8_t a_ = (a); \ -+ uint8x16_t result = vcombine_u8 \ -+ (a_, vcreate_u8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmull_laneq_s32(a, b, c) \ -+#define vqrshrun_high_n_s32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int32x4_t b_ = (b); \ -+ uint16x4_t a_ = (a); \ -+ uint16x8_t result = vcombine_u16 \ -+ (a_, vcreate_u16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vqrshrun_high_n_s64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int64x2_t b_ = (b); \ -+ uint32x2_t a_ = (a); \ -+ uint32x4_t result = vcombine_u32 \ -+ (a_, vcreate_u32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vqshrn_high_n_s16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int16x8_t b_ = (b); \ -+ int8x8_t a_ = (a); \ -+ int8x16_t result = vcombine_s8 \ -+ (a_, vcreate_s8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vqshrn_high_n_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ -+ int16x4_t a_ = (a); \ -+ int16x8_t result = vcombine_s16 \ -+ (a_, vcreate_s16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vqshrn_high_n_s64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int64x2_t b_ = (b); \ - int32x2_t a_ = (a); \ -- int64x2_t result; \ -- __asm__ ("smull %0.2d, %1.2s, %2.s[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -+ int32x4_t result = vcombine_s32 \ -+ (a_, vcreate_s32 \ 
-+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmull_laneq_u16(a, b, c) \ -+#define vqshrn_high_n_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint32x4_t result; \ -- __asm__ ("umull %0.4s, %1.4h, %2.h[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "x"(b_), "i"(c) \ -+ uint8x8_t a_ = (a); \ -+ uint8x16_t result = vcombine_u8 \ -+ (a_, vcreate_u8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --#define vmull_laneq_u32(a, b, c) \ -+#define vqshrn_high_n_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint64x2_t result; \ -- __asm__ ("umull %0.2d, %1.2s, %2.s[%3]" \ -- : "=w"(result) \ -- : "w"(a_), "w"(b_), "i"(c) \ -+ uint16x4_t a_ = (a); \ -+ uint16x8_t result = vcombine_u16 \ -+ (a_, vcreate_u16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmull_n_s16 (int16x4_t a, int16_t b) --{ -- int32x4_t result; -- __asm__ ("smull %0.4s,%1.4h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -- : /* No clobbers */); -- return result; --} -+#define vqshrn_high_n_u64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint64x2_t b_ = (b); \ -+ uint32x2_t a_ = (a); \ -+ uint32x4_t result = vcombine_u32 \ -+ (a_, vcreate_u32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmull_n_s32 (int32x2_t a, int32_t b) --{ -- int64x2_t result; -- __asm__ ("smull %0.2d,%1.2s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vqshrun_high_n_s16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int16x8_t b_ = (b); \ -+ uint8x8_t a_ = (a); \ -+ uint8x16_t result = vcombine_u8 \ -+ (a_, vcreate_u8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmull_n_u16 (uint16x4_t a, uint16_t b) --{ -- uint32x4_t result; -- __asm__ ("umull %0.4s,%1.4h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -- : /* No clobbers */); -- return result; --} -+#define vqshrun_high_n_s32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int32x4_t b_ = (b); \ -+ uint16x4_t a_ = (a); \ -+ uint16x8_t result = vcombine_u16 \ -+ (a_, vcreate_u16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmull_n_u32 (uint32x2_t a, uint32_t b) --{ -- uint64x2_t result; -- __asm__ ("umull %0.2d,%1.2s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vqshrun_high_n_s64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int64x2_t b_ = (b); \ -+ uint32x2_t a_ = (a); \ -+ uint32x4_t result = vcombine_u32 \ -+ (a_, vcreate_u32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("sqshrun2 
%0.4s, %1.2d, #%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vmull_p8 (poly8x8_t a, poly8x8_t b) --{ -- poly16x8_t result; -- __asm__ ("pmull %0.8h, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_high_n_s16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int16x8_t b_ = (b); \ -+ int8x8_t a_ = (a); \ -+ int8x16_t result = vcombine_s8 \ -+ (a_, vcreate_s8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmull_s8 (int8x8_t a, int8x8_t b) --{ -- int16x8_t result; -- __asm__ ("smull %0.8h, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmull_s16 (int16x4_t a, int16x4_t b) --{ -- int32x4_t result; -- __asm__ ("smull %0.4s, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmull_s32 (int32x2_t a, int32x2_t b) --{ -- int64x2_t result; -- __asm__ ("smull %0.2d, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -- --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmull_u8 (uint8x8_t a, uint8x8_t b) --{ -- uint16x8_t result; -- __asm__ ("umull %0.8h, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_high_n_s32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int32x4_t b_ = (b); \ -+ int16x4_t a_ = (a); \ -+ int16x8_t result = vcombine_s16 \ -+ (a_, vcreate_s16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmull_u16 (uint16x4_t a, uint16x4_t b) --{ -- uint32x4_t result; -- __asm__ ("umull %0.4s, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_high_n_s64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int64x2_t b_ = (b); \ -+ int32x2_t a_ = (a); \ -+ int32x4_t result = vcombine_s32 \ -+ (a_, vcreate_s32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmull_u32 (uint32x2_t a, uint32x2_t b) --{ -- uint64x2_t result; -- __asm__ ("umull %0.2d, %1.2s, %2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_high_n_u16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint16x8_t b_ = (b); \ -+ uint8x8_t a_ = (a); \ -+ uint8x16_t result = vcombine_u8 \ -+ (a_, vcreate_u8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmulq_n_f32 (float32x4_t a, float32_t b) --{ -- float32x4_t result; -- __asm__ ("fmul %0.4s,%1.4s,%2.s[0]" -- : 
"=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_high_n_u32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint32x4_t b_ = (b); \ -+ uint16x4_t a_ = (a); \ -+ uint16x8_t result = vcombine_u16 \ -+ (a_, vcreate_u16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmulq_n_f64 (float64x2_t a, float64_t b) --{ -- float64x2_t result; -- __asm__ ("fmul %0.2d,%1.2d,%2.d[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_high_n_u64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint64x2_t b_ = (b); \ -+ uint32x2_t a_ = (a); \ -+ uint32x4_t result = vcombine_u32 \ -+ (a_, vcreate_u32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmulq_n_s16 (int16x8_t a, int16_t b) --{ -- int16x8_t result; -- __asm__ ("mul %0.8h,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_n_s16(a, b) \ -+ __extension__ \ -+ ({ \ -+ int16x8_t a_ = (a); \ -+ int8x8_t result; \ -+ __asm__ ("rshrn %0.8b,%1.8h,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmulq_n_s32 (int32x4_t a, int32_t b) --{ -- int32x4_t result; -- __asm__ ("mul %0.4s,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_n_s32(a, b) \ -+ __extension__ \ -+ ({ \ -+ int32x4_t a_ = (a); \ -+ int16x4_t result; \ -+ __asm__ ("rshrn %0.4h,%1.4s,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmulq_n_u16 (uint16x8_t a, uint16_t b) --{ -- uint16x8_t result; -- __asm__ ("mul %0.8h,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_n_s64(a, b) \ -+ __extension__ \ -+ ({ \ -+ int64x2_t a_ = (a); \ -+ int32x2_t result; \ -+ __asm__ ("rshrn %0.2s,%1.2d,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmulq_n_u32 (uint32x4_t a, uint32_t b) --{ -- uint32x4_t result; -- __asm__ ("mul %0.4s,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_n_u16(a, b) \ -+ __extension__ \ -+ ({ \ -+ uint16x8_t a_ = (a); \ -+ uint8x8_t result; \ -+ __asm__ ("rshrn %0.8b,%1.8h,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vmvn_p8 (poly8x8_t a) --{ -- poly8x8_t result; -- __asm__ ("mvn %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_n_u32(a, b) \ -+ __extension__ \ -+ ({ \ -+ uint32x4_t a_ = (a); \ -+ uint16x4_t result; \ -+ __asm__ ("rshrn %0.4h,%1.4s,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline int8x8_t 
__attribute__ ((__always_inline__)) --vmvn_s8 (int8x8_t a) --{ -- int8x8_t result; -- __asm__ ("mvn %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; --} -+#define vrshrn_n_u64(a, b) \ -+ __extension__ \ -+ ({ \ -+ uint64x2_t a_ = (a); \ -+ uint32x2_t result; \ -+ __asm__ ("rshrn %0.2s,%1.2d,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmvn_s16 (int16x4_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrte_u32 (uint32x2_t a) - { -- int16x4_t result; -- __asm__ ("mvn %0.8b,%1.8b" -+ uint32x2_t result; -+ __asm__ ("ursqrte %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmvn_s32 (int32x2_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrteq_u32 (uint32x4_t a) - { -- int32x2_t result; -- __asm__ ("mvn %0.8b,%1.8b" -+ uint32x4_t result; -+ __asm__ ("ursqrte %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vmvn_u8 (uint8x8_t a) --{ -+#define vshrn_high_n_s16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int16x8_t b_ = (b); \ -+ int8x8_t a_ = (a); \ -+ int8x16_t result = vcombine_s8 \ -+ (a_, vcreate_s8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_high_n_s32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int32x4_t b_ = (b); \ -+ int16x4_t a_ = (a); \ -+ int16x8_t result = vcombine_s16 \ -+ (a_, vcreate_s16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_high_n_s64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ int64x2_t b_ = (b); \ -+ int32x2_t a_ = (a); \ -+ int32x4_t result = vcombine_s32 \ -+ (a_, vcreate_s32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_high_n_u16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint16x8_t b_ = (b); \ -+ uint8x8_t a_ = (a); \ -+ uint8x16_t result = vcombine_u8 \ -+ (a_, vcreate_u8 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_high_n_u32(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint32x4_t b_ = (b); \ -+ uint16x4_t a_ = (a); \ -+ uint16x8_t result = vcombine_u16 \ -+ (a_, vcreate_u16 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_high_n_u64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ uint64x2_t b_ = (b); \ -+ uint32x2_t a_ = (a); \ -+ uint32x4_t result = vcombine_u32 \ -+ (a_, vcreate_u32 \ -+ (__AARCH64_UINT64_C (0x0))); \ -+ __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ -+ : "+w"(result) \ -+ : "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_n_s16(a, b) \ -+ __extension__ \ -+ ({ \ -+ int16x8_t a_ = (a); \ -+ int8x8_t result; \ -+ __asm__ ("shrn 
%0.8b,%1.8h,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_n_s32(a, b) \ -+ __extension__ \ -+ ({ \ -+ int32x4_t a_ = (a); \ -+ int16x4_t result; \ -+ __asm__ ("shrn %0.4h,%1.4s,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_n_s64(a, b) \ -+ __extension__ \ -+ ({ \ -+ int64x2_t a_ = (a); \ -+ int32x2_t result; \ -+ __asm__ ("shrn %0.2s,%1.2d,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_n_u16(a, b) \ -+ __extension__ \ -+ ({ \ -+ uint16x8_t a_ = (a); \ -+ uint8x8_t result; \ -+ __asm__ ("shrn %0.8b,%1.8h,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_n_u32(a, b) \ -+ __extension__ \ -+ ({ \ -+ uint32x4_t a_ = (a); \ -+ uint16x4_t result; \ -+ __asm__ ("shrn %0.4h,%1.4s,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vshrn_n_u64(a, b) \ -+ __extension__ \ -+ ({ \ -+ uint64x2_t a_ = (a); \ -+ uint32x2_t result; \ -+ __asm__ ("shrn %0.2s,%1.2d,%2" \ -+ : "=w"(result) \ -+ : "w"(a_), "i"(b) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsli_n_p8(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly8x8_t b_ = (b); \ -+ poly8x8_t a_ = (a); \ -+ poly8x8_t result; \ -+ __asm__ ("sli %0.8b,%2.8b,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsli_n_p16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly16x4_t b_ = (b); \ -+ poly16x4_t a_ = (a); \ -+ poly16x4_t result; \ -+ __asm__ ("sli %0.4h,%2.4h,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsliq_n_p8(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly8x16_t b_ = (b); \ -+ poly8x16_t a_ = (a); \ -+ poly8x16_t result; \ -+ __asm__ ("sli %0.16b,%2.16b,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsliq_n_p16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly16x8_t b_ = (b); \ -+ poly16x8_t a_ = (a); \ -+ poly16x8_t result; \ -+ __asm__ ("sli %0.8h,%2.8h,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsri_n_p8(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly8x8_t b_ = (b); \ -+ poly8x8_t a_ = (a); \ -+ poly8x8_t result; \ -+ __asm__ ("sri %0.8b,%2.8b,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsri_n_p16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly16x4_t b_ = (b); \ -+ poly16x4_t a_ = (a); \ -+ poly16x4_t result; \ -+ __asm__ ("sri %0.4h,%2.4h,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsri_n_p64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly64x1_t b_ = (b); \ -+ poly64x1_t a_ = (a); \ -+ poly64x1_t result; \ -+ __asm__ ("sri %d0,%d2,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers. 
*/); \ -+ result; \ -+ }) -+ -+#define vsriq_n_p8(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly8x16_t b_ = (b); \ -+ poly8x16_t a_ = (a); \ -+ poly8x16_t result; \ -+ __asm__ ("sri %0.16b,%2.16b,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsriq_n_p16(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly16x8_t b_ = (b); \ -+ poly16x8_t a_ = (a); \ -+ poly16x8_t result; \ -+ __asm__ ("sri %0.8h,%2.8h,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers */); \ -+ result; \ -+ }) -+ -+#define vsriq_n_p64(a, b, c) \ -+ __extension__ \ -+ ({ \ -+ poly64x2_t b_ = (b); \ -+ poly64x2_t a_ = (a); \ -+ poly64x2_t result; \ -+ __asm__ ("sri %0.2d,%2.2d,%3" \ -+ : "=w"(result) \ -+ : "0"(a_), "w"(b_), "i"(c) \ -+ : /* No clobbers. */); \ -+ result; \ -+ }) -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_p8 (poly8x8_t a, poly8x8_t b) -+{ -+ uint8x8_t result; -+ __asm__ ("cmtst %0.8b, %1.8b, %2.8b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_p16 (poly16x4_t a, poly16x4_t b) -+{ -+ uint16x4_t result; -+ __asm__ ("cmtst %0.4h, %1.4h, %2.4h" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_p8 (poly8x16_t a, poly8x16_t b) -+{ -+ uint8x16_t result; -+ __asm__ ("cmtst %0.16b, %1.16b, %2.16b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_p16 (poly16x8_t a, poly16x8_t b) -+{ -+ uint16x8_t result; -+ __asm__ ("cmtst %0.8h, %1.8h, %2.8h" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+/* End of temporary inline asm implementations. */ -+ -+/* Start of temporary inline asm for vldn, vstn and friends. */ -+ -+/* Create struct element types for duplicating loads. -+ -+ Create 2 element structures of: -+ -+ +------+----+----+----+----+ -+ | | 8 | 16 | 32 | 64 | -+ +------+----+----+----+----+ -+ |int | Y | Y | N | N | -+ +------+----+----+----+----+ -+ |uint | Y | Y | N | N | -+ +------+----+----+----+----+ -+ |float | - | Y | N | N | -+ +------+----+----+----+----+ -+ |poly | Y | Y | - | - | -+ +------+----+----+----+----+ -+ -+ Create 3 element structures of: -+ -+ +------+----+----+----+----+ -+ | | 8 | 16 | 32 | 64 | -+ +------+----+----+----+----+ -+ |int | Y | Y | Y | Y | -+ +------+----+----+----+----+ -+ |uint | Y | Y | Y | Y | -+ +------+----+----+----+----+ -+ |float | - | Y | Y | Y | -+ +------+----+----+----+----+ -+ |poly | Y | Y | - | - | -+ +------+----+----+----+----+ -+ -+ Create 4 element structures of: -+ -+ +------+----+----+----+----+ -+ | | 8 | 16 | 32 | 64 | -+ +------+----+----+----+----+ -+ |int | Y | N | N | Y | -+ +------+----+----+----+----+ -+ |uint | Y | N | N | Y | -+ +------+----+----+----+----+ -+ |float | - | N | N | Y | -+ +------+----+----+----+----+ -+ |poly | Y | N | - | - | -+ +------+----+----+----+----+ -+ -+ This is required for casting memory reference. 
*/ -+#define __STRUCTN(t, sz, nelem) \ -+ typedef struct t ## sz ## x ## nelem ## _t { \ -+ t ## sz ## _t val[nelem]; \ -+ } t ## sz ## x ## nelem ## _t; -+ -+/* 2-element structs. */ -+__STRUCTN (int, 8, 2) -+__STRUCTN (int, 16, 2) -+__STRUCTN (uint, 8, 2) -+__STRUCTN (uint, 16, 2) -+__STRUCTN (float, 16, 2) -+__STRUCTN (poly, 8, 2) -+__STRUCTN (poly, 16, 2) -+/* 3-element structs. */ -+__STRUCTN (int, 8, 3) -+__STRUCTN (int, 16, 3) -+__STRUCTN (int, 32, 3) -+__STRUCTN (int, 64, 3) -+__STRUCTN (uint, 8, 3) -+__STRUCTN (uint, 16, 3) -+__STRUCTN (uint, 32, 3) -+__STRUCTN (uint, 64, 3) -+__STRUCTN (float, 16, 3) -+__STRUCTN (float, 32, 3) -+__STRUCTN (float, 64, 3) -+__STRUCTN (poly, 8, 3) -+__STRUCTN (poly, 16, 3) -+/* 4-element structs. */ -+__STRUCTN (int, 8, 4) -+__STRUCTN (int, 64, 4) -+__STRUCTN (uint, 8, 4) -+__STRUCTN (uint, 64, 4) -+__STRUCTN (poly, 8, 4) -+__STRUCTN (float, 64, 4) -+#undef __STRUCTN -+ -+ -+#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \ -+ qmode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ extern __inline void \ -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -+vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_oi __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} -+ -+__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16, -+ float16x8_t) -+__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, -+ float32x4_t) -+__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64, -+ float64x2_t) -+__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, -+ int8x16_t) -+__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, -+ int16x8_t) -+__ST2_LANE_FUNC (poly64x1x2_t, poly64x2x2_t, poly64_t, di, v2di_ssps, di, p64, -+ poly64x2_t) -+__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, -+ int8x16_t) -+__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, -+ int16x8_t) -+__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, -+ int32x4_t) -+__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, -+ int64x2_t) -+__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, -+ int8x16_t) -+__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16, -+ int16x8_t) -+__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, -+ int32x4_t) -+__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, -+ int64x2_t) -+ -+#undef __ST2_LANE_FUNC -+#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ extern __inline void \ -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -+vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ -+ __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ 
__ptr, __temp.__o, __c); \ -+} -+ -+__ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16) -+__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) -+__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) -+__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) -+__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) -+__ST2_LANE_FUNC (poly64x2x2_t, poly64_t, v2di, di, p64) -+__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) -+__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) -+__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) -+__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) -+__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) -+__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) -+__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) -+__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) -+ -+#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \ -+ qmode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ extern __inline void \ -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -+vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_ci __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[2] \ -+ = vcombine_##funcsuffix (__b.val[2], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregci##qmode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregci##qmode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregci##qmode (__o, \ -+ (signedtype) __temp.val[2], 2); \ -+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} -+ -+__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16, -+ float16x8_t) -+__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, -+ float32x4_t) -+__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, -+ float64x2_t) -+__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, -+ int8x16_t) -+__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, -+ int16x8_t) -+__ST3_LANE_FUNC (poly64x1x3_t, poly64x2x3_t, poly64_t, di, v2di_ssps, di, p64, -+ poly64x2_t) -+__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, -+ int8x16_t) -+__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, -+ int16x8_t) -+__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, -+ int32x4_t) -+__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, -+ int64x2_t) -+__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, -+ int8x16_t) -+__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, -+ int16x8_t) -+__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, -+ int32x4_t) -+__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, -+ int64x2_t) -+ -+#undef __ST3_LANE_FUNC -+#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ extern __inline void \ -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -+vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ 
__builtin_aarch64_simd_ci __o; } __temp = { __b }; \ -+ __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} -+ -+__ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16) -+__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) -+__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) -+__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) -+__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) -+__ST3_LANE_FUNC (poly64x2x3_t, poly64_t, v2di, di, p64) -+__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) -+__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) -+__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) -+__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) -+__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) -+__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) -+__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) -+__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) -+ -+#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \ -+ qmode, ptr_mode, funcsuffix, signedtype) \ -+__extension__ extern __inline void \ -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -+vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_xi __o; \ -+ largetype __temp; \ -+ __temp.val[0] \ -+ = vcombine_##funcsuffix (__b.val[0], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[1] \ -+ = vcombine_##funcsuffix (__b.val[1], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[2] \ -+ = vcombine_##funcsuffix (__b.val[2], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __temp.val[3] \ -+ = vcombine_##funcsuffix (__b.val[3], \ -+ vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -+ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -+ (signedtype) __temp.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -+ (signedtype) __temp.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -+ (signedtype) __temp.val[2], 2); \ -+ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -+ (signedtype) __temp.val[3], 3); \ -+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __o, __c); \ -+} -+ -+__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16, -+ float16x8_t) -+__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, -+ float32x4_t) -+__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, -+ float64x2_t) -+__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, -+ int8x16_t) -+__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, -+ int16x8_t) -+__ST4_LANE_FUNC (poly64x1x4_t, poly64x2x4_t, poly64_t, di, v2di_ssps, di, p64, -+ poly64x2_t) -+__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, -+ int8x16_t) -+__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, -+ int16x8_t) -+__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, -+ int32x4_t) -+__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, -+ int64x2_t) -+__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, -+ int8x16_t) -+__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, -+ int16x8_t) -+__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, -+ int32x4_t) -+__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, -+ int64x2_t) -+ 
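The __ST4_LANE_FUNC instantiations above generate the vst4_lane_* family: each call stores lane __c of all four d-registers in the struct as a single interleaved ST4 transfer. A minimal usage sketch, assuming an AArch64 target with this arm_neon.h; the wrapper name is illustrative only:

  #include <arm_neon.h>

  /* Writes px.val[0][1], px.val[1][1], px.val[2][1], px.val[3][1]
     to out[0..3] with one interleaved lane store.  */
  static void
  store_lane1 (uint8_t *out, uint8x8x4_t px)
  {
    vst4_lane_u8 (out, px, 1);
  }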
-+#undef __ST4_LANE_FUNC -+#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ -+__extension__ extern __inline void \ -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -+vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ -+ intype __b, const int __c) \ -+{ \ -+ union { intype __i; \ -+ __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ -+ __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -+ __ptr, __temp.__o, __c); \ -+} -+ -+__ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16) -+__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) -+__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) -+__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) -+__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) -+__ST4_LANE_FUNC (poly64x2x4_t, poly64_t, v2di, di, p64) -+__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) -+__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) -+__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) -+__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) -+__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) -+__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) -+__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) -+__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) -+ -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddlv_s32 (int32x2_t a) -+{ -+ int64_t result; -+ __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); -+ return result; -+} -+ -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddlv_u32 (uint32x2_t a) -+{ -+ uint64_t result; -+ __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); -+ return result; -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, 
__b, __c); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); -+} -+ -+/* Table intrinsics. */ -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl1_p8 (poly8x16_t a, uint8x8_t b) -+{ -+ poly8x8_t result; -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl1_s8 (int8x16_t a, uint8x8_t b) -+{ -+ int8x8_t result; -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl1_u8 (uint8x16_t a, uint8x8_t b) -+{ -+ uint8x8_t result; -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) -+{ -+ poly8x16_t result; -+ __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl1q_s8 (int8x16_t a, uint8x16_t b) -+{ -+ int8x16_t result; -+ __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) -+{ -+ uint8x16_t result; -+ __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -+ : "=w"(result) -+ : "w"(a), "w"(b) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) -+{ -+ int8x8_t result = r; -+ __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -+ : "+w"(result) -+ : "w"(tab), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) -+{ -+ uint8x8_t result = r; -+ __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -+ : "+w"(result) -+ : "w"(tab), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) -+{ -+ poly8x8_t result = r; -+ __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -+ : "+w"(result) -+ : "w"(tab), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) -+{ -+ int8x16_t result = r; -+ __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -+ : "+w"(result) -+ : "w"(tab), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) -+{ -+ uint8x16_t result = r; -+ __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -+ : "+w"(result) -+ : "w"(tab), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) -+{ -+ poly8x16_t result = r; -+ __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -+ : "+w"(result) -+ : "w"(tab), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+/* V7 legacy table intrinsics. */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl1_s8 (int8x8_t tab, int8x8_t idx) -+{ -+ int8x8_t result; -+ int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -+ : "=w"(result) -+ : "w"(temp), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) -+{ - uint8x8_t result; -- __asm__ ("mvn %0.8b,%1.8b" -+ uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" - : "=w"(result) -- : "w"(a) -+ : "w"(temp), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) -+{ -+ poly8x8_t result; -+ poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -+ : "=w"(result) -+ : "w"(temp), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) -+{ -+ int8x8_t result; -+ int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -+ : "=w"(result) -+ : "w"(temp), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) -+{ -+ uint8x8_t result; -+ uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -+ : "=w"(result) -+ : "w"(temp), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) -+{ -+ poly8x8_t result; -+ poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); -+ __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -+ : "=w"(result) -+ : "w"(temp), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) -+{ -+ int8x8_t result; -+ int8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); -+ temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = __builtin_aarch64_tbl3v8qi (__o, idx); -+ return 
result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) -+{ -+ uint8x8_t result; -+ uint8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); -+ temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ return result; -+} -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) -+{ -+ poly8x8_t result; -+ poly8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); -+ temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ return result; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) -+{ -+ int8x8_t result; -+ int8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); -+ temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = __builtin_aarch64_tbl3v8qi (__o, idx); -+ return result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) -+{ -+ uint8x8_t result; -+ uint8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); -+ temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ return result; -+} -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) -+{ -+ poly8x8_t result; -+ poly8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); -+ temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ return result; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) -+{ -+ int8x8_t result = r; -+ int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); -+ __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -+ : "+w"(result) -+ : "w"(temp), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) -+{ -+ uint8x8_t result = r; -+ uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); -+ __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -+ : "+w"(result) -+ : "w"(temp), "w"(idx) -+ : /* No clobbers */); -+ return result; -+} -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) -+{ -+ poly8x8_t result = r; -+ poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); -+ __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -+ : "+w"(result) -+ : "w"(temp), "w"(idx) - : /* No clobbers */); - return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmvn_u16 (uint16x4_t a) -+/* End of temporary inline asm. */ -+ -+/* Start of optimal implementations in approved order. */ -+ -+/* vabd. */ -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabds_f32 (float32_t __a, float32_t __b) -+{ -+ return __builtin_aarch64_fabdsf (__a, __b); -+} -+ -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabdd_f64 (float64_t __a, float64_t __b) -+{ -+ return __builtin_aarch64_fabddf (__a, __b); -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabd_f32 (float32x2_t __a, float32x2_t __b) -+{ -+ return __builtin_aarch64_fabdv2sf (__a, __b); -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabd_f64 (float64x1_t __a, float64x1_t __b) -+{ -+ return (float64x1_t) {vabdd_f64 (vget_lane_f64 (__a, 0), -+ vget_lane_f64 (__b, 0))}; -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabdq_f32 (float32x4_t __a, float32x4_t __b) -+{ -+ return __builtin_aarch64_fabdv4sf (__a, __b); -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabdq_f64 (float64x2_t __a, float64x2_t __b) -+{ -+ return __builtin_aarch64_fabdv2df (__a, __b); -+} -+ -+/* vabs */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabs_f32 (float32x2_t __a) -+{ -+ return __builtin_aarch64_absv2sf (__a); -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabs_f64 (float64x1_t __a) -+{ -+ return (float64x1_t) {__builtin_fabs (__a[0])}; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabs_s8 (int8x8_t __a) -+{ -+ return __builtin_aarch64_absv8qi (__a); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabs_s16 (int16x4_t __a) -+{ -+ return __builtin_aarch64_absv4hi (__a); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabs_s32 (int32x2_t __a) -+{ -+ return __builtin_aarch64_absv2si (__a); -+} -+ -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabs_s64 (int64x1_t __a) -+{ -+ return (int64x1_t) {__builtin_aarch64_absdi (__a[0])}; -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vabsq_f32 (float32x4_t __a) -+{ -+ return __builtin_aarch64_absv4sf (__a); -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabsq_f64 (float64x2_t __a) -+{ -+ return __builtin_aarch64_absv2df (__a); -+} -+ -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabsq_s8 (int8x16_t __a) -+{ -+ return __builtin_aarch64_absv16qi (__a); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabsq_s16 (int16x8_t __a) -+{ -+ return __builtin_aarch64_absv8hi (__a); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabsq_s32 (int32x4_t __a) -+{ -+ return __builtin_aarch64_absv4si (__a); -+} -+ -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabsq_s64 (int64x2_t __a) -+{ -+ return __builtin_aarch64_absv2di (__a); -+} -+ -+/* vadd */ -+ -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddd_s64 (int64_t __a, int64_t __b) -+{ -+ return __a + __b; -+} -+ -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddd_u64 (uint64_t __a, uint64_t __b) -+{ -+ return __a + __b; -+} -+ -+/* vaddv */ -+ -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddv_s8 (int8x8_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v8qi (__a); -+} -+ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddv_s16 (int16x4_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v4hi (__a); -+} -+ -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddv_s32 (int32x2_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v2si (__a); -+} -+ -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddv_u8 (uint8x8_t __a) -+{ -+ return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a); -+} -+ -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddv_u16 (uint16x4_t __a) -+{ -+ return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a); -+} -+ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddv_u32 (uint32x2_t __a) -+{ -+ return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a); -+} -+ -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_s8 (int8x16_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v16qi (__a); -+} -+ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_s16 (int16x8_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v8hi (__a); -+} -+ -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_s32 (int32x4_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v4si (__a); -+} -+ -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_s64 (int64x2_t __a) -+{ -+ return 
__builtin_aarch64_reduc_plus_scal_v2di (__a); -+} -+ -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_u8 (uint8x16_t __a) -+{ -+ return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a); -+} -+ -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_u16 (uint16x8_t __a) -+{ -+ return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a); -+} -+ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_u32 (uint32x4_t __a) -+{ -+ return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a); -+} -+ -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_u64 (uint64x2_t __a) -+{ -+ return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a); -+} -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddv_f32 (float32x2_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v2sf (__a); -+} -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_f32 (float32x4_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v4sf (__a); -+} -+ -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddvq_f64 (float64x2_t __a) -+{ -+ return __builtin_aarch64_reduc_plus_scal_v2df (__a); -+} -+ -+/* vbsl */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c) -+{ -+ return __builtin_aarch64_simd_bslv4hf_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) -+{ -+ return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c) -+{ -+ return (float64x1_t) -+ { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) }; -+} -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) -+{ -+ return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c); -+} -+ -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) -+{ -+ return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c); -+} -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) -+{ -+ return (poly64x1_t) -+ {__builtin_aarch64_simd_bsldi_pupp (__a[0], __b[0], __c[0])}; -+} -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) -+{ -+ return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_s16 (uint16x4_t __a, int16x4_t __b, 
int16x4_t __c) -+{ -+ return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) -+{ -+ return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) -+{ -+ return (int64x1_t) -+ {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])}; -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) -+{ -+ return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) -+{ -+ return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c); -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) -+{ -+ return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) -+{ -+ return (uint64x1_t) -+ {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])}; -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c) -+{ -+ return __builtin_aarch64_simd_bslv8hf_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) -+{ -+ return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c) -+{ -+ return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) -+{ -+ return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c); -+} -+ -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) -+{ -+ return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c); -+} -+ -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) -+{ -+ return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) -+{ -+ return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) -+{ -+ return 
__builtin_aarch64_simd_bslv2di_pupp (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) -+{ -+ return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) -+{ -+ return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c); -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) -+{ -+ return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) -+{ -+ return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c); -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) -+{ -+ return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c); -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) -+{ -+ return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); -+} -+ -+/* ARMv8.1-A intrinsics. */ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.1-a") -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) -+{ -+ return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) -+{ -+ return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) -+{ -+ return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) -+{ -+ return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) -+{ -+ return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) -+{ -+ return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) -+{ -+ return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) -+{ -+ return
__builtin_aarch64_sqrdmlshv4si (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c) -+{ -+ return (int16_t) 
__builtin_aarch64_sqrdmlahhi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c) -+{ -+ return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c) -+{ -+ return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c) -+{ -+ return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c); -+} -+ -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d); -+} -+ -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) -+{ -+ return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d); -+} -+#pragma GCC pop_options -+ -+#pragma GCC push_options -+#pragma GCC target ("+nothing+crypto") -+/* vaes */ -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaeseq_u8 (uint8x16_t data, uint8x16_t key) - { -- uint16x4_t result; -- __asm__ ("mvn %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return __builtin_aarch64_crypto_aesev16qi_uuu (data, key); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmvn_u32 (uint32x2_t a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaesdq_u8 (uint8x16_t data, uint8x16_t key) - { -- uint32x2_t result; -- __asm__ ("mvn %0.8b,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vmvnq_p8 (poly8x16_t a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaesmcq_u8 (uint8x16_t data) - { -- poly8x16_t result; -- __asm__ ("mvn %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return __builtin_aarch64_crypto_aesmcv16qi_uu (data); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vmvnq_s8 (int8x16_t a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaesimcq_u8 (uint8x16_t data) - { -- int8x16_t result; -- __asm__ ("mvn %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return __builtin_aarch64_crypto_aesimcv16qi_uu (data); - } -+#pragma GCC pop_options -+ -+/* vcage */ - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmvnq_s16 (int16x8_t a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcage_f64 (float64x1_t __a, float64x1_t __b) - { -- int16x8_t result; -- __asm__ ("mvn %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return vabs_f64 (__a) >= vabs_f64 (__b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmvnq_s32 (int32x4_t a) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcages_f32 (float32_t __a, float32_t __b) - { -- int32x4_t result; -- __asm__ ("mvn %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? 
-1 : 0; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vmvnq_u8 (uint8x16_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcage_f32 (float32x2_t __a, float32x2_t __b) - { -- uint8x16_t result; -- __asm__ ("mvn %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return vabs_f32 (__a) >= vabs_f32 (__b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmvnq_u16 (uint16x8_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcageq_f32 (float32x4_t __a, float32x4_t __b) - { -- uint16x8_t result; -- __asm__ ("mvn %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return vabsq_f32 (__a) >= vabsq_f32 (__b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmvnq_u32 (uint32x4_t a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaged_f64 (float64_t __a, float64_t __b) - { -- uint32x4_t result; -- __asm__ ("mvn %0.16b,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0; -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcageq_f64 (float64x2_t __a, float64x2_t __b) -+{ -+ return vabsq_f64 (__a) >= vabsq_f64 (__b); - } - -+/* vcagt */ - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vpadal_s8 (int16x4_t a, int8x8_t b) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagts_f32 (float32_t __a, float32_t __b) - { -- int16x4_t result; -- __asm__ ("sadalp %0.4h,%2.8b" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0; -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagt_f32 (float32x2_t __a, float32x2_t __b) -+{ -+ return vabs_f32 (__a) > vabs_f32 (__b); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagt_f64 (float64x1_t __a, float64x1_t __b) -+{ -+ return vabs_f64 (__a) > vabs_f64 (__b); -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagtq_f32 (float32x4_t __a, float32x4_t __b) -+{ -+ return vabsq_f32 (__a) > vabsq_f32 (__b); -+} -+ -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagtd_f64 (float64_t __a, float64_t __b) -+{ -+ return __builtin_fabs (__a) > __builtin_fabs (__b) ? 
-1 : 0; -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagtq_f64 (float64x2_t __a, float64x2_t __b) -+{ -+ return vabsq_f64 (__a) > vabsq_f64 (__b); -+} -+ -+/* vcale */ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcale_f32 (float32x2_t __a, float32x2_t __b) -+{ -+ return vabs_f32 (__a) <= vabs_f32 (__b); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcale_f64 (float64x1_t __a, float64x1_t __b) -+{ -+ return vabs_f64 (__a) <= vabs_f64 (__b); -+} -+ -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaled_f64 (float64_t __a, float64_t __b) -+{ -+ return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0; -+} -+ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcales_f32 (float32_t __a, float32_t __b) -+{ -+ return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaleq_f32 (float32x4_t __a, float32x4_t __b) -+{ -+ return vabsq_f32 (__a) <= vabsq_f32 (__b); -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaleq_f64 (float64x2_t __a, float64x2_t __b) -+{ -+ return vabsq_f64 (__a) <= vabsq_f64 (__b); -+} -+ -+/* vcalt */ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcalt_f32 (float32x2_t __a, float32x2_t __b) -+{ -+ return vabs_f32 (__a) < vabs_f32 (__b); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcalt_f64 (float64x1_t __a, float64x1_t __b) -+{ -+ return vabs_f64 (__a) < vabs_f64 (__b); -+} -+ -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaltd_f64 (float64_t __a, float64_t __b) -+{ -+ return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaltq_f32 (float32x4_t __a, float32x4_t __b) -+{ -+ return vabsq_f32 (__a) < vabsq_f32 (__b); -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaltq_f64 (float64x2_t __a, float64x2_t __b) -+{ -+ return vabsq_f64 (__a) < vabsq_f64 (__b); -+} -+ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcalts_f32 (float32_t __a, float32_t __b) -+{ -+ return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0; -+} -+ -+/* vceq - vector. 
*/ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_f32 (float32x2_t __a, float32x2_t __b) -+{ -+ return (uint32x2_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_f64 (float64x1_t __a, float64x1_t __b) -+{ -+ return (uint64x1_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_p8 (poly8x8_t __a, poly8x8_t __b) -+{ -+ return (uint8x8_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_p64 (poly64x1_t __a, poly64x1_t __b) -+{ -+ return (uint64x1_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_s8 (int8x8_t __a, int8x8_t __b) -+{ -+ return (uint8x8_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_s16 (int16x4_t __a, int16x4_t __b) -+{ -+ return (uint16x4_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_s32 (int32x2_t __a, int32x2_t __b) -+{ -+ return (uint32x2_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_s64 (int64x1_t __a, int64x1_t __b) -+{ -+ return (uint64x1_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+ return (__a == __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+ return (__a == __b); -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+ return (__a == __b); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_u64 (uint64x1_t __a, uint64x1_t __b) -+{ -+ return (__a == __b); -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_f32 (float32x4_t __a, float32x4_t __b) -+{ -+ return (uint32x4_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_f64 (float64x2_t __a, float64x2_t __b) -+{ -+ return (uint64x2_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+ return (uint8x16_t) (__a == __b); -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_s8 (int8x16_t __a, int8x16_t __b) -+{ -+ return (uint8x16_t) (__a == __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vpadal_s16 (int32x2_t a, int16x4_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_s16 (int16x8_t __a, int16x8_t __b) - { -- int32x2_t result; -- __asm__ ("sadalp %0.2s,%2.4h" -- 
: "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint16x8_t) (__a == __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vpadal_s32 (int64x1_t a, int32x2_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_s32 (int32x4_t __a, int32x4_t __b) - { -- int64x1_t result; -- __asm__ ("sadalp %0.1d,%2.2s" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a == __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vpadal_u8 (uint16x4_t a, uint8x8_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_s64 (int64x2_t __a, int64x2_t __b) - { -- uint16x4_t result; -- __asm__ ("uadalp %0.4h,%2.8b" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a == __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vpadal_u16 (uint32x2_t a, uint16x4_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- uint32x2_t result; -- __asm__ ("uadalp %0.2s,%2.4h" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a == __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vpadal_u32 (uint64x1_t a, uint32x2_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- uint64x1_t result; -- __asm__ ("uadalp %0.1d,%2.2s" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a == __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vpadalq_s8 (int16x8_t a, int8x16_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- int16x8_t result; -- __asm__ ("sadalp %0.8h,%2.16b" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a == __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vpadalq_s16 (int32x4_t a, int16x8_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- int32x4_t result; -- __asm__ ("sadalp %0.4s,%2.8h" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a == __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vpadalq_s32 (int64x2_t a, int32x4_t b) -+/* vceq - scalar. */ -+ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqs_f32 (float32_t __a, float32_t __b) - { -- int64x2_t result; -- __asm__ ("sadalp %0.2d,%2.4s" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return __a == __b ? 
-1 : 0; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vpadalq_u8 (uint16x8_t a, uint8x16_t b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqd_s64 (int64_t __a, int64_t __b) - { -- uint16x8_t result; -- __asm__ ("uadalp %0.8h,%2.16b" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return __a == __b ? -1ll : 0ll; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vpadalq_u16 (uint32x4_t a, uint16x8_t b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqd_u64 (uint64_t __a, uint64_t __b) - { -- uint32x4_t result; -- __asm__ ("uadalp %0.4s,%2.8h" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return __a == __b ? -1ll : 0ll; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vpadalq_u32 (uint64x2_t a, uint32x4_t b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqd_f64 (float64_t __a, float64_t __b) - { -- uint64x2_t result; -- __asm__ ("uadalp %0.2d,%2.4s" -- : "=w"(result) -- : "0"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return __a == __b ? -1ll : 0ll; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vpadd_f32 (float32x2_t a, float32x2_t b) -+/* vceqz - vector. */ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_f32 (float32x2_t __a) - { -- float32x2_t result; -- __asm__ ("faddp %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x2_t) (__a == 0.0f); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vpaddl_s8 (int8x8_t a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_f64 (float64x1_t __a) - { -- int16x4_t result; -- __asm__ ("saddlp %0.4h,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint64x1_t) (__a == (float64x1_t) {0.0}); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vpaddl_s16 (int16x4_t a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_p8 (poly8x8_t __a) - { -- int32x2_t result; -- __asm__ ("saddlp %0.2s,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint8x8_t) (__a == 0); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vpaddl_s32 (int32x2_t a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_s8 (int8x8_t __a) - { -- int64x1_t result; -- __asm__ ("saddlp %0.1d,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint8x8_t) (__a == 0); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vpaddl_u8 (uint8x8_t a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_s16 (int16x4_t __a) - { -- uint16x4_t result; -- __asm__ ("uaddlp %0.4h,%1.8b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint16x4_t) (__a == 0); - } - 
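
[Editor's note, not part of the patch] The hunks around this point replace the old inline-asm helpers (vpaddl*, vpadal*, vpadd*) with builtins, and define the new vceq*/vceqz* comparisons as plain C vector comparisons, which under GCC's vector extensions set each true lane to all ones rather than 1 (matching the scalar "? -1 : 0" forms above). A minimal sketch of that lane-mask contract, assuming an AArch64 toolchain and an arm_neon.h that provides vceq_s32 and vceqz_s16 as added here; the file name test_vceq.c is hypothetical:

#include <arm_neon.h>
#include <assert.h>

int
main (void)
{
  int32x2_t a = {1, -5};
  int32x2_t b = {1, 7};
  /* Equal lanes become all ones (0xffffffff), unequal lanes zero.  */
  uint32x2_t eq = vceq_s32 (a, b);
  assert (vget_lane_u32 (eq, 0) == 0xffffffffu);
  assert (vget_lane_u32 (eq, 1) == 0);
  /* vceqz compares each lane against zero.  */
  uint16x4_t z = vceqz_s16 ((int16x4_t) {0, 3, -1, 0});
  assert (vget_lane_u16 (z, 0) == 0xffffu);
  assert (vget_lane_u16 (z, 1) == 0);
  assert (vget_lane_u16 (z, 3) == 0xffffu);
  return 0;
}

Built with, e.g., aarch64-linux-gnu-gcc -O2 test_vceq.c, the asserts should pass because (uint32x2_t) (__a == __b) yields -1 per true lane, which is why the rewritten intrinsics can drop the hand-written mvn/cm* assembly without changing results.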
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vpaddl_u16 (uint16x4_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_s32 (int32x2_t __a) - { -- uint32x2_t result; -- __asm__ ("uaddlp %0.2s,%1.4h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint32x2_t) (__a == 0); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vpaddl_u32 (uint32x2_t a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_s64 (int64x1_t __a) - { -- uint64x1_t result; -- __asm__ ("uaddlp %0.1d,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint64x1_t) (__a == __AARCH64_INT64_C (0)); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vpaddlq_s8 (int8x16_t a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_u8 (uint8x8_t __a) - { -- int16x8_t result; -- __asm__ ("saddlp %0.8h,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (__a == 0); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vpaddlq_s16 (int16x8_t a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_u16 (uint16x4_t __a) - { -- int32x4_t result; -- __asm__ ("saddlp %0.4s,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (__a == 0); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vpaddlq_s32 (int32x4_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_u32 (uint32x2_t __a) - { -- int64x2_t result; -- __asm__ ("saddlp %0.2d,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (__a == 0); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vpaddlq_u8 (uint8x16_t a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_u64 (uint64x1_t __a) - { -- uint16x8_t result; -- __asm__ ("uaddlp %0.8h,%1.16b" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (__a == __AARCH64_UINT64_C (0)); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vpaddlq_u16 (uint16x8_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_f32 (float32x4_t __a) - { -- uint32x4_t result; -- __asm__ ("uaddlp %0.4s,%1.8h" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a == 0.0f); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vpaddlq_u32 (uint32x4_t a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_f64 (float64x2_t __a) - { -- uint64x2_t result; -- __asm__ ("uaddlp %0.2d,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a == 0.0f); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vpaddq_f32 (float32x4_t a, float32x4_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_p8 (poly8x16_t __a) - { -- float32x4_t result; -- __asm__ ("faddp %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint8x16_t) (__a == 0); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vpaddq_f64 (float64x2_t a, float64x2_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_s8 (int8x16_t __a) - { -- float64x2_t result; -- __asm__ ("faddp %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint8x16_t) (__a == 0); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vpaddq_s8 (int8x16_t a, int8x16_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_s16 (int16x8_t __a) - { -- int8x16_t result; -- __asm__ ("addp %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint16x8_t) (__a == 0); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vpaddq_s16 (int16x8_t a, int16x8_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_s32 (int32x4_t __a) - { -- int16x8_t result; -- __asm__ ("addp %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a == 0); -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_s64 (int64x2_t __a) -+{ -+ return (uint64x2_t) (__a == __AARCH64_INT64_C (0)); -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_u8 (uint8x16_t __a) -+{ -+ return (__a == 0); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vpaddq_s32 (int32x4_t a, int32x4_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_u16 (uint16x8_t __a) - { -- int32x4_t result; -- __asm__ ("addp %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a == 0); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vpaddq_s64 (int64x2_t a, int64x2_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_u32 (uint32x4_t __a) - { -- int64x2_t result; -- __asm__ ("addp %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a == 0); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vpaddq_u8 (uint8x16_t a, uint8x16_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_u64 (uint64x2_t __a) - { -- uint8x16_t result; -- __asm__ ("addp %0.16b,%1.16b,%2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a == __AARCH64_UINT64_C (0)); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vpaddq_u16 (uint16x8_t a, uint16x8_t b) -+/* vceqz - scalar. 
*/ -+ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzs_f32 (float32_t __a) - { -- uint16x8_t result; -- __asm__ ("addp %0.8h,%1.8h,%2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return __a == 0.0f ? -1 : 0; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vpaddq_u32 (uint32x4_t a, uint32x4_t b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzd_s64 (int64_t __a) - { -- uint32x4_t result; -- __asm__ ("addp %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return __a == 0 ? -1ll : 0ll; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vpaddq_u64 (uint64x2_t a, uint64x2_t b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzd_u64 (uint64_t __a) - { -- uint64x2_t result; -- __asm__ ("addp %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return __a == 0 ? -1ll : 0ll; - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vpadds_f32 (float32x2_t a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzd_f64 (float64_t __a) - { -- float32_t result; -- __asm__ ("faddp %s0,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return __a == 0.0 ? -1ll : 0ll; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqdmulh_n_s16 (int16x4_t a, int16_t b) -+/* vcge - vector. */ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_f32 (float32x2_t __a, float32x2_t __b) - { -- int16x4_t result; -- __asm__ ("sqdmulh %0.4h,%1.4h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x2_t) (__a >= __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqdmulh_n_s32 (int32x2_t a, int32_t b) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_f64 (float64x1_t __a, float64x1_t __b) - { -- int32x2_t result; -- __asm__ ("sqdmulh %0.2s,%1.2s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x1_t) (__a >= __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqdmulhq_n_s16 (int16x8_t a, int16_t b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_s8 (int8x8_t __a, int8x8_t __b) - { -- int16x8_t result; -- __asm__ ("sqdmulh %0.8h,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -- : /* No clobbers */); -- return result; -+ return (uint8x8_t) (__a >= __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmulhq_n_s32 (int32x4_t a, int32_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_s16 (int16x4_t __a, int16x4_t __b) - { -- int32x4_t result; -- __asm__ ("sqdmulh %0.4s,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint16x4_t) (__a >= __b); - } - --__extension__ static __inline 
int8x16_t __attribute__ ((__always_inline__)) --vqmovn_high_s16 (int8x8_t a, int16x8_t b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_s32 (int32x2_t __a, int32x2_t __b) - { -- int8x16_t result = vcombine_s8 (a, vcreate_s8 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("sqxtn2 %0.16b, %1.8h" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x2_t) (__a >= __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqmovn_high_s32 (int16x4_t a, int32x4_t b) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_s64 (int64x1_t __a, int64x1_t __b) - { -- int16x8_t result = vcombine_s16 (a, vcreate_s16 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("sqxtn2 %0.8h, %1.4s" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x1_t) (__a >= __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqmovn_high_s64 (int32x2_t a, int64x2_t b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_u8 (uint8x8_t __a, uint8x8_t __b) - { -- int32x4_t result = vcombine_s32 (a, vcreate_s32 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("sqxtn2 %0.4s, %1.2d" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a >= __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqmovn_high_u16 (uint8x8_t a, uint16x8_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_u16 (uint16x4_t __a, uint16x4_t __b) - { -- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("uqxtn2 %0.16b, %1.8h" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a >= __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vqmovn_high_u32 (uint16x4_t a, uint32x4_t b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_u32 (uint32x2_t __a, uint32x2_t __b) - { -- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("uqxtn2 %0.8h, %1.4s" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a >= __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vqmovn_high_u64 (uint32x2_t a, uint64x2_t b) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_u64 (uint64x1_t __a, uint64x1_t __b) - { -- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("uqxtn2 %0.4s, %1.2d" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a >= __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqmovun_high_s16 (uint8x8_t a, int16x8_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_f32 (float32x4_t __a, float32x4_t __b) - { -- uint8x16_t result = vcombine_u8 (a, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("sqxtun2 %0.16b, %1.8h" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a >= __b); - } - --__extension__ static __inline 
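
The removed vqmovn_high_* bodies show a recurring trick for the *_high narrowing forms: build a full quad register as vcombine (a, zeros) and let the `2`-suffixed instruction (sqxtn2/uqxtn2 here) overwrite only the top half through the read-write "+w" operand. Their net effect, spelled out per lane in scalar C (hypothetical helper names, a sketch rather than the intrinsics):

#include <stdint.h>

static int8_t
sat_narrow_s16 (int16_t x)            /* what sqxtn does to one lane */
{
  return x > INT8_MAX ? INT8_MAX : x < INT8_MIN ? INT8_MIN : (int8_t) x;
}

static void
qmovn_high_s16 (int8_t r[16], const int8_t a[8], const int16_t b[8])
{
  for (int i = 0; i < 8; i++)
    r[i] = a[i];                      /* low half: a, passed through */
  for (int i = 0; i < 8; i++)
    r[8 + i] = sat_narrow_s16 (b[i]); /* high half: saturating-narrowed b */
}

int
main (void)
{
  int8_t lo[8] = { 0 }, out[16];
  int16_t wide[8] = { 1000, -1000, 5, -5, 127, -128, 128, -129 };
  qmovn_high_s16 (out, lo, wide);
  return out[8] == 127 && out[14] == 127 && out[15] == -128 ? 0 : 1;
}
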
uint16x8_t __attribute__ ((__always_inline__)) --vqmovun_high_s32 (uint16x4_t a, int32x4_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_f64 (float64x2_t __a, float64x2_t __b) - { -- uint16x8_t result = vcombine_u16 (a, vcreate_u16 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("sqxtun2 %0.8h, %1.4s" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a >= __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vqmovun_high_s64 (uint32x2_t a, int64x2_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_s8 (int8x16_t __a, int8x16_t __b) - { -- uint32x4_t result = vcombine_u32 (a, vcreate_u32 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("sqxtun2 %0.4s, %1.2d" -- : "+w"(result) -- : "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint8x16_t) (__a >= __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrdmulh_n_s16 (int16x4_t a, int16_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_s16 (int16x8_t __a, int16x8_t __b) - { -- int16x4_t result; -- __asm__ ("sqrdmulh %0.4h,%1.4h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -- : /* No clobbers */); -- return result; -+ return (uint16x8_t) (__a >= __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrdmulh_n_s32 (int32x2_t a, int32_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_s32 (int32x4_t __a, int32x4_t __b) - { -- int32x2_t result; -- __asm__ ("sqrdmulh %0.2s,%1.2s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a >= __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrdmulhq_n_s16 (int16x8_t a, int16_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_s64 (int64x2_t __a, int64x2_t __b) - { -- int16x8_t result; -- __asm__ ("sqrdmulh %0.8h,%1.8h,%2.h[0]" -- : "=w"(result) -- : "w"(a), "x"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a >= __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrdmulhq_n_s32 (int32x4_t a, int32_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- int32x4_t result; -- __asm__ ("sqrdmulh %0.4s,%1.4s,%2.s[0]" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a >= __b); - } - --#define vqrshrn_high_n_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int8x8_t a_ = (a); \ -- int8x16_t result = vcombine_s8 \ -- (a_, vcreate_s8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqrshrn2 %0.16b, %1.8h, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+ return (__a >= __b); -+} - --#define vqrshrn_high_n_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int16x4_t a_ = (a); \ -- int16x8_t result = vcombine_s16 \ 
-- (a_, vcreate_s16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqrshrn2 %0.8h, %1.4s, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+ return (__a >= __b); -+} - --#define vqrshrn_high_n_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- int32x2_t a_ = (a); \ -- int32x4_t result = vcombine_s32 \ -- (a_, vcreate_s32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqrshrn2 %0.4s, %1.2d, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+ return (__a >= __b); -+} - --#define vqrshrn_high_n_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint8x8_t a_ = (a); \ -- uint8x16_t result = vcombine_u8 \ -- (a_, vcreate_u8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("uqrshrn2 %0.16b, %1.8h, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+/* vcge - scalar. */ - --#define vqrshrn_high_n_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint16x8_t result = vcombine_u16 \ -- (a_, vcreate_u16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("uqrshrn2 %0.8h, %1.4s, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcges_f32 (float32_t __a, float32_t __b) -+{ -+ return __a >= __b ? -1 : 0; -+} - --#define vqrshrn_high_n_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x2_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint32x4_t result = vcombine_u32 \ -- (a_, vcreate_u32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("uqrshrn2 %0.4s, %1.2d, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcged_s64 (int64_t __a, int64_t __b) -+{ -+ return __a >= __b ? -1ll : 0ll; -+} - --#define vqrshrun_high_n_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- uint8x8_t a_ = (a); \ -- uint8x16_t result = vcombine_u8 \ -- (a_, vcreate_u8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqrshrun2 %0.16b, %1.8h, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcged_u64 (uint64_t __a, uint64_t __b) -+{ -+ return __a >= __b ? -1ll : 0ll; -+} - --#define vqrshrun_high_n_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint16x8_t result = vcombine_u16 \ -- (a_, vcreate_u16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqrshrun2 %0.8h, %1.4s, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcged_f64 (float64_t __a, float64_t __b) -+{ -+ return __a >= __b ? 
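
The scalar comparison intrinsics above return -1 or -1ll rather than 1: converted to the unsigned result type that is an all-ones mask, matching what the CMGE/FCMGE instructions leave in the register. A quick check (hypothetical stand-in name, not the arm_neon.h function):

#include <stdio.h>
#include <stdint.h>

static uint64_t
cmp_ged (double a, double b)       /* mirrors the vcged_f64 pattern */
{
  return a >= b ? -1ll : 0ll;      /* -1ll converts to 0xffffffffffffffff */
}

int
main (void)
{
  printf ("%016llx\n", (unsigned long long) cmp_ged (2.0, 1.0));  /* all ones */
  printf ("%016llx\n", (unsigned long long) cmp_ged (1.0, 2.0));  /* all zeros */
  return 0;
}
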
-1ll : 0ll; -+} - --#define vqrshrun_high_n_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint32x4_t result = vcombine_u32 \ -- (a_, vcreate_u32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqrshrun2 %0.4s, %1.2d, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+/* vcgez - vector. */ - --#define vqshrn_high_n_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int8x8_t a_ = (a); \ -- int8x16_t result = vcombine_s8 \ -- (a_, vcreate_s8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqshrn2 %0.16b, %1.8h, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgez_f32 (float32x2_t __a) -+{ -+ return (uint32x2_t) (__a >= 0.0f); -+} - --#define vqshrn_high_n_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int16x4_t a_ = (a); \ -- int16x8_t result = vcombine_s16 \ -- (a_, vcreate_s16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqshrn2 %0.8h, %1.4s, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgez_f64 (float64x1_t __a) -+{ -+ return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0}); -+} - --#define vqshrn_high_n_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- int32x2_t a_ = (a); \ -- int32x4_t result = vcombine_s32 \ -- (a_, vcreate_s32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqshrn2 %0.4s, %1.2d, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgez_s8 (int8x8_t __a) -+{ -+ return (uint8x8_t) (__a >= 0); -+} - --#define vqshrn_high_n_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint8x8_t a_ = (a); \ -- uint8x16_t result = vcombine_u8 \ -- (a_, vcreate_u8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("uqshrn2 %0.16b, %1.8h, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgez_s16 (int16x4_t __a) -+{ -+ return (uint16x4_t) (__a >= 0); -+} - --#define vqshrn_high_n_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint16x8_t result = vcombine_u16 \ -- (a_, vcreate_u16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("uqshrn2 %0.8h, %1.4s, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgez_s32 (int32x2_t __a) -+{ -+ return (uint32x2_t) (__a >= 0); -+} - --#define vqshrn_high_n_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x2_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint32x4_t result = vcombine_u32 \ -- (a_, vcreate_u32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("uqshrn2 %0.4s, %1.2d, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgez_s64 (int64x1_t __a) -+{ -+ return 
(uint64x1_t) (__a >= __AARCH64_INT64_C (0)); -+} - --#define vqshrun_high_n_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- uint8x8_t a_ = (a); \ -- uint8x16_t result = vcombine_u8 \ -- (a_, vcreate_u8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqshrun2 %0.16b, %1.8h, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezq_f32 (float32x4_t __a) -+{ -+ return (uint32x4_t) (__a >= 0.0f); -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezq_f64 (float64x2_t __a) -+{ -+ return (uint64x2_t) (__a >= 0.0); -+} -+ -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezq_s8 (int8x16_t __a) -+{ -+ return (uint8x16_t) (__a >= 0); -+} - --#define vqshrun_high_n_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint16x8_t result = vcombine_u16 \ -- (a_, vcreate_u16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqshrun2 %0.8h, %1.4s, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezq_s16 (int16x8_t __a) -+{ -+ return (uint16x8_t) (__a >= 0); -+} - --#define vqshrun_high_n_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint32x4_t result = vcombine_u32 \ -- (a_, vcreate_u32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("sqshrun2 %0.4s, %1.2d, #%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezq_s32 (int32x4_t __a) -+{ -+ return (uint32x4_t) (__a >= 0); -+} - --#define vrshrn_high_n_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int8x8_t a_ = (a); \ -- int8x16_t result = vcombine_s8 \ -- (a_, vcreate_s8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezq_s64 (int64x2_t __a) -+{ -+ return (uint64x2_t) (__a >= __AARCH64_INT64_C (0)); -+} - --#define vrshrn_high_n_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int16x4_t a_ = (a); \ -- int16x8_t result = vcombine_s16 \ -- (a_, vcreate_s16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+/* vcgez - scalar. */ - --#define vrshrn_high_n_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- int32x2_t a_ = (a); \ -- int32x4_t result = vcombine_s32 \ -- (a_, vcreate_s32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezs_f32 (float32_t __a) -+{ -+ return __a >= 0.0f ? 
-1 : 0; -+} - --#define vrshrn_high_n_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint8x8_t a_ = (a); \ -- uint8x16_t result = vcombine_u8 \ -- (a_, vcreate_u8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("rshrn2 %0.16b,%1.8h,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezd_s64 (int64_t __a) -+{ -+ return __a >= 0 ? -1ll : 0ll; -+} - --#define vrshrn_high_n_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint16x8_t result = vcombine_u16 \ -- (a_, vcreate_u16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("rshrn2 %0.8h,%1.4s,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezd_f64 (float64_t __a) -+{ -+ return __a >= 0.0 ? -1ll : 0ll; -+} - --#define vrshrn_high_n_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x2_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint32x4_t result = vcombine_u32 \ -- (a_, vcreate_u32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("rshrn2 %0.4s,%1.2d,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+/* vcgt - vector. */ - --#define vrshrn_n_s16(a, b) \ -- __extension__ \ -- ({ \ -- int16x8_t a_ = (a); \ -- int8x8_t result; \ -- __asm__ ("rshrn %0.8b,%1.8h,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_f32 (float32x2_t __a, float32x2_t __b) -+{ -+ return (uint32x2_t) (__a > __b); -+} - --#define vrshrn_n_s32(a, b) \ -- __extension__ \ -- ({ \ -- int32x4_t a_ = (a); \ -- int16x4_t result; \ -- __asm__ ("rshrn %0.4h,%1.4s,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_f64 (float64x1_t __a, float64x1_t __b) -+{ -+ return (uint64x1_t) (__a > __b); -+} - --#define vrshrn_n_s64(a, b) \ -- __extension__ \ -- ({ \ -- int64x2_t a_ = (a); \ -- int32x2_t result; \ -- __asm__ ("rshrn %0.2s,%1.2d,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_s8 (int8x8_t __a, int8x8_t __b) -+{ -+ return (uint8x8_t) (__a > __b); -+} - --#define vrshrn_n_u16(a, b) \ -- __extension__ \ -- ({ \ -- uint16x8_t a_ = (a); \ -- uint8x8_t result; \ -- __asm__ ("rshrn %0.8b,%1.8h,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_s16 (int16x4_t __a, int16x4_t __b) -+{ -+ return (uint16x4_t) (__a > __b); -+} - --#define vrshrn_n_u32(a, b) \ -- __extension__ \ -- ({ \ -- uint32x4_t a_ = (a); \ -- uint16x4_t result; \ -- __asm__ ("rshrn %0.4h,%1.4s,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_s32 (int32x2_t __a, int32x2_t __b) -+{ -+ 
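
The vrshrn_n_* macros being dropped in this stretch wrap RSHRN: shift each lane right by an immediate with round-to-nearest-up, then keep the low half of the bits, with no saturation. Per lane that amounts to the following sketch (hypothetical helper; the negative case assumes arithmetic right shift, as GCC on AArch64 provides):

#include <stdint.h>
#include <stdio.h>

static int8_t
rshrn_s16 (int16_t x, int n)            /* immediate n in [1, 8] */
{
  return (int8_t) ((x + (1 << (n - 1))) >> n);   /* round, shift, truncate */
}

int
main (void)
{
  printf ("%d\n", rshrn_s16 (7, 2));    /* (7 + 2) >> 2 = 2 */
  printf ("%d\n", rshrn_s16 (-7, 2));   /* (-7 + 2) >> 2 = -2 */
  return 0;
}
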
return (uint32x2_t) (__a > __b); -+} - --#define vrshrn_n_u64(a, b) \ -- __extension__ \ -- ({ \ -- uint64x2_t a_ = (a); \ -- uint32x2_t result; \ -- __asm__ ("rshrn %0.2s,%1.2d,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_s64 (int64x1_t __a, int64x1_t __b) -+{ -+ return (uint64x1_t) (__a > __b); -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrsqrte_f32 (float32x2_t a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_u8 (uint8x8_t __a, uint8x8_t __b) - { -- float32x2_t result; -- __asm__ ("frsqrte %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (__a > __b); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vrsqrte_f64 (float64x1_t a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_u16 (uint16x4_t __a, uint16x4_t __b) - { -- float64x1_t result; -- __asm__ ("frsqrte %d0,%d1" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (__a > __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vrsqrte_u32 (uint32x2_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_u32 (uint32x2_t __a, uint32x2_t __b) - { -- uint32x2_t result; -- __asm__ ("ursqrte %0.2s,%1.2s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (__a > __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vrsqrted_f64 (float64_t a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_u64 (uint64x1_t __a, uint64x1_t __b) - { -- float64_t result; -- __asm__ ("frsqrte %d0,%d1" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (__a > __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrsqrteq_f32 (float32x4_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_f32 (float32x4_t __a, float32x4_t __b) - { -- float32x4_t result; -- __asm__ ("frsqrte %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a > __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrsqrteq_f64 (float64x2_t a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_f64 (float64x2_t __a, float64x2_t __b) - { -- float64x2_t result; -- __asm__ ("frsqrte %0.2d,%1.2d" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a > __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vrsqrteq_u32 (uint32x4_t a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_s8 (int8x16_t __a, int8x16_t __b) - { -- uint32x4_t result; -- __asm__ ("ursqrte %0.4s,%1.4s" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint8x16_t) (__a > __b); - } - --__extension__ static __inline float32_t 
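
The vrsqrte*/vrsqrts* asm removed around here maps to FRSQRTE, the reciprocal square-root estimate, and FRSQRTS, the Newton-Raphson step value (3 - a*b) / 2; an estimate e of 1/sqrt(x) is refined as e' = e * frsqrts(x, e*e). A quick numerical check of that recurrence in plain C (not the intrinsics; link with -lm):

#include <stdio.h>
#include <math.h>

static double
frsqrts (double a, double b)     /* the FRSQRTS step value */
{
  return (3.0 - a * b) / 2.0;
}

int
main (void)
{
  double x = 2.0, e = 0.7;       /* crude seed for 1/sqrt(2) ~ 0.7071 */
  for (int i = 0; i < 4; i++)
    e *= frsqrts (x, e * e);     /* converges quadratically */
  printf ("%.12f vs %.12f\n", e, 1.0 / sqrt (x));
  return 0;
}
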
__attribute__ ((__always_inline__)) --vrsqrtes_f32 (float32_t a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_s16 (int16x8_t __a, int16x8_t __b) - { -- float32_t result; -- __asm__ ("frsqrte %s0,%s1" -- : "=w"(result) -- : "w"(a) -- : /* No clobbers */); -- return result; -+ return (uint16x8_t) (__a > __b); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrsqrts_f32 (float32x2_t a, float32x2_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_s32 (int32x4_t __a, int32x4_t __b) - { -- float32x2_t result; -- __asm__ ("frsqrts %0.2s,%1.2s,%2.2s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a > __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vrsqrtsd_f64 (float64_t a, float64_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_s64 (int64x2_t __a, int64x2_t __b) - { -- float64_t result; -- __asm__ ("frsqrts %d0,%d1,%d2" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a > __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrsqrtsq_f32 (float32x4_t a, float32x4_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- float32x4_t result; -- __asm__ ("frsqrts %0.4s,%1.4s,%2.4s" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a > __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrsqrtsq_f64 (float64x2_t a, float64x2_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- float64x2_t result; -- __asm__ ("frsqrts %0.2d,%1.2d,%2.2d" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a > __b); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vrsqrtss_f32 (float32_t a, float32_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- float32_t result; -- __asm__ ("frsqrts %s0,%s1,%s2" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (__a > __b); - } - --#define vshrn_high_n_s16(a, b, c) \ -- __extension__ \ -- ({ \ -- int16x8_t b_ = (b); \ -- int8x8_t a_ = (a); \ -- int8x16_t result = vcombine_s8 \ -- (a_, vcreate_s8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vshrn_high_n_s32(a, b, c) \ -- __extension__ \ -- ({ \ -- int32x4_t b_ = (b); \ -- int16x4_t a_ = (a); \ -- int16x8_t result = vcombine_s16 \ -- (a_, vcreate_s16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vshrn_high_n_s64(a, b, c) \ -- __extension__ \ -- ({ \ -- int64x2_t b_ = (b); \ -- int32x2_t a_ = (a); \ -- int32x4_t result = vcombine_s32 \ -- (a_, vcreate_s32 \ -- 
(__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vshrn_high_n_u16(a, b, c) \ -- __extension__ \ -- ({ \ -- uint16x8_t b_ = (b); \ -- uint8x8_t a_ = (a); \ -- uint8x16_t result = vcombine_u8 \ -- (a_, vcreate_u8 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("shrn2 %0.16b,%1.8h,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vshrn_high_n_u32(a, b, c) \ -- __extension__ \ -- ({ \ -- uint32x4_t b_ = (b); \ -- uint16x4_t a_ = (a); \ -- uint16x8_t result = vcombine_u16 \ -- (a_, vcreate_u16 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("shrn2 %0.8h,%1.4s,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -- --#define vshrn_high_n_u64(a, b, c) \ -- __extension__ \ -- ({ \ -- uint64x2_t b_ = (b); \ -- uint32x2_t a_ = (a); \ -- uint32x4_t result = vcombine_u32 \ -- (a_, vcreate_u32 \ -- (__AARCH64_UINT64_C (0x0))); \ -- __asm__ ("shrn2 %0.4s,%1.2d,#%2" \ -- : "+w"(result) \ -- : "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+ return (__a > __b); -+} - --#define vshrn_n_s16(a, b) \ -- __extension__ \ -- ({ \ -- int16x8_t a_ = (a); \ -- int8x8_t result; \ -- __asm__ ("shrn %0.8b,%1.8h,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+/* vcgt - scalar. */ - --#define vshrn_n_s32(a, b) \ -- __extension__ \ -- ({ \ -- int32x4_t a_ = (a); \ -- int16x4_t result; \ -- __asm__ ("shrn %0.4h,%1.4s,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgts_f32 (float32_t __a, float32_t __b) -+{ -+ return __a > __b ? -1 : 0; -+} - --#define vshrn_n_s64(a, b) \ -- __extension__ \ -- ({ \ -- int64x2_t a_ = (a); \ -- int32x2_t result; \ -- __asm__ ("shrn %0.2s,%1.2d,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtd_s64 (int64_t __a, int64_t __b) -+{ -+ return __a > __b ? -1ll : 0ll; -+} - --#define vshrn_n_u16(a, b) \ -- __extension__ \ -- ({ \ -- uint16x8_t a_ = (a); \ -- uint8x8_t result; \ -- __asm__ ("shrn %0.8b,%1.8h,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtd_u64 (uint64_t __a, uint64_t __b) -+{ -+ return __a > __b ? -1ll : 0ll; -+} - --#define vshrn_n_u32(a, b) \ -- __extension__ \ -- ({ \ -- uint32x4_t a_ = (a); \ -- uint16x4_t result; \ -- __asm__ ("shrn %0.4h,%1.4s,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtd_f64 (float64_t __a, float64_t __b) -+{ -+ return __a > __b ? 
-1ll : 0ll; -+} - --#define vshrn_n_u64(a, b) \ -- __extension__ \ -- ({ \ -- uint64x2_t a_ = (a); \ -- uint32x2_t result; \ -- __asm__ ("shrn %0.2s,%1.2d,%2" \ -- : "=w"(result) \ -- : "w"(a_), "i"(b) \ -- : /* No clobbers */); \ -- result; \ -- }) -+/* vcgtz - vector. */ - --#define vsli_n_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x8_t b_ = (b); \ -- poly8x8_t a_ = (a); \ -- poly8x8_t result; \ -- __asm__ ("sli %0.8b,%2.8b,%3" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtz_f32 (float32x2_t __a) -+{ -+ return (uint32x2_t) (__a > 0.0f); -+} - --#define vsli_n_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x4_t b_ = (b); \ -- poly16x4_t a_ = (a); \ -- poly16x4_t result; \ -- __asm__ ("sli %0.4h,%2.4h,%3" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtz_f64 (float64x1_t __a) -+{ -+ return (uint64x1_t) (__a > (float64x1_t) {0.0}); -+} - --#define vsliq_n_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x16_t b_ = (b); \ -- poly8x16_t a_ = (a); \ -- poly8x16_t result; \ -- __asm__ ("sli %0.16b,%2.16b,%3" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtz_s8 (int8x8_t __a) -+{ -+ return (uint8x8_t) (__a > 0); -+} - --#define vsliq_n_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x8_t b_ = (b); \ -- poly16x8_t a_ = (a); \ -- poly16x8_t result; \ -- __asm__ ("sli %0.8h,%2.8h,%3" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtz_s16 (int16x4_t __a) -+{ -+ return (uint16x4_t) (__a > 0); -+} - --#define vsri_n_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x8_t b_ = (b); \ -- poly8x8_t a_ = (a); \ -- poly8x8_t result; \ -- __asm__ ("sri %0.8b,%2.8b,%3" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtz_s32 (int32x2_t __a) -+{ -+ return (uint32x2_t) (__a > 0); -+} - --#define vsri_n_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- poly16x4_t b_ = (b); \ -- poly16x4_t a_ = (a); \ -- poly16x4_t result; \ -- __asm__ ("sri %0.4h,%2.4h,%3" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtz_s64 (int64x1_t __a) -+{ -+ return (uint64x1_t) (__a > __AARCH64_INT64_C (0)); -+} - --#define vsriq_n_p8(a, b, c) \ -- __extension__ \ -- ({ \ -- poly8x16_t b_ = (b); \ -- poly8x16_t a_ = (a); \ -- poly8x16_t result; \ -- __asm__ ("sri %0.16b,%2.16b,%3" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzq_f32 (float32x4_t __a) -+{ -+ return (uint32x4_t) (__a > 0.0f); -+} - --#define vsriq_n_p16(a, b, c) \ -- __extension__ \ -- ({ \ -- 
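
The vsli_n/vsri_n polynomial macros around here are shift-and-insert operations: SLI shifts the source left by n and preserves the low n bits of the destination, SRI shifts right and preserves the high n bits. Per 8-bit lane, a hedged scalar sketch (hypothetical helpers):

#include <stdint.h>

static uint8_t
sli8 (uint8_t a, uint8_t b, int n)   /* n in [0, 7] */
{
  uint8_t keep = (uint8_t) ((1u << n) - 1);          /* low n bits of a */
  return (uint8_t) ((a & keep) | (uint8_t) (b << n));
}

static uint8_t
sri8 (uint8_t a, uint8_t b, int n)   /* n in [1, 8] */
{
  uint8_t keep = (uint8_t) (0xffu << (8 - n));       /* high n bits of a */
  return (uint8_t) ((a & keep) | (uint8_t) (b >> n));
}

int
main (void)
{
  return (sli8 (0x03, 0x15, 4) == 0x53
          && sri8 (0xc0, 0xa5, 4) == 0xca) ? 0 : 1;
}
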
poly16x8_t b_ = (b); \ -- poly16x8_t a_ = (a); \ -- poly16x8_t result; \ -- __asm__ ("sri %0.8h,%2.8h,%3" \ -- : "=w"(result) \ -- : "0"(a_), "w"(b_), "i"(c) \ -- : /* No clobbers */); \ -- result; \ -- }) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzq_f64 (float64x2_t __a) -+{ -+ return (uint64x2_t) (__a > 0.0); -+} - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtst_p8 (poly8x8_t a, poly8x8_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzq_s8 (int8x16_t __a) - { -- uint8x8_t result; -- __asm__ ("cmtst %0.8b, %1.8b, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint8x16_t) (__a > 0); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtst_p16 (poly16x4_t a, poly16x4_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzq_s16 (int16x8_t __a) - { -- uint16x4_t result; -- __asm__ ("cmtst %0.4h, %1.4h, %2.4h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint16x8_t) (__a > 0); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtstq_p8 (poly8x16_t a, poly8x16_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzq_s32 (int32x4_t __a) - { -- uint8x16_t result; -- __asm__ ("cmtst %0.16b, %1.16b, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a > 0); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtstq_p16 (poly16x8_t a, poly16x8_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzq_s64 (int64x2_t __a) - { -- uint16x8_t result; -- __asm__ ("cmtst %0.8h, %1.8h, %2.8h" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a > __AARCH64_INT64_C (0)); - } - --/* End of temporary inline asm implementations. */ -+/* vcgtz - scalar. */ - --/* Start of temporary inline asm for vldn, vstn and friends. */ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzs_f32 (float32_t __a) -+{ -+ return __a > 0.0f ? -1 : 0; -+} - --/* Create struct element types for duplicating loads. -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzd_s64 (int64_t __a) -+{ -+ return __a > 0 ? -1ll : 0ll; -+} - -- Create 2 element structures of: -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzd_f64 (float64_t __a) -+{ -+ return __a > 0.0 ? -1ll : 0ll; -+} - -- +------+----+----+----+----+ -- | | 8 | 16 | 32 | 64 | -- +------+----+----+----+----+ -- |int | Y | Y | N | N | -- +------+----+----+----+----+ -- |uint | Y | Y | N | N | -- +------+----+----+----+----+ -- |float | - | Y | N | N | -- +------+----+----+----+----+ -- |poly | Y | Y | - | - | -- +------+----+----+----+----+ -+/* vcle - vector. 
*/ - -- Create 3 element structures of: -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_f32 (float32x2_t __a, float32x2_t __b) -+{ -+ return (uint32x2_t) (__a <= __b); -+} - -- +------+----+----+----+----+ -- | | 8 | 16 | 32 | 64 | -- +------+----+----+----+----+ -- |int | Y | Y | Y | Y | -- +------+----+----+----+----+ -- |uint | Y | Y | Y | Y | -- +------+----+----+----+----+ -- |float | - | Y | Y | Y | -- +------+----+----+----+----+ -- |poly | Y | Y | - | - | -- +------+----+----+----+----+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_f64 (float64x1_t __a, float64x1_t __b) -+{ -+ return (uint64x1_t) (__a <= __b); -+} - -- Create 4 element structures of: -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_s8 (int8x8_t __a, int8x8_t __b) -+{ -+ return (uint8x8_t) (__a <= __b); -+} - -- +------+----+----+----+----+ -- | | 8 | 16 | 32 | 64 | -- +------+----+----+----+----+ -- |int | Y | N | N | Y | -- +------+----+----+----+----+ -- |uint | Y | N | N | Y | -- +------+----+----+----+----+ -- |float | - | N | N | Y | -- +------+----+----+----+----+ -- |poly | Y | N | - | - | -- +------+----+----+----+----+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_s16 (int16x4_t __a, int16x4_t __b) -+{ -+ return (uint16x4_t) (__a <= __b); -+} - -- This is required for casting memory reference. */ --#define __STRUCTN(t, sz, nelem) \ -- typedef struct t ## sz ## x ## nelem ## _t { \ -- t ## sz ## _t val[nelem]; \ -- } t ## sz ## x ## nelem ## _t; -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_s32 (int32x2_t __a, int32x2_t __b) -+{ -+ return (uint32x2_t) (__a <= __b); -+} - --/* 2-element structs. */ --__STRUCTN (int, 8, 2) --__STRUCTN (int, 16, 2) --__STRUCTN (uint, 8, 2) --__STRUCTN (uint, 16, 2) --__STRUCTN (float, 16, 2) --__STRUCTN (poly, 8, 2) --__STRUCTN (poly, 16, 2) --/* 3-element structs. */ --__STRUCTN (int, 8, 3) --__STRUCTN (int, 16, 3) --__STRUCTN (int, 32, 3) --__STRUCTN (int, 64, 3) --__STRUCTN (uint, 8, 3) --__STRUCTN (uint, 16, 3) --__STRUCTN (uint, 32, 3) --__STRUCTN (uint, 64, 3) --__STRUCTN (float, 16, 3) --__STRUCTN (float, 32, 3) --__STRUCTN (float, 64, 3) --__STRUCTN (poly, 8, 3) --__STRUCTN (poly, 16, 3) --/* 4-element structs. 
*/ --__STRUCTN (int, 8, 4) --__STRUCTN (int, 64, 4) --__STRUCTN (uint, 8, 4) --__STRUCTN (uint, 64, 4) --__STRUCTN (poly, 8, 4) --__STRUCTN (float, 64, 4) --#undef __STRUCTN -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_s64 (int64x1_t __a, int64x1_t __b) -+{ -+ return (uint64x1_t) (__a <= __b); -+} - -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_u8 (uint8x8_t __a, uint8x8_t __b) -+{ -+ return (__a <= __b); -+} - --#define __ST2_LANE_FUNC(intype, largetype, ptrtype, mode, \ -- qmode, ptr_mode, funcsuffix, signedtype) \ --__extension__ static __inline void \ --__attribute__ ((__always_inline__)) \ --vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ -- intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_oi __o; \ -- largetype __temp; \ -- __temp.val[0] \ -- = vcombine_##funcsuffix (__b.val[0], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __temp.val[1] \ -- = vcombine_##funcsuffix (__b.val[1], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __o = __builtin_aarch64_set_qregoi##qmode (__o, \ -- (signedtype) __temp.val[0], 0); \ -- __o = __builtin_aarch64_set_qregoi##qmode (__o, \ -- (signedtype) __temp.val[1], 1); \ -- __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -- __ptr, __o, __c); \ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+ return (__a <= __b); - } - --__ST2_LANE_FUNC (float16x4x2_t, float16x8x2_t, float16_t, v4hf, v8hf, hf, f16, -- float16x8_t) --__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v2sf, v4sf, sf, f32, -- float32x4_t) --__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, df, v2df, df, f64, -- float64x2_t) --__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, -- int8x16_t) --__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, p16, -- int16x8_t) --__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, -- int8x16_t) --__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, -- int16x8_t) --__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, -- int32x4_t) --__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, di, v2di, di, s64, -- int64x2_t) --__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, -- int8x16_t) --__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, u16, -- int16x8_t) --__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, u32, -- int32x4_t) --__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, di, v2di, di, u64, -- int64x2_t) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+ return (__a <= __b); -+} - --#undef __ST2_LANE_FUNC --#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ --__extension__ static __inline void \ --__attribute__ ((__always_inline__)) \ --vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ -- intype __b, const int __c) \ --{ \ -- union { intype __i; \ -- __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ -- __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -- __ptr, __temp.__o, __c); \ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_u64 
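
The __STRUCTN helper deleted above is a classic token-pasting generator: each expansion glues t, sz and nelem into one typedef such as int8x2_t. A cut-down, compilable version of the same generator, reusing two of the type names it produced:

#include <stdint.h>

#define STRUCTN(t, sz, nelem)              \
  typedef struct t##sz##x##nelem##_t {     \
    t##sz##_t val[nelem];                  \
  } t##sz##x##nelem##_t;

STRUCTN (int, 8, 2)    /* defines int8x2_t   { int8_t val[2]; }   */
STRUCTN (uint, 16, 3)  /* defines uint16x3_t { uint16_t val[3]; } */
#undef STRUCTN

int
main (void)
{
  int8x2_t p = { { 1, 2 } };
  uint16x3_t q = { { 3, 4, 5 } };
  return (p.val[0] + p.val[1] + q.val[0] + q.val[1] + q.val[2]) == 15 ? 0 : 1;
}
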
(uint64x1_t __a, uint64x1_t __b) -+{ -+ return (__a <= __b); - } - --__ST2_LANE_FUNC (float16x8x2_t, float16_t, v8hf, hf, f16) --__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) --__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) --__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) --__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) --__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) --__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) --__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) --__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) --__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) --__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) --__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) --__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_f32 (float32x4_t __a, float32x4_t __b) -+{ -+ return (uint32x4_t) (__a <= __b); -+} - --#define __ST3_LANE_FUNC(intype, largetype, ptrtype, mode, \ -- qmode, ptr_mode, funcsuffix, signedtype) \ --__extension__ static __inline void \ --__attribute__ ((__always_inline__)) \ --vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ -- intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_ci __o; \ -- largetype __temp; \ -- __temp.val[0] \ -- = vcombine_##funcsuffix (__b.val[0], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __temp.val[1] \ -- = vcombine_##funcsuffix (__b.val[1], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __temp.val[2] \ -- = vcombine_##funcsuffix (__b.val[2], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __o = __builtin_aarch64_set_qregci##qmode (__o, \ -- (signedtype) __temp.val[0], 0); \ -- __o = __builtin_aarch64_set_qregci##qmode (__o, \ -- (signedtype) __temp.val[1], 1); \ -- __o = __builtin_aarch64_set_qregci##qmode (__o, \ -- (signedtype) __temp.val[2], 2); \ -- __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -- __ptr, __o, __c); \ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_f64 (float64x2_t __a, float64x2_t __b) -+{ -+ return (uint64x2_t) (__a <= __b); - } - --__ST3_LANE_FUNC (float16x4x3_t, float16x8x3_t, float16_t, v4hf, v8hf, hf, f16, -- float16x8_t) --__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v2sf, v4sf, sf, f32, -- float32x4_t) --__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, df, v2df, df, f64, -- float64x2_t) --__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, -- int8x16_t) --__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, p16, -- int16x8_t) --__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, -- int8x16_t) --__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, -- int16x8_t) --__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, -- int32x4_t) --__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, di, v2di, di, s64, -- int64x2_t) --__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, -- int8x16_t) --__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, u16, -- int16x8_t) --__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v2si, v4si, si, u32, -- int32x4_t) --__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, di, v2di, di, u64, -- int64x2_t) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vcleq_s8 (int8x16_t __a, int8x16_t __b) -+{ -+ return (uint8x16_t) (__a <= __b); -+} - --#undef __ST3_LANE_FUNC --#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ --__extension__ static __inline void \ --__attribute__ ((__always_inline__)) \ --vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ -- intype __b, const int __c) \ --{ \ -- union { intype __i; \ -- __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ -- __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -- __ptr, __temp.__o, __c); \ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_s16 (int16x8_t __a, int16x8_t __b) -+{ -+ return (uint16x8_t) (__a <= __b); - } - --__ST3_LANE_FUNC (float16x8x3_t, float16_t, v8hf, hf, f16) --__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) --__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) --__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) --__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) --__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) --__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) --__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) --__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) --__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) --__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) --__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) --__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_s32 (int32x4_t __a, int32x4_t __b) -+{ -+ return (uint32x4_t) (__a <= __b); -+} - --#define __ST4_LANE_FUNC(intype, largetype, ptrtype, mode, \ -- qmode, ptr_mode, funcsuffix, signedtype) \ --__extension__ static __inline void \ --__attribute__ ((__always_inline__)) \ --vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ -- intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_xi __o; \ -- largetype __temp; \ -- __temp.val[0] \ -- = vcombine_##funcsuffix (__b.val[0], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __temp.val[1] \ -- = vcombine_##funcsuffix (__b.val[1], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __temp.val[2] \ -- = vcombine_##funcsuffix (__b.val[2], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __temp.val[3] \ -- = vcombine_##funcsuffix (__b.val[3], \ -- vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ -- __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -- (signedtype) __temp.val[0], 0); \ -- __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -- (signedtype) __temp.val[1], 1); \ -- __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -- (signedtype) __temp.val[2], 2); \ -- __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -- (signedtype) __temp.val[3], 3); \ -- __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -- __ptr, __o, __c); \ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_s64 (int64x2_t __a, int64x2_t __b) -+{ -+ return (uint64x2_t) (__a <= __b); - } - --__ST4_LANE_FUNC (float16x4x4_t, float16x8x4_t, float16_t, v4hf, v8hf, hf, f16, -- float16x8_t) --__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v2sf, v4sf, sf, f32, -- float32x4_t) --__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, df, v2df, df, f64, -- float64x2_t) --__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, -- 
int8x16_t) --__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, p16, -- int16x8_t) --__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, -- int8x16_t) --__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, -- int16x8_t) --__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, -- int32x4_t) --__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, di, v2di, di, s64, -- int64x2_t) --__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, -- int8x16_t) --__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, u16, -- int16x8_t) --__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v2si, v4si, si, u32, -- int32x4_t) --__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, di, v2di, di, u64, -- int64x2_t) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_u8 (uint8x16_t __a, uint8x16_t __b) -+{ -+ return (__a <= __b); -+} - --#undef __ST4_LANE_FUNC --#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ --__extension__ static __inline void \ --__attribute__ ((__always_inline__)) \ --vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ -- intype __b, const int __c) \ --{ \ -- union { intype __i; \ -- __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ -- __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ -- __ptr, __temp.__o, __c); \ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+ return (__a <= __b); -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+ return (__a <= __b); -+} -+ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+ return (__a <= __b); - } - --__ST4_LANE_FUNC (float16x8x4_t, float16_t, v8hf, hf, f16) --__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) --__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) --__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) --__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) --__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) --__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) --__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) --__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) --__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) --__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) --__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) --__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) -+/* vcle - scalar. */ - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vaddlv_s32 (int32x2_t a) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcles_f32 (float32_t __a, float32_t __b) - { -- int64_t result; -- __asm__ ("saddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); -- return result; -+ return __a <= __b ? 
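
The q-register store macros above pass a structure of vectors to the opaque __builtin_aarch64_simd_* modes by punning through a union, which GNU C documents as well-defined: the bytes written through one member may be reread through the other. The same trick in miniature (generic types, not the AArch64 builtin modes):

#include <stdint.h>

typedef struct { uint32_t val[2]; } pair_t;

int
main (void)
{
  union { pair_t i; uint64_t o; } u = { { { 0x11111111u, 0x22222222u } } };
  /* u.o aliases the two 32-bit lanes as one 64-bit value; which lane
     lands in the low half depends on endianness.  */
  return (u.o == 0x2222222211111111ull
          || u.o == 0x1111111122222222ull) ? 0 : 1;
}
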
-1 : 0; - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vaddlv_u32 (uint32x2_t a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcled_s64 (int64_t __a, int64_t __b) - { -- uint64_t result; -- __asm__ ("uaddlp %0.1d, %1.2s" : "=w"(result) : "w"(a) : ); -- return result; -+ return __a <= __b ? -1ll : 0ll; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcled_u64 (uint64_t __a, uint64_t __b) - { -- return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c); -+ return __a <= __b ? -1ll : 0ll; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcled_f64 (float64_t __a, float64_t __b) - { -- return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c); -+ return __a <= __b ? -1ll : 0ll; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) -+/* vclez - vector. */ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclez_f32 (float32x2_t __a) - { -- return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c); -+ return (uint32x2_t) (__a <= 0.0f); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclez_f64 (float64x1_t __a) - { -- return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c); -+ return (uint64x1_t) (__a <= (float64x1_t) {0.0}); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclez_s8 (int8x8_t __a) - { -- return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c); -+ return (uint8x8_t) (__a <= 0); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclez_s16 (int16x4_t __a) - { -- return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c); -+ return (uint16x4_t) (__a <= 0); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclez_s32 (int32x2_t __a) - { -- return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c); -+ return (uint32x2_t) (__a <= 0); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclez_s64 (int64x1_t __a) - { -- return 
__builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); -+ return (uint64x1_t) (__a <= __AARCH64_INT64_C (0)); - } - --/* Table intrinsics. */ -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vqtbl1_p8 (poly8x16_t a, uint8x8_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezq_f32 (float32x4_t __a) - { -- poly8x8_t result; -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a <= 0.0f); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqtbl1_s8 (int8x16_t a, uint8x8_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezq_f64 (float64x2_t __a) - { -- int8x8_t result; -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a <= 0.0); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqtbl1_u8 (uint8x16_t a, uint8x8_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezq_s8 (int8x16_t __a) - { -- uint8x8_t result; -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint8x16_t) (__a <= 0); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vqtbl1q_p8 (poly8x16_t a, uint8x16_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezq_s16 (int16x8_t __a) - { -- poly8x16_t result; -- __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint16x8_t) (__a <= 0); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqtbl1q_s8 (int8x16_t a, uint8x16_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezq_s32 (int32x4_t __a) - { -- int8x16_t result; -- __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint32x4_t) (__a <= 0); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqtbl1q_u8 (uint8x16_t a, uint8x16_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezq_s64 (int64x2_t __a) - { -- uint8x16_t result; -- __asm__ ("tbl %0.16b, {%1.16b}, %2.16b" -- : "=w"(result) -- : "w"(a), "w"(b) -- : /* No clobbers */); -- return result; -+ return (uint64x2_t) (__a <= __AARCH64_INT64_C (0)); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqtbx1_s8 (int8x8_t r, int8x16_t tab, uint8x8_t idx) -+/* vclez - scalar. */ -+ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezs_f32 (float32_t __a) - { -- int8x8_t result = r; -- __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -- : /* No clobbers */); -- return result; -+ return __a <= 0.0f ? 
-1 : 0; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqtbx1_u8 (uint8x8_t r, uint8x16_t tab, uint8x8_t idx) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezd_s64 (int64_t __a) - { -- uint8x8_t result = r; -- __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -- : /* No clobbers */); -- return result; -+ return __a <= 0 ? -1ll : 0ll; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vqtbx1_p8 (poly8x8_t r, poly8x16_t tab, uint8x8_t idx) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezd_f64 (float64_t __a) - { -- poly8x8_t result = r; -- __asm__ ("tbx %0.8b,{%1.16b},%2.8b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -- : /* No clobbers */); -- return result; -+ return __a <= 0.0 ? -1ll : 0ll; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqtbx1q_s8 (int8x16_t r, int8x16_t tab, uint8x16_t idx) -+/* vclt - vector. */ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_f32 (float32x2_t __a, float32x2_t __b) - { -- int8x16_t result = r; -- __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (uint32x2_t) (__a < __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqtbx1q_u8 (uint8x16_t r, uint8x16_t tab, uint8x16_t idx) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_f64 (float64x1_t __a, float64x1_t __b) - { -- uint8x16_t result = r; -- __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (uint64x1_t) (__a < __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vqtbx1q_p8 (poly8x16_t r, poly8x16_t tab, uint8x16_t idx) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_s8 (int8x8_t __a, int8x8_t __b) - { -- poly8x16_t result = r; -- __asm__ ("tbx %0.16b,{%1.16b},%2.16b" -- : "+w"(result) -- : "w"(tab), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (uint8x8_t) (__a < __b); - } - --/* V7 legacy table intrinsics. 
*/ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_s16 (int16x4_t __a, int16x4_t __b) -+{ -+ return (uint16x4_t) (__a < __b); -+} - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtbl1_s8 (int8x8_t tab, int8x8_t idx) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_s32 (int32x2_t __a, int32x2_t __b) - { -- int8x8_t result; -- int8x16_t temp = vcombine_s8 (tab, vcreate_s8 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (uint32x2_t) (__a < __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtbl1_u8 (uint8x8_t tab, uint8x8_t idx) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_s64 (int64x1_t __a, int64x1_t __b) - { -- uint8x8_t result; -- uint8x16_t temp = vcombine_u8 (tab, vcreate_u8 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (uint64x1_t) (__a < __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtbl1_p8 (poly8x8_t tab, uint8x8_t idx) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_u8 (uint8x8_t __a, uint8x8_t __b) - { -- poly8x8_t result; -- poly8x16_t temp = vcombine_p8 (tab, vcreate_p8 (__AARCH64_UINT64_C (0x0))); -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (__a < __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtbl2_s8 (int8x8x2_t tab, int8x8_t idx) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_u16 (uint16x4_t __a, uint16x4_t __b) - { -- int8x8_t result; -- int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (__a < __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtbl2_u8 (uint8x8x2_t tab, uint8x8_t idx) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_u32 (uint32x2_t __a, uint32x2_t __b) - { -- uint8x8_t result; -- uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (__a < __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtbl2_p8 (poly8x8x2_t tab, uint8x8_t idx) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_u64 (uint64x1_t __a, uint64x1_t __b) - { -- poly8x8_t result; -- poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); -- __asm__ ("tbl %0.8b, {%1.16b}, %2.8b" -- : "=w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (__a < __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtbl3_s8 (int8x8x3_t tab, int8x8_t idx) -+__extension__ extern __inline uint32x4_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_f32 (float32x4_t __a, float32x4_t __b) - { -- int8x8_t result; -- int8x16x2_t temp; -- __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_s8 (tab.val[2], vcreate_s8 (__AARCH64_UINT64_C (0x0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = __builtin_aarch64_tbl3v8qi (__o, idx); -- return result; -+ return (uint32x4_t) (__a < __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtbl3_u8 (uint8x8x3_t tab, uint8x8_t idx) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_f64 (float64x2_t __a, float64x2_t __b) - { -- uint8x8_t result; -- uint8x16x2_t temp; -- __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_u8 (tab.val[2], vcreate_u8 (__AARCH64_UINT64_C (0x0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -- return result; -+ return (uint64x2_t) (__a < __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtbl3_p8 (poly8x8x3_t tab, uint8x8_t idx) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_s8 (int8x16_t __a, int8x16_t __b) - { -- poly8x8_t result; -- poly8x16x2_t temp; -- __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_p8 (tab.val[2], vcreate_p8 (__AARCH64_UINT64_C (0x0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -- return result; -+ return (uint8x16_t) (__a < __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtbl4_s8 (int8x8x4_t tab, int8x8_t idx) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_s16 (int16x8_t __a, int16x8_t __b) - { -- int8x8_t result; -- int8x16x2_t temp; -- __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_s8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_s8 (tab.val[2], tab.val[3]); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = __builtin_aarch64_tbl3v8qi (__o, idx); -- return result; -+ return (uint16x8_t) (__a < __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtbl4_u8 (uint8x8x4_t tab, uint8x8_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_s32 (int32x4_t __a, int32x4_t __b) - { -- uint8x8_t result; -- uint8x16x2_t temp; -- __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_u8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_u8 (tab.val[2], tab.val[3]); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 
1); -- result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -- return result; -+ return (uint32x4_t) (__a < __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtbl4_p8 (poly8x8x4_t tab, uint8x8_t idx) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_s64 (int64x2_t __a, int64x2_t __b) - { -- poly8x8_t result; -- poly8x16x2_t temp; -- __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_p8 (tab.val[0], tab.val[1]); -- temp.val[1] = vcombine_p8 (tab.val[2], tab.val[3]); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -- return result; -+ return (uint64x2_t) (__a < __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtbx2_s8 (int8x8_t r, int8x8x2_t tab, int8x8_t idx) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- int8x8_t result = r; -- int8x16_t temp = vcombine_s8 (tab.val[0], tab.val[1]); -- __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -- : "+w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (__a < __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtbx2_u8 (uint8x8_t r, uint8x8x2_t tab, uint8x8_t idx) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- uint8x8_t result = r; -- uint8x16_t temp = vcombine_u8 (tab.val[0], tab.val[1]); -- __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -- : "+w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (__a < __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtbx2_p8 (poly8x8_t r, poly8x8x2_t tab, uint8x8_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- poly8x8_t result = r; -- poly8x16_t temp = vcombine_p8 (tab.val[0], tab.val[1]); -- __asm__ ("tbx %0.8b, {%1.16b}, %2.8b" -- : "+w"(result) -- : "w"(temp), "w"(idx) -- : /* No clobbers */); -- return result; -+ return (__a < __b); - } - --/* End of temporary inline asm. */ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_u64 (uint64x2_t __a, uint64x2_t __b) -+{ -+ return (__a < __b); -+} - --/* Start of optimal implementations in approved order. */ -+/* vclt - scalar. */ - --/* vabs */ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclts_f32 (float32_t __a, float32_t __b) -+{ -+ return __a < __b ? -1 : 0; -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vabs_f32 (float32x2_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltd_s64 (int64_t __a, int64_t __b) - { -- return __builtin_aarch64_absv2sf (__a); -+ return __a < __b ? 
-1ll : 0ll; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vabs_f64 (float64x1_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltd_u64 (uint64_t __a, uint64_t __b) - { -- return (float64x1_t) {__builtin_fabs (__a[0])}; -+ return __a < __b ? -1ll : 0ll; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vabs_s8 (int8x8_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltd_f64 (float64_t __a, float64_t __b) - { -- return __builtin_aarch64_absv8qi (__a); -+ return __a < __b ? -1ll : 0ll; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vabs_s16 (int16x4_t __a) -+/* vcltz - vector. */ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltz_f32 (float32x2_t __a) - { -- return __builtin_aarch64_absv4hi (__a); -+ return (uint32x2_t) (__a < 0.0f); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vabs_s32 (int32x2_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltz_f64 (float64x1_t __a) - { -- return __builtin_aarch64_absv2si (__a); -+ return (uint64x1_t) (__a < (float64x1_t) {0.0}); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vabs_s64 (int64x1_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltz_s8 (int8x8_t __a) - { -- return (int64x1_t) {__builtin_aarch64_absdi (__a[0])}; -+ return (uint8x8_t) (__a < 0); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vabsq_f32 (float32x4_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltz_s16 (int16x4_t __a) - { -- return __builtin_aarch64_absv4sf (__a); -+ return (uint16x4_t) (__a < 0); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vabsq_f64 (float64x2_t __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltz_s32 (int32x2_t __a) - { -- return __builtin_aarch64_absv2df (__a); -+ return (uint32x2_t) (__a < 0); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vabsq_s8 (int8x16_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltz_s64 (int64x1_t __a) - { -- return __builtin_aarch64_absv16qi (__a); -+ return (uint64x1_t) (__a < __AARCH64_INT64_C (0)); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vabsq_s16 (int16x8_t __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_absv8hi (__a); -+ return (uint32x4_t) (__a < 0.0f); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vabsq_s32 (int32x4_t __a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_absv4si (__a); -+ return (uint64x2_t) (__a < 0.0); - } - --__extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) --vabsq_s64 (int64x2_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzq_s8 (int8x16_t __a) - { -- return __builtin_aarch64_absv2di (__a); -+ return (uint8x16_t) (__a < 0); - } - --/* vadd */ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzq_s16 (int16x8_t __a) -+{ -+ return (uint16x8_t) (__a < 0); -+} - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vaddd_s64 (int64_t __a, int64_t __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzq_s32 (int32x4_t __a) - { -- return __a + __b; -+ return (uint32x4_t) (__a < 0); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vaddd_u64 (uint64_t __a, uint64_t __b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzq_s64 (int64x2_t __a) - { -- return __a + __b; -+ return (uint64x2_t) (__a < __AARCH64_INT64_C (0)); - } - --/* vaddv */ -+/* vcltz - scalar. */ - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vaddv_s8 (int8x8_t __a) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzs_f32 (float32_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v8qi (__a); -+ return __a < 0.0f ? -1 : 0; - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vaddv_s16 (int16x4_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzd_s64 (int64_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v4hi (__a); -+ return __a < 0 ? -1ll : 0ll; - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vaddv_s32 (int32x2_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzd_f64 (float64_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v2si (__a); -+ return __a < 0.0 ? -1ll : 0ll; - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vaddv_u8 (uint8x8_t __a) -+/* vcls. 
*/ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcls_s8 (int8x8_t __a) - { -- return (uint8_t) __builtin_aarch64_reduc_plus_scal_v8qi ((int8x8_t) __a); -+ return __builtin_aarch64_clrsbv8qi (__a); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vaddv_u16 (uint16x4_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcls_s16 (int16x4_t __a) - { -- return (uint16_t) __builtin_aarch64_reduc_plus_scal_v4hi ((int16x4_t) __a); -+ return __builtin_aarch64_clrsbv4hi (__a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vaddv_u32 (uint32x2_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcls_s32 (int32x2_t __a) - { -- return (int32_t) __builtin_aarch64_reduc_plus_scal_v2si ((int32x2_t) __a); -+ return __builtin_aarch64_clrsbv2si (__a); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vaddvq_s8 (int8x16_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclsq_s8 (int8x16_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v16qi (__a); -+ return __builtin_aarch64_clrsbv16qi (__a); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vaddvq_s16 (int16x8_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclsq_s16 (int16x8_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v8hi (__a); -+ return __builtin_aarch64_clrsbv8hi (__a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vaddvq_s32 (int32x4_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclsq_s32 (int32x4_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v4si (__a); -+ return __builtin_aarch64_clrsbv4si (__a); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vaddvq_s64 (int64x2_t __a) -+/* vclz. 
*/ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclz_s8 (int8x8_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v2di (__a); -+ return __builtin_aarch64_clzv8qi (__a); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vaddvq_u8 (uint8x16_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclz_s16 (int16x4_t __a) - { -- return (uint8_t) __builtin_aarch64_reduc_plus_scal_v16qi ((int8x16_t) __a); -+ return __builtin_aarch64_clzv4hi (__a); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vaddvq_u16 (uint16x8_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclz_s32 (int32x2_t __a) - { -- return (uint16_t) __builtin_aarch64_reduc_plus_scal_v8hi ((int16x8_t) __a); -+ return __builtin_aarch64_clzv2si (__a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vaddvq_u32 (uint32x4_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclz_u8 (uint8x8_t __a) - { -- return (uint32_t) __builtin_aarch64_reduc_plus_scal_v4si ((int32x4_t) __a); -+ return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vaddvq_u64 (uint64x2_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclz_u16 (uint16x4_t __a) - { -- return (uint64_t) __builtin_aarch64_reduc_plus_scal_v2di ((int64x2_t) __a); -+ return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vaddv_f32 (float32x2_t __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclz_u32 (uint32x2_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v2sf (__a); -+ return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vaddvq_f32 (float32x4_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclzq_s8 (int8x16_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v4sf (__a); -+ return __builtin_aarch64_clzv16qi (__a); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vaddvq_f64 (float64x2_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclzq_s16 (int16x8_t __a) - { -- return __builtin_aarch64_reduc_plus_scal_v2df (__a); -+ return __builtin_aarch64_clzv8hi (__a); - } - --/* vbsl */ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclzq_s32 (int32x4_t __a) -+{ -+ return __builtin_aarch64_clzv4si (__a); -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclzq_u8 (uint8x16_t __a) - { -- return __builtin_aarch64_simd_bslv2sf_suss (__a, __b, __c); -+ return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a); - } - --__extension__ static 
__inline float64x1_t __attribute__ ((__always_inline__)) --vbsl_f64 (uint64x1_t __a, float64x1_t __b, float64x1_t __c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclzq_u16 (uint16x8_t __a) - { -- return (float64x1_t) -- { __builtin_aarch64_simd_bsldf_suss (__a[0], __b[0], __c[0]) }; -+ return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclzq_u32 (uint32x4_t __a) - { -- return __builtin_aarch64_simd_bslv8qi_pupp (__a, __b, __c); -+ return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) -+/* vcnt. */ -+ -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcnt_p8 (poly8x8_t __a) - { -- return __builtin_aarch64_simd_bslv4hi_pupp (__a, __b, __c); -+ return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcnt_s8 (int8x8_t __a) - { -- return __builtin_aarch64_simd_bslv8qi_suss (__a, __b, __c); -+ return __builtin_aarch64_popcountv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcnt_u8 (uint8x8_t __a) - { -- return __builtin_aarch64_simd_bslv4hi_suss (__a, __b, __c); -+ return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcntq_p8 (poly8x16_t __a) - { -- return __builtin_aarch64_simd_bslv2si_suss (__a, __b, __c); -+ return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcntq_s8 (int8x16_t __a) - { -- return (int64x1_t) -- {__builtin_aarch64_simd_bsldi_suss (__a[0], __b[0], __c[0])}; -+ return __builtin_aarch64_popcountv16qi (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcntq_u8 (uint8x16_t __a) - { -- return __builtin_aarch64_simd_bslv8qi_uuuu (__a, __b, __c); -+ return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) -+/* vcopy_lane. 
*/ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_f32 (float32x2_t __a, const int __lane1, -+ float32x2_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv4hi_uuuu (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_f64 (float64x1_t __a, const int __lane1, -+ float64x1_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv2si_uuuu (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_p8 (poly8x8_t __a, const int __lane1, -+ poly8x8_t __b, const int __lane2) - { -- return (uint64x1_t) -- {__builtin_aarch64_simd_bsldi_uuuu (__a[0], __b[0], __c[0])}; -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_p16 (poly16x4_t __a, const int __lane1, -+ poly16x4_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv4sf_suss (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vbslq_f64 (uint64x2_t __a, float64x2_t __b, float64x2_t __c) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_p64 (poly64x1_t __a, const int __lane1, -+ poly64x1_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv2df_suss (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_s8 (int8x8_t __a, const int __lane1, -+ int8x8_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv16qi_pupp (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_s16 (int16x4_t __a, const int __lane1, -+ int16x4_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv8hi_pupp (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vbslq_s8 
(uint8x16_t __a, int8x16_t __b, int8x16_t __c) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_s32 (int32x2_t __a, const int __lane1, -+ int32x2_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv16qi_suss (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_s64 (int64x1_t __a, const int __lane1, -+ int64x1_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv8hi_suss (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_u8 (uint8x8_t __a, const int __lane1, -+ uint8x8_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv4si_suss (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_u16 (uint16x4_t __a, const int __lane1, -+ uint16x4_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv2di_suss (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_u32 (uint32x2_t __a, const int __lane1, -+ uint32x2_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv16qi_uuuu (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_lane_u64 (uint64x1_t __a, const int __lane1, -+ uint64x1_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv8hi_uuuu (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) -+/* vcopy_laneq. 
*/ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_f32 (float32x2_t __a, const int __lane1, -+ float32x4_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv4si_uuuu (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_f64 (float64x1_t __a, const int __lane1, -+ float64x2_t __b, const int __lane2) - { -- return __builtin_aarch64_simd_bslv2di_uuuu (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --/* ARMv8.1 instrinsics. */ --#pragma GCC push_options --#pragma GCC target ("arch=armv8.1-a") -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_p8 (poly8x8_t __a, const int __lane1, -+ poly8x16_t __b, const int __lane2) -+{ -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); -+} - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_p16 (poly16x4_t __a, const int __lane1, -+ poly16x8_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlahv4hi (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_p64 (poly64x1_t __a, const int __lane1, -+ poly64x2_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlahv2si (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_s8 (int8x8_t __a, const int __lane1, -+ int8x16_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlahv8hi (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_s16 (int16x4_t __a, const int __lane1, -+ int16x8_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlahv4si (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vcopy_laneq_s32 (int32x2_t __a, const int __lane1, -+ int32x4_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlshv4hi (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_s64 (int64x1_t __a, const int __lane1, -+ int64x2_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlshv2si (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_u8 (uint8x8_t __a, const int __lane1, -+ uint8x16_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlshv8hi (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_u16 (uint16x4_t __a, const int __lane1, -+ uint16x8_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlshv4si (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_u32 (uint32x2_t __a, const int __lane1, -+ uint32x4_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopy_laneq_u64 (uint64x1_t __a, const int __lane1, -+ uint64x2_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) -+/* vcopyq_lane. 
*/ -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_f32 (float32x4_t __a, const int __lane1, -+ float32x2_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_f64 (float64x2_t __a, const int __lane1, -+ float64x1_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_p8 (poly8x16_t __a, const int __lane1, -+ poly8x8_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_p16 (poly16x8_t __a, const int __lane1, -+ poly16x4_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_p64 (poly64x2_t __a, const int __lane1, -+ poly64x1_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_s8 (int8x16_t __a, const int __lane1, -+ int8x8_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_s16 (int16x8_t __a, const int __lane1, -+ int16x4_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any 
(__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_s32 (int32x4_t __a, const int __lane1, -+ int32x2_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_s64 (int64x2_t __a, const int __lane1, -+ int64x1_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_u8 (uint8x16_t __a, const int __lane1, -+ uint8x8_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_u16 (uint16x8_t __a, const int __lane1, -+ uint16x4_t __b, const int __lane2) - { -- return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_u32 (uint32x4_t __a, const int __lane1, -+ uint32x2_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcopyq_lane_u64 (uint64x2_t __a, const int __lane1, -+ uint64x1_t __b, const int __lane2) - { -- return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d); -+ return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2), -+ __a, __lane1); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c) -+/* vcopyq_laneq. 
*/
-+
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_f32 (float32x4_t __a, const int __lane1,
-+                  float32x4_t __b, const int __lane2)
- {
--  return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int32_t __attribute__ ((__always_inline__))
--vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_f64 (float64x2_t __a, const int __lane1,
-+                  float64x2_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int32_t __attribute__ ((__always_inline__))
--vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_p8 (poly8x16_t __a, const int __lane1,
-+                 poly8x16_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_p16 (poly16x8_t __a, const int __lane1,
-+                  poly16x8_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
-+__extension__ extern __inline poly64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_p64 (poly64x2_t __a, const int __lane1,
-+                  poly64x2_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_s8 (int8x16_t __a, const int __lane1,
-+                 int8x16_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_s16 (int16x8_t __a, const int __lane1,
-+                  int16x8_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int16_t __attribute__ ((__always_inline__))
--vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_s32 (int32x4_t __a, const int __lane1,
-+                  int32x4_t __b, const int __lane2)
- {
--  return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int16_t __attribute__ ((__always_inline__))
--vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_s64 (int64x2_t __a, const int __lane1,
-+                  int64x2_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int16_t __attribute__ ((__always_inline__))
--vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d)
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_u8 (uint8x16_t __a, const int __lane1,
-+                 uint8x16_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int32_t __attribute__ ((__always_inline__))
--vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c)
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_u16 (uint16x8_t __a, const int __lane1,
-+                  uint16x8_t __b, const int __lane2)
- {
--  return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int32_t __attribute__ ((__always_inline__))
--vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_u32 (uint32x4_t __a, const int __lane1,
-+                  uint32x4_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
-
--__extension__ static __inline int32_t __attribute__ ((__always_inline__))
--vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcopyq_laneq_u64 (uint64x2_t __a, const int __lane1,
-+                  uint64x2_t __b, const int __lane2)
- {
--  return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d);
-+  return __aarch64_vset_lane_any (__aarch64_vget_lane_any (__b, __lane2),
-+                                  __a, __lane1);
- }
--#pragma GCC pop_options
-
--#pragma GCC push_options
--#pragma GCC target ("+nothing+crypto")
--/* vaes  */
-+/* vcvt (double -> float).  */
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vaeseq_u8 (uint8x16_t data, uint8x16_t key)
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_f16_f32 (float32x4_t __a)
- {
--  return __builtin_aarch64_crypto_aesev16qi_uuu (data, key);
-+  return __builtin_aarch64_float_truncate_lo_v4hf (__a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vaesdq_u8 (uint8x16_t data, uint8x16_t key)
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b)
- {
--  return __builtin_aarch64_crypto_aesdv16qi_uuu (data, key);
-+  return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vaesmcq_u8 (uint8x16_t data)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_f32_f64 (float64x2_t __a)
- {
--  return __builtin_aarch64_crypto_aesmcv16qi_uu (data);
-+  return __builtin_aarch64_float_truncate_lo_v2sf (__a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vaesimcq_u8 (uint8x16_t data)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
- {
--  return __builtin_aarch64_crypto_aesimcv16qi_uu (data);
-+  return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b);
- }
--#pragma GCC pop_options
-
--/* vcage  */
-+/* vcvt (float -> double).  */
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcage_f64 (float64x1_t __a, float64x1_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_f32_f16 (float16x4_t __a)
- {
--  return vabs_f64 (__a) >= vabs_f64 (__b);
-+  return __builtin_aarch64_float_extend_lo_v4sf (__a);
- }
-
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vcages_f32 (float32_t __a, float32_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_f64_f32 (float32x2_t __a)
- {
--  return __builtin_fabsf (__a) >= __builtin_fabsf (__b) ? -1 : 0;
-+
-+  return __builtin_aarch64_float_extend_lo_v2df (__a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcage_f32 (float32x2_t __a, float32x2_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_high_f32_f16 (float16x8_t __a)
- {
--  return vabs_f32 (__a) >= vabs_f32 (__b);
-+  return __builtin_aarch64_vec_unpacks_hi_v8hf (__a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcageq_f32 (float32x4_t __a, float32x4_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_high_f64_f32 (float32x4_t __a)
- {
--  return vabsq_f32 (__a) >= vabsq_f32 (__b);
-+  return __builtin_aarch64_vec_unpacks_hi_v4sf (__a);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcaged_f64 (float64_t __a, float64_t __b)
-+/* vcvt (<u>fixed-point -> float).  */
-+
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtd_n_f64_s64 (int64_t __a, const int __b)
- {
--  return __builtin_fabs (__a) >= __builtin_fabs (__b) ? -1 : 0;
-+  return __builtin_aarch64_scvtfdi (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcageq_f64 (float64x2_t __a, float64x2_t __b)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtd_n_f64_u64 (uint64_t __a, const int __b)
- {
--  return vabsq_f64 (__a) >= vabsq_f64 (__b);
-+  return __builtin_aarch64_ucvtfdi_sus (__a, __b);
- }
-
--/* vcagt  */
--
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vcagts_f32 (float32_t __a, float32_t __b)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvts_n_f32_s32 (int32_t __a, const int __b)
- {
--  return __builtin_fabsf (__a) > __builtin_fabsf (__b) ? -1 : 0;
-+  return __builtin_aarch64_scvtfsi (__a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcagt_f32 (float32x2_t __a, float32x2_t __b)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvts_n_f32_u32 (uint32_t __a, const int __b)
- {
--  return vabs_f32 (__a) > vabs_f32 (__b);
-+  return __builtin_aarch64_ucvtfsi_sus (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcagt_f64 (float64x1_t __a, float64x1_t __b)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_n_f32_s32 (int32x2_t __a, const int __b)
- {
--  return vabs_f64 (__a) > vabs_f64 (__b);
-+  return __builtin_aarch64_scvtfv2si (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcagtq_f32 (float32x4_t __a, float32x4_t __b)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
- {
--  return vabsq_f32 (__a) > vabsq_f32 (__b);
-+  return __builtin_aarch64_ucvtfv2si_sus (__a, __b);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcagtd_f64 (float64_t __a, float64_t __b)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_n_f64_s64 (int64x1_t __a, const int __b)
- {
--  return __builtin_fabs (__a) > __builtin_fabs (__b) ? -1 : 0;
-+  return (float64x1_t)
-+    { __builtin_aarch64_scvtfdi (vget_lane_s64 (__a, 0), __b) };
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcagtq_f64 (float64x2_t __a, float64x2_t __b)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_n_f64_u64 (uint64x1_t __a, const int __b)
- {
--  return vabsq_f64 (__a) > vabsq_f64 (__b);
-+  return (float64x1_t)
-+    { __builtin_aarch64_ucvtfdi_sus (vget_lane_u64 (__a, 0), __b) };
- }
-
--/* vcale  */
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcale_f32 (float32x2_t __a, float32x2_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
- {
--  return vabs_f32 (__a) <= vabs_f32 (__b);
-+  return __builtin_aarch64_scvtfv4si (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcale_f64 (float64x1_t __a, float64x1_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_n_f32_u32 (uint32x4_t __a, const int __b)
- {
--  return vabs_f64 (__a) <= vabs_f64 (__b);
-+  return __builtin_aarch64_ucvtfv4si_sus (__a, __b);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcaled_f64 (float64_t __a, float64_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_n_f64_s64 (int64x2_t __a, const int __b)
- {
--  return __builtin_fabs (__a) <= __builtin_fabs (__b) ? -1 : 0;
-+  return __builtin_aarch64_scvtfv2di (__a, __b);
- }
-
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vcales_f32 (float32_t __a, float32_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_n_f64_u64 (uint64x2_t __a, const int __b)
- {
--  return __builtin_fabsf (__a) <= __builtin_fabsf (__b) ? -1 : 0;
-+  return __builtin_aarch64_ucvtfv2di_sus (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcaleq_f32 (float32x4_t __a, float32x4_t __b)
-+/* vcvt (float -> <u>fixed-point).  */
-+
-+__extension__ extern __inline int64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtd_n_s64_f64 (float64_t __a, const int __b)
- {
--  return vabsq_f32 (__a) <= vabsq_f32 (__b);
-+  return __builtin_aarch64_fcvtzsdf (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcaleq_f64 (float64x2_t __a, float64x2_t __b)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtd_n_u64_f64 (float64_t __a, const int __b)
- {
--  return vabsq_f64 (__a) <= vabsq_f64 (__b);
-+  return __builtin_aarch64_fcvtzudf_uss (__a, __b);
- }
-
--/* vcalt  */
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcalt_f32 (float32x2_t __a, float32x2_t __b)
-+__extension__ extern __inline int32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvts_n_s32_f32 (float32_t __a, const int __b)
- {
--  return vabs_f32 (__a) < vabs_f32 (__b);
-+  return __builtin_aarch64_fcvtzssf (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcalt_f64 (float64x1_t __a, float64x1_t __b)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvts_n_u32_f32 (float32_t __a, const int __b)
- {
--  return vabs_f64 (__a) < vabs_f64 (__b);
-+  return __builtin_aarch64_fcvtzusf_uss (__a, __b);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcaltd_f64 (float64_t __a, float64_t __b)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_n_s32_f32 (float32x2_t __a, const int __b)
- {
--  return __builtin_fabs (__a) < __builtin_fabs (__b) ? -1 : 0;
-+  return __builtin_aarch64_fcvtzsv2sf (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcaltq_f32 (float32x4_t __a, float32x4_t __b)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_n_u32_f32 (float32x2_t __a, const int __b)
- {
--  return vabsq_f32 (__a) < vabsq_f32 (__b);
-+  return __builtin_aarch64_fcvtzuv2sf_uss (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcaltq_f64 (float64x2_t __a, float64x2_t __b)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_n_s64_f64 (float64x1_t __a, const int __b)
- {
--  return vabsq_f64 (__a) < vabsq_f64 (__b);
-+  return (int64x1_t)
-+    { __builtin_aarch64_fcvtzsdf (vget_lane_f64 (__a, 0), __b) };
- }
-
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vcalts_f32 (float32_t __a, float32_t __b)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_n_u64_f64 (float64x1_t __a, const int __b)
- {
--  return __builtin_fabsf (__a) < __builtin_fabsf (__b) ? -1 : 0;
-+  return (uint64x1_t)
-+    { __builtin_aarch64_fcvtzudf_uss (vget_lane_f64 (__a, 0), __b) };
- }
-
--/* vceq - vector.  */
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vceq_f32 (float32x2_t __a, float32x2_t __b)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
- {
--  return (uint32x2_t) (__a == __b);
-+  return __builtin_aarch64_fcvtzsv4sf (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vceq_f64 (float64x1_t __a, float64x1_t __b)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_n_u32_f32 (float32x4_t __a, const int __b)
- {
--  return (uint64x1_t) (__a == __b);
-+  return __builtin_aarch64_fcvtzuv4sf_uss (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vceq_p8 (poly8x8_t __a, poly8x8_t __b)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_n_s64_f64 (float64x2_t __a, const int __b)
- {
--  return (uint8x8_t) (__a == __b);
-+  return __builtin_aarch64_fcvtzsv2df (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vceq_s8 (int8x8_t __a, int8x8_t __b)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_n_u64_f64 (float64x2_t __a, const int __b)
- {
--  return (uint8x8_t) (__a == __b);
-+  return __builtin_aarch64_fcvtzuv2df_uss (__a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vceq_s16 (int16x4_t __a, int16x4_t __b)
-+/* vcvt  (<u>int -> float)  */
-+
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtd_f64_s64 (int64_t __a)
- {
--  return (uint16x4_t) (__a == __b);
-+  return (float64_t) __a;
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vceq_s32 (int32x2_t __a, int32x2_t __b)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtd_f64_u64 (uint64_t __a)
- {
--  return (uint32x2_t) (__a == __b);
-+  return (float64_t) __a;
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vceq_s64 (int64x1_t __a, int64x1_t __b)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvts_f32_s32 (int32_t __a)
- {
--  return (uint64x1_t) (__a == __b);
-+  return (float32_t) __a;
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vceq_u8 (uint8x8_t __a, uint8x8_t __b)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvts_f32_u32 (uint32_t __a)
- {
--  return (__a == __b);
-+  return (float32_t) __a;
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vceq_u16 (uint16x4_t __a, uint16x4_t __b)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_f32_s32 (int32x2_t __a)
- {
--  return (__a == __b);
-+  return __builtin_aarch64_floatv2siv2sf (__a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vceq_u32 (uint32x2_t __a, uint32x2_t __b)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_f32_u32 (uint32x2_t __a)
- {
--  return (__a == __b);
-+  return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vceq_u64 (uint64x1_t __a, uint64x1_t __b)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_f64_s64 (int64x1_t __a)
- {
--  return (__a == __b);
-+  return (float64x1_t) { vget_lane_s64 (__a, 0) };
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vceqq_f32 (float32x4_t __a, float32x4_t __b)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_f64_u64 (uint64x1_t __a)
- {
--  return (uint32x4_t) (__a == __b);
-+  return (float64x1_t) { vget_lane_u64 (__a, 0) };
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vceqq_f64 (float64x2_t __a, float64x2_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_f32_s32 (int32x4_t __a)
- {
--  return (uint64x2_t) (__a == __b);
-+  return __builtin_aarch64_floatv4siv4sf (__a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vceqq_p8 (poly8x16_t __a, poly8x16_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_f32_u32 (uint32x4_t __a)
- {
--  return (uint8x16_t) (__a == __b);
-+  return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vceqq_s8 (int8x16_t __a, int8x16_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_f64_s64 (int64x2_t __a)
- {
--  return (uint8x16_t) (__a == __b);
-+  return __builtin_aarch64_floatv2div2df (__a);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vceqq_s16 (int16x8_t __a, int16x8_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_f64_u64 (uint64x2_t __a)
- {
--  return (uint16x8_t) (__a == __b);
-+  return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vceqq_s32 (int32x4_t __a, int32x4_t __b)
-+/* vcvt (float -> <u>int)  */
-+
-+__extension__ extern __inline int64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtd_s64_f64 (float64_t __a)
- {
--  return (uint32x4_t) (__a == __b);
-+  return (int64_t) __a;
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vceqq_s64 (int64x2_t __a, int64x2_t __b)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtd_u64_f64 (float64_t __a)
- {
--  return (uint64x2_t) (__a == __b);
-+  return (uint64_t) __a;
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vceqq_u8 (uint8x16_t __a, uint8x16_t __b)
-+__extension__ extern __inline int32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvts_s32_f32 (float32_t __a)
- {
--  return (__a == __b);
-+  return (int32_t) __a;
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vceqq_u16 (uint16x8_t __a, uint16x8_t __b)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvts_u32_f32 (float32_t __a)
- {
--  return (__a == __b);
-+  return (uint32_t) __a;
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vceqq_u32 (uint32x4_t __a, uint32x4_t __b)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_s32_f32 (float32x2_t __a)
- {
--  return (__a == __b);
-+  return __builtin_aarch64_lbtruncv2sfv2si (__a);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vceqq_u64 (uint64x2_t __a, uint64x2_t __b)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_u32_f32 (float32x2_t __a)
- {
--  return (__a == __b);
-+  return __builtin_aarch64_lbtruncuv2sfv2si_us (__a);
- }
-
--/* vceq - scalar.  */
--
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vceqs_f32 (float32_t __a, float32_t __b)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_s32_f32 (float32x4_t __a)
- {
--  return __a == __b ? -1 : 0;
-+  return __builtin_aarch64_lbtruncv4sfv4si (__a);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vceqd_s64 (int64_t __a, int64_t __b)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_u32_f32 (float32x4_t __a)
- {
--  return __a == __b ? -1ll : 0ll;
-+  return __builtin_aarch64_lbtruncuv4sfv4si_us (__a);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vceqd_u64 (uint64_t __a, uint64_t __b)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_s64_f64 (float64x1_t __a)
- {
--  return __a == __b ? -1ll : 0ll;
-+  return (int64x1_t) {vcvtd_s64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vceqd_f64 (float64_t __a, float64_t __b)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvt_u64_f64 (float64x1_t __a)
- {
--  return __a == __b ? -1ll : 0ll;
-+  return (uint64x1_t) {vcvtd_u64_f64 (__a[0])};
- }
-
--/* vceqz - vector.  */
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vceqz_f32 (float32x2_t __a)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_s64_f64 (float64x2_t __a)
- {
--  return (uint32x2_t) (__a == 0.0f);
-+  return __builtin_aarch64_lbtruncv2dfv2di (__a);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vceqz_f64 (float64x1_t __a)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtq_u64_f64 (float64x2_t __a)
- {
--  return (uint64x1_t) (__a == (float64x1_t) {0.0});
-+  return __builtin_aarch64_lbtruncuv2dfv2di_us (__a);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vceqz_p8 (poly8x8_t __a)
-+/* vcvta  */
-+
-+__extension__ extern __inline int64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtad_s64_f64 (float64_t __a)
- {
--  return (uint8x8_t) (__a == 0);
-+  return __builtin_aarch64_lrounddfdi (__a);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vceqz_s8 (int8x8_t __a)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtad_u64_f64 (float64_t __a)
- {
--  return (uint8x8_t) (__a == 0);
-+  return __builtin_aarch64_lroundudfdi_us (__a);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vceqz_s16 (int16x4_t __a)
-+__extension__ extern __inline int32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtas_s32_f32 (float32_t __a)
- {
--  return (uint16x4_t) (__a == 0);
-+  return __builtin_aarch64_lroundsfsi (__a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vceqz_s32 (int32x2_t __a)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtas_u32_f32 (float32_t __a)
- {
--  return (uint32x2_t) (__a == 0);
-+  return __builtin_aarch64_lroundusfsi_us (__a);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vceqz_s64 (int64x1_t __a)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvta_s32_f32 (float32x2_t __a)
- {
--  return (uint64x1_t) (__a == __AARCH64_INT64_C (0));
-+  return __builtin_aarch64_lroundv2sfv2si (__a);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vceqz_u8 (uint8x8_t __a)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvta_u32_f32 (float32x2_t __a)
- {
--  return (__a == 0);
-+  return __builtin_aarch64_lrounduv2sfv2si_us (__a);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vceqz_u16 (uint16x4_t __a)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtaq_s32_f32 (float32x4_t __a)
- {
--  return (__a == 0);
-+  return __builtin_aarch64_lroundv4sfv4si (__a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vceqz_u32 (uint32x2_t __a)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtaq_u32_f32 (float32x4_t __a)
- {
--  return (__a == 0);
-+  return __builtin_aarch64_lrounduv4sfv4si_us (__a);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vceqz_u64 (uint64x1_t __a)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvta_s64_f64 (float64x1_t __a)
- {
--  return (__a == __AARCH64_UINT64_C (0));
-+  return (int64x1_t) {vcvtad_s64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vceqzq_f32 (float32x4_t __a)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvta_u64_f64 (float64x1_t __a)
- {
--  return (uint32x4_t) (__a == 0.0f);
-+  return (uint64x1_t) {vcvtad_u64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vceqzq_f64 (float64x2_t __a)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtaq_s64_f64 (float64x2_t __a)
- {
--  return (uint64x2_t) (__a == 0.0f);
-+  return __builtin_aarch64_lroundv2dfv2di (__a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vceqzq_p8 (poly8x16_t __a)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtaq_u64_f64 (float64x2_t __a)
- {
--  return (uint8x16_t) (__a == 0);
-+  return __builtin_aarch64_lrounduv2dfv2di_us (__a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vceqzq_s8 (int8x16_t __a)
-+/* vcvtm  */
-+
-+__extension__ extern __inline int64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtmd_s64_f64 (float64_t __a)
- {
--  return (uint8x16_t) (__a == 0);
-+  return __builtin_llfloor (__a);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vceqzq_s16 (int16x8_t __a)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtmd_u64_f64 (float64_t __a)
- {
--  return (uint16x8_t) (__a == 0);
-+  return __builtin_aarch64_lfloorudfdi_us (__a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vceqzq_s32 (int32x4_t __a)
-+__extension__ extern __inline int32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtms_s32_f32 (float32_t __a)
- {
--  return (uint32x4_t) (__a == 0);
-+  return __builtin_ifloorf (__a);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vceqzq_s64 (int64x2_t __a)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtms_u32_f32 (float32_t __a)
- {
--  return (uint64x2_t) (__a == __AARCH64_INT64_C (0));
-+  return __builtin_aarch64_lfloorusfsi_us (__a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vceqzq_u8 (uint8x16_t __a)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtm_s32_f32 (float32x2_t __a)
- {
--  return (__a == 0);
-+  return __builtin_aarch64_lfloorv2sfv2si (__a);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vceqzq_u16 (uint16x8_t __a)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtm_u32_f32 (float32x2_t __a)
- {
--  return (__a == 0);
-+  return __builtin_aarch64_lflooruv2sfv2si_us (__a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vceqzq_u32 (uint32x4_t __a)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtmq_s32_f32 (float32x4_t __a)
- {
--  return (__a == 0);
-+  return __builtin_aarch64_lfloorv4sfv4si (__a);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vceqzq_u64 (uint64x2_t __a)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtmq_u32_f32 (float32x4_t __a)
- {
--  return (__a == __AARCH64_UINT64_C (0));
-+  return __builtin_aarch64_lflooruv4sfv4si_us (__a);
- }
-
--/* vceqz - scalar.  */
--
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vceqzs_f32 (float32_t __a)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtm_s64_f64 (float64x1_t __a)
- {
--  return __a == 0.0f ? -1 : 0;
-+  return (int64x1_t) {vcvtmd_s64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vceqzd_s64 (int64_t __a)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtm_u64_f64 (float64x1_t __a)
- {
--  return __a == 0 ? -1ll : 0ll;
-+  return (uint64x1_t) {vcvtmd_u64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vceqzd_u64 (uint64_t __a)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtmq_s64_f64 (float64x2_t __a)
- {
--  return __a == 0 ? -1ll : 0ll;
-+  return __builtin_aarch64_lfloorv2dfv2di (__a);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vceqzd_f64 (float64_t __a)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtmq_u64_f64 (float64x2_t __a)
- {
--  return __a == 0.0 ? -1ll : 0ll;
-+  return __builtin_aarch64_lflooruv2dfv2di_us (__a);
- }
-
--/* vcge - vector.  */
-+/* vcvtn  */
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcge_f32 (float32x2_t __a, float32x2_t __b)
-+__extension__ extern __inline int64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtnd_s64_f64 (float64_t __a)
- {
--  return (uint32x2_t) (__a >= __b);
-+  return __builtin_aarch64_lfrintndfdi (__a);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcge_f64 (float64x1_t __a, float64x1_t __b)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtnd_u64_f64 (float64_t __a)
- {
--  return (uint64x1_t) (__a >= __b);
-+  return __builtin_aarch64_lfrintnudfdi_us (__a);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcge_s8 (int8x8_t __a, int8x8_t __b)
-+__extension__ extern __inline int32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtns_s32_f32 (float32_t __a)
- {
--  return (uint8x8_t) (__a >= __b);
-+  return __builtin_aarch64_lfrintnsfsi (__a);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcge_s16 (int16x4_t __a, int16x4_t __b)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtns_u32_f32 (float32_t __a)
- {
--  return (uint16x4_t) (__a >= __b);
-+  return __builtin_aarch64_lfrintnusfsi_us (__a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcge_s32 (int32x2_t __a, int32x2_t __b)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtn_s32_f32 (float32x2_t __a)
- {
--  return (uint32x2_t) (__a >= __b);
-+  return __builtin_aarch64_lfrintnv2sfv2si (__a);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcge_s64 (int64x1_t __a, int64x1_t __b)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtn_u32_f32 (float32x2_t __a)
- {
--  return (uint64x1_t) (__a >= __b);
-+  return __builtin_aarch64_lfrintnuv2sfv2si_us (__a);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcge_u8 (uint8x8_t __a, uint8x8_t __b)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtnq_s32_f32 (float32x4_t __a)
- {
--  return (__a >= __b);
-+  return __builtin_aarch64_lfrintnv4sfv4si (__a);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcge_u16 (uint16x4_t __a, uint16x4_t __b)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtnq_u32_f32 (float32x4_t __a)
- {
--  return (__a >= __b);
-+  return __builtin_aarch64_lfrintnuv4sfv4si_us (__a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcge_u32 (uint32x2_t __a, uint32x2_t __b)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtn_s64_f64 (float64x1_t __a)
- {
--  return (__a >= __b);
-+  return (int64x1_t) {vcvtnd_s64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcge_u64 (uint64x1_t __a, uint64x1_t __b)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtn_u64_f64 (float64x1_t __a)
- {
--  return (__a >= __b);
-+  return (uint64x1_t) {vcvtnd_u64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgeq_f32 (float32x4_t __a, float32x4_t __b)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtnq_s64_f64 (float64x2_t __a)
- {
--  return (uint32x4_t) (__a >= __b);
-+  return __builtin_aarch64_lfrintnv2dfv2di (__a);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgeq_f64 (float64x2_t __a, float64x2_t __b)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtnq_u64_f64 (float64x2_t __a)
- {
--  return (uint64x2_t) (__a >= __b);
-+  return __builtin_aarch64_lfrintnuv2dfv2di_us (__a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgeq_s8 (int8x16_t __a, int8x16_t __b)
-+/* vcvtp  */
-+
-+__extension__ extern __inline int64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtpd_s64_f64 (float64_t __a)
- {
--  return (uint8x16_t) (__a >= __b);
-+  return __builtin_llceil (__a);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcgeq_s16 (int16x8_t __a, int16x8_t __b)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtpd_u64_f64 (float64_t __a)
- {
--  return (uint16x8_t) (__a >= __b);
-+  return __builtin_aarch64_lceiludfdi_us (__a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgeq_s32 (int32x4_t __a, int32x4_t __b)
-+__extension__ extern __inline int32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtps_s32_f32 (float32_t __a)
- {
--  return (uint32x4_t) (__a >= __b);
-+  return __builtin_iceilf (__a);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgeq_s64 (int64x2_t __a, int64x2_t __b)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtps_u32_f32 (float32_t __a)
- {
--  return (uint64x2_t) (__a >= __b);
-+  return __builtin_aarch64_lceilusfsi_us (__a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtp_s32_f32 (float32x2_t __a)
- {
--  return (__a >= __b);
-+  return __builtin_aarch64_lceilv2sfv2si (__a);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtp_u32_f32 (float32x2_t __a)
- {
--  return (__a >= __b);
-+  return __builtin_aarch64_lceiluv2sfv2si_us (__a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtpq_s32_f32 (float32x4_t __a)
- {
--  return (__a >= __b);
-+  return __builtin_aarch64_lceilv4sfv4si (__a);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgeq_u64 (uint64x2_t __a, uint64x2_t __b)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtpq_u32_f32 (float32x4_t __a)
- {
--  return (__a >= __b);
-+  return __builtin_aarch64_lceiluv4sfv4si_us (__a);
- }
-
--/* vcge - scalar.  */
--
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vcges_f32 (float32_t __a, float32_t __b)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtp_s64_f64 (float64x1_t __a)
- {
--  return __a >= __b ? -1 : 0;
-+  return (int64x1_t) {vcvtpd_s64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcged_s64 (int64_t __a, int64_t __b)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtp_u64_f64 (float64x1_t __a)
- {
--  return __a >= __b ? -1ll : 0ll;
-+  return (uint64x1_t) {vcvtpd_u64_f64 (__a[0])};
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcged_u64 (uint64_t __a, uint64_t __b)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtpq_s64_f64 (float64x2_t __a)
- {
--  return __a >= __b ? -1ll : 0ll;
-+  return __builtin_aarch64_lceilv2dfv2di (__a);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcged_f64 (float64_t __a, float64_t __b)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vcvtpq_u64_f64 (float64x2_t __a)
- {
--  return __a >= __b ? -1ll : 0ll;
-+  return __builtin_aarch64_lceiluv2dfv2di_us (__a);
- }
-
--/* vcgez - vector.  */
-+/* vdup_n  */
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgez_f32 (float32x2_t __a)
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_f16 (float16_t __a)
- {
--  return (uint32x2_t) (__a >= 0.0f);
-+  return (float16x4_t) {__a, __a, __a, __a};
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgez_f64 (float64x1_t __a)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_f32 (float32_t __a)
- {
--  return (uint64x1_t) (__a[0] >= (float64x1_t) {0.0});
-+  return (float32x2_t) {__a, __a};
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgez_s8 (int8x8_t __a)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_f64 (float64_t __a)
- {
--  return (uint8x8_t) (__a >= 0);
-+  return (float64x1_t) {__a};
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcgez_s16 (int16x4_t __a)
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_p8 (poly8_t __a)
- {
--  return (uint16x4_t) (__a >= 0);
-+  return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgez_s32 (int32x2_t __a)
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_p16 (poly16_t __a)
- {
--  return (uint32x2_t) (__a >= 0);
-+  return (poly16x4_t) {__a, __a, __a, __a};
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgez_s64 (int64x1_t __a)
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_p64 (poly64_t __a)
- {
--  return (uint64x1_t) (__a >= __AARCH64_INT64_C (0));
-+  return (poly64x1_t) {__a};
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgezq_f32 (float32x4_t __a)
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_s8 (int8_t __a)
- {
--  return (uint32x4_t) (__a >= 0.0f);
-+  return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgezq_f64 (float64x2_t __a)
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_s16 (int16_t __a)
- {
--  return (uint64x2_t) (__a >= 0.0);
-+  return (int16x4_t) {__a, __a, __a, __a};
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgezq_s8 (int8x16_t __a)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_s32 (int32_t __a)
- {
--  return (uint8x16_t) (__a >= 0);
-+  return (int32x2_t) {__a, __a};
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcgezq_s16 (int16x8_t __a)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_s64 (int64_t __a)
- {
--  return (uint16x8_t) (__a >= 0);
-+  return (int64x1_t) {__a};
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgezq_s32 (int32x4_t __a)
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_u8 (uint8_t __a)
- {
--  return (uint32x4_t) (__a >= 0);
-+  return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgezq_s64 (int64x2_t __a)
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_u16 (uint16_t __a)
- {
--  return (uint64x2_t) (__a >= __AARCH64_INT64_C (0));
-+  return (uint16x4_t) {__a, __a, __a, __a};
- }
-
--/* vcgez - scalar.  */
--
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vcgezs_f32 (float32_t __a)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_u32 (uint32_t __a)
- {
--  return __a >= 0.0f ? -1 : 0;
-+  return (uint32x2_t) {__a, __a};
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcgezd_s64 (int64_t __a)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_n_u64 (uint64_t __a)
- {
--  return __a >= 0 ? -1ll : 0ll;
-+  return (uint64x1_t) {__a};
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcgezd_f64 (float64_t __a)
-+/* vdupq_n  */
-+
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_f16 (float16_t __a)
- {
--  return __a >= 0.0 ? -1ll : 0ll;
-+  return (float16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--/* vcgt - vector.  */
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgt_f32 (float32x2_t __a, float32x2_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_f32 (float32_t __a)
- {
--  return (uint32x2_t) (__a > __b);
-+  return (float32x4_t) {__a, __a, __a, __a};
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgt_f64 (float64x1_t __a, float64x1_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_f64 (float64_t __a)
- {
--  return (uint64x1_t) (__a > __b);
-+  return (float64x2_t) {__a, __a};
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgt_s8 (int8x8_t __a, int8x8_t __b)
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_p8 (uint32_t __a)
- {
--  return (uint8x8_t) (__a > __b);
-+  return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
-+                       __a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcgt_s16 (int16x4_t __a, int16x4_t __b)
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_p16 (uint32_t __a)
- {
--  return (uint16x4_t) (__a > __b);
-+  return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgt_s32 (int32x2_t __a, int32x2_t __b)
-+__extension__ extern __inline poly64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_p64 (uint64_t __a)
- {
--  return (uint32x2_t) (__a > __b);
-+  return (poly64x2_t) {__a, __a};
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgt_s64 (int64x1_t __a, int64x1_t __b)
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_s8 (int32_t __a)
- {
--  return (uint64x1_t) (__a > __b);
-+  return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
-+                      __a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_s16 (int32_t __a)
- {
--  return (__a > __b);
-+  return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_s32 (int32_t __a)
- {
--  return (__a > __b);
-+  return (int32x4_t) {__a, __a, __a, __a};
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_s64 (int64_t __a)
- {
--  return (__a > __b);
-+  return (int64x2_t) {__a, __a};
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgt_u64 (uint64x1_t __a, uint64x1_t __b)
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_u8 (uint32_t __a)
- {
--  return (__a > __b);
-+  return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a,
-+                       __a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgtq_f32 (float32x4_t __a, float32x4_t __b)
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_u16 (uint32_t __a)
- {
--  return (uint32x4_t) (__a > __b);
-+  return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a};
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgtq_f64 (float64x2_t __a, float64x2_t __b)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_u32 (uint32_t __a)
- {
--  return (uint64x2_t) (__a > __b);
-+  return (uint32x4_t) {__a, __a, __a, __a};
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgtq_s8 (int8x16_t __a, int8x16_t __b)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_n_u64 (uint64_t __a)
- {
--  return (uint8x16_t) (__a > __b);
-+  return (uint64x2_t) {__a, __a};
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcgtq_s16 (int16x8_t __a, int16x8_t __b)
-+/* vdup_lane  */
-+
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_f16 (float16x4_t __a, const int __b)
- {
--  return (uint16x8_t) (__a > __b);
-+  return __aarch64_vdup_lane_f16 (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgtq_s32 (int32x4_t __a, int32x4_t __b)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_f32 (float32x2_t __a, const int __b)
- {
--  return (uint32x4_t) (__a > __b);
-+  return __aarch64_vdup_lane_f32 (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgtq_s64 (int64x2_t __a, int64x2_t __b)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_f64 (float64x1_t __a, const int __b)
- {
--  return (uint64x2_t) (__a > __b);
-+  return __aarch64_vdup_lane_f64 (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_p8 (poly8x8_t __a, const int __b)
- {
--  return (__a > __b);
-+  return __aarch64_vdup_lane_p8 (__a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_p16 (poly16x4_t __a, const int __b)
- {
--  return (__a > __b);
-+  return __aarch64_vdup_lane_p16 (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_p64 (poly64x1_t __a, const int __b)
- {
--  return (__a > __b);
-+  return __aarch64_vdup_lane_p64 (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgtq_u64 (uint64x2_t __a, uint64x2_t __b)
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_s8 (int8x8_t __a, const int __b)
- {
--  return (__a > __b);
-+  return __aarch64_vdup_lane_s8 (__a, __b);
- }
-
--/* vcgt - scalar.  */
--
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vcgts_f32 (float32_t __a, float32_t __b)
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_s16 (int16x4_t __a, const int __b)
- {
--  return __a > __b ? -1 : 0;
-+  return __aarch64_vdup_lane_s16 (__a, __b);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcgtd_s64 (int64_t __a, int64_t __b)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_s32 (int32x2_t __a, const int __b)
- {
--  return __a > __b ? -1ll : 0ll;
-+  return __aarch64_vdup_lane_s32 (__a, __b);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcgtd_u64 (uint64_t __a, uint64_t __b)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_s64 (int64x1_t __a, const int __b)
- {
--  return __a > __b ? -1ll : 0ll;
-+  return __aarch64_vdup_lane_s64 (__a, __b);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcgtd_f64 (float64_t __a, float64_t __b)
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_u8 (uint8x8_t __a, const int __b)
- {
--  return __a > __b ? -1ll : 0ll;
-+  return __aarch64_vdup_lane_u8 (__a, __b);
- }
-
--/* vcgtz - vector.  */
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgtz_f32 (float32x2_t __a)
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_u16 (uint16x4_t __a, const int __b)
- {
--  return (uint32x2_t) (__a > 0.0f);
-+  return __aarch64_vdup_lane_u16 (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgtz_f64 (float64x1_t __a)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_u32 (uint32x2_t __a, const int __b)
- {
--  return (uint64x1_t) (__a > (float64x1_t) {0.0});
-+  return __aarch64_vdup_lane_u32 (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcgtz_s8 (int8x8_t __a)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_lane_u64 (uint64x1_t __a, const int __b)
- {
--  return (uint8x8_t) (__a > 0);
-+  return __aarch64_vdup_lane_u64 (__a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcgtz_s16 (int16x4_t __a)
-+/* vdup_laneq  */
-+
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_f16 (float16x8_t __a, const int __b)
- {
--  return (uint16x4_t) (__a > 0);
-+  return __aarch64_vdup_laneq_f16 (__a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcgtz_s32 (int32x2_t __a)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_f32 (float32x4_t __a, const int __b)
- {
--  return (uint32x2_t) (__a > 0);
-+  return __aarch64_vdup_laneq_f32 (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcgtz_s64 (int64x1_t __a)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_f64 (float64x2_t __a, const int __b)
- {
--  return (uint64x1_t) (__a > __AARCH64_INT64_C (0));
-+  return __aarch64_vdup_laneq_f64 (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgtzq_f32 (float32x4_t __a)
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_p8 (poly8x16_t __a, const int __b)
- {
--  return (uint32x4_t) (__a > 0.0f);
-+  return __aarch64_vdup_laneq_p8 (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgtzq_f64 (float64x2_t __a)
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_p16 (poly16x8_t __a, const int __b)
- {
--  return (uint64x2_t) (__a > 0.0);
-+  return __aarch64_vdup_laneq_p16 (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcgtzq_s8 (int8x16_t __a)
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_p64 (poly64x2_t __a, const int __b)
- {
--  return (uint8x16_t) (__a > 0);
-+  return __aarch64_vdup_laneq_p64 (__a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcgtzq_s16 (int16x8_t __a)
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_s8 (int8x16_t __a, const int __b)
- {
--  return (uint16x8_t) (__a > 0);
-+  return __aarch64_vdup_laneq_s8 (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcgtzq_s32 (int32x4_t __a)
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_s16 (int16x8_t __a, const int __b)
- {
--  return (uint32x4_t) (__a > 0);
-+  return __aarch64_vdup_laneq_s16 (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcgtzq_s64 (int64x2_t __a)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_s32 (int32x4_t __a, const int __b)
- {
--  return (uint64x2_t) (__a > __AARCH64_INT64_C (0));
-+  return __aarch64_vdup_laneq_s32 (__a, __b);
- }
-
--/* vcgtz - scalar.  */
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_s64 (int64x2_t __a, const int __b)
-+{
-+  return __aarch64_vdup_laneq_s64 (__a, __b);
-+}
-
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vcgtzs_f32 (float32_t __a)
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_u8 (uint8x16_t __a, const int __b)
- {
--  return __a > 0.0f ? -1 : 0;
-+  return __aarch64_vdup_laneq_u8 (__a, __b);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcgtzd_s64 (int64_t __a)
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_u16 (uint16x8_t __a, const int __b)
- {
--  return __a > 0 ? -1ll : 0ll;
-+  return __aarch64_vdup_laneq_u16 (__a, __b);
- }
-
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vcgtzd_f64 (float64_t __a)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_u32 (uint32x4_t __a, const int __b)
- {
--  return __a > 0.0 ? -1ll : 0ll;
-+  return __aarch64_vdup_laneq_u32 (__a, __b);
- }
-
--/* vcle - vector.  */
--
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcle_f32 (float32x2_t __a, float32x2_t __b)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdup_laneq_u64 (uint64x2_t __a, const int __b)
- {
--  return (uint32x2_t) (__a <= __b);
-+  return __aarch64_vdup_laneq_u64 (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcle_f64 (float64x1_t __a, float64x1_t __b)
-+/* vdupq_lane  */
-+
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_f16 (float16x4_t __a, const int __b)
- {
--  return (uint64x1_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_f16 (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcle_s8 (int8x8_t __a, int8x8_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_f32 (float32x2_t __a, const int __b)
- {
--  return (uint8x8_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_f32 (__a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcle_s16 (int16x4_t __a, int16x4_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_f64 (float64x1_t __a, const int __b)
- {
--  return (uint16x4_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_f64 (__a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcle_s32 (int32x2_t __a, int32x2_t __b)
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_p8 (poly8x8_t __a, const int __b)
- {
--  return (uint32x2_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_p8 (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcle_s64 (int64x1_t __a, int64x1_t __b)
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_p16 (poly16x4_t __a, const int __b)
- {
--  return (uint64x1_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_p16 (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vcle_u8 (uint8x8_t __a, uint8x8_t __b)
-+__extension__ extern __inline poly64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_p64 (poly64x1_t __a, const int __b)
- {
--  return (__a <= __b);
-+  return __aarch64_vdupq_lane_p64 (__a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vcle_u16 (uint16x4_t __a, uint16x4_t __b)
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_s8 (int8x8_t __a, const int __b)
- {
--  return (__a <= __b);
-+  return __aarch64_vdupq_lane_s8 (__a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vcle_u32 (uint32x2_t __a, uint32x2_t __b)
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_s16 (int16x4_t __a, const int __b)
- {
--  return (__a <= __b);
-+  return __aarch64_vdupq_lane_s16 (__a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vcle_u64 (uint64x1_t __a, uint64x1_t __b)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_s32 (int32x2_t __a, const int __b)
- {
--  return (__a <= __b);
-+  return __aarch64_vdupq_lane_s32 (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcleq_f32 (float32x4_t __a, float32x4_t __b)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_s64 (int64x1_t __a, const int __b)
- {
--  return (uint32x4_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_s64 (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcleq_f64 (float64x2_t __a, float64x2_t __b)
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_u8 (uint8x8_t __a, const int __b)
- {
--  return (uint64x2_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_u8 (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcleq_s8 (int8x16_t __a, int8x16_t __b)
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_u16 (uint16x4_t __a, const int __b)
- {
--  return (uint8x16_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_u16 (__a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcleq_s16 (int16x8_t __a, int16x8_t __b)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_u32 (uint32x2_t __a, const int __b)
- {
--  return (uint16x8_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_u32 (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vcleq_s32 (int32x4_t __a, int32x4_t __b)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_lane_u64 (uint64x1_t __a, const int __b)
- {
--  return (uint32x4_t) (__a <= __b);
-+  return __aarch64_vdupq_lane_u64 (__a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vcleq_s64 (int64x2_t __a, int64x2_t __b)
-+/* vdupq_laneq  */
-+
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_laneq_f16 (float16x8_t __a, const int __b)
- {
--  return (uint64x2_t) (__a <= __b);
-+  return __aarch64_vdupq_laneq_f16 (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_laneq_f32 (float32x4_t __a, const int __b)
- {
--  return (__a <= __b);
-+  return __aarch64_vdupq_laneq_f32 (__a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vdupq_laneq_f64 (float64x2_t __a, const int __b) - { -- return (__a <= __b); -+ return __aarch64_vdupq_laneq_f64 (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcleq_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_p8 (poly8x16_t __a, const int __b) - { -- return (__a <= __b); -+ return __aarch64_vdupq_laneq_p8 (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcleq_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_p16 (poly16x8_t __a, const int __b) - { -- return (__a <= __b); -+ return __aarch64_vdupq_laneq_p16 (__a, __b); - } - --/* vcle - scalar. */ -- --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vcles_f32 (float32_t __a, float32_t __b) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_p64 (poly64x2_t __a, const int __b) - { -- return __a <= __b ? -1 : 0; -+ return __aarch64_vdupq_laneq_p64 (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcled_s64 (int64_t __a, int64_t __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_s8 (int8x16_t __a, const int __b) - { -- return __a <= __b ? -1ll : 0ll; -+ return __aarch64_vdupq_laneq_s8 (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcled_u64 (uint64_t __a, uint64_t __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_s16 (int16x8_t __a, const int __b) - { -- return __a <= __b ? -1ll : 0ll; -+ return __aarch64_vdupq_laneq_s16 (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcled_f64 (float64_t __a, float64_t __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_s32 (int32x4_t __a, const int __b) - { -- return __a <= __b ? -1ll : 0ll; -+ return __aarch64_vdupq_laneq_s32 (__a, __b); - } - --/* vclez - vector. 
*/ -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vclez_f32 (float32x2_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_s64 (int64x2_t __a, const int __b) - { -- return (uint32x2_t) (__a <= 0.0f); -+ return __aarch64_vdupq_laneq_s64 (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclez_f64 (float64x1_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_u8 (uint8x16_t __a, const int __b) - { -- return (uint64x1_t) (__a <= (float64x1_t) {0.0}); -+ return __aarch64_vdupq_laneq_u8 (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vclez_s8 (int8x8_t __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_u16 (uint16x8_t __a, const int __b) - { -- return (uint8x8_t) (__a <= 0); -+ return __aarch64_vdupq_laneq_u16 (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vclez_s16 (int16x4_t __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_u32 (uint32x4_t __a, const int __b) - { -- return (uint16x4_t) (__a <= 0); -+ return __aarch64_vdupq_laneq_u32 (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vclez_s32 (int32x2_t __a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_laneq_u64 (uint64x2_t __a, const int __b) - { -- return (uint32x2_t) (__a <= 0); -+ return __aarch64_vdupq_laneq_u64 (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclez_s64 (int64x1_t __a) -+/* vdupb_lane */ -+__extension__ extern __inline poly8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupb_lane_p8 (poly8x8_t __a, const int __b) - { -- return (uint64x1_t) (__a <= __AARCH64_INT64_C (0)); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vclezq_f32 (float32x4_t __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupb_lane_s8 (int8x8_t __a, const int __b) - { -- return (uint32x4_t) (__a <= 0.0f); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vclezq_f64 (float64x2_t __a) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupb_lane_u8 (uint8x8_t __a, const int __b) - { -- return (uint64x2_t) (__a <= 0.0); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vclezq_s8 (int8x16_t __a) -+/* vduph_lane */ -+ -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_lane_f16 (float16x4_t __a, const int __b) - { -- return (uint8x16_t) (__a <= 0); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vclezq_s16 (int16x8_t __a) -+__extension__ extern __inline poly16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vduph_lane_p16 (poly16x4_t __a, const int __b) - { -- return (uint16x8_t) (__a <= 0); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vclezq_s32 (int32x4_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_lane_s16 (int16x4_t __a, const int __b) - { -- return (uint32x4_t) (__a <= 0); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vclezq_s64 (int64x2_t __a) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_lane_u16 (uint16x4_t __a, const int __b) - { -- return (uint64x2_t) (__a <= __AARCH64_INT64_C (0)); -+ return __aarch64_vget_lane_any (__a, __b); - } - --/* vclez - scalar. */ -+/* vdups_lane */ - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vclezs_f32 (float32_t __a) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdups_lane_f32 (float32x2_t __a, const int __b) - { -- return __a <= 0.0f ? -1 : 0; -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vclezd_s64 (int64_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdups_lane_s32 (int32x2_t __a, const int __b) - { -- return __a <= 0 ? -1ll : 0ll; -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vclezd_f64 (float64_t __a) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdups_lane_u32 (uint32x2_t __a, const int __b) - { -- return __a <= 0.0 ? -1ll : 0ll; -+ return __aarch64_vget_lane_any (__a, __b); - } - --/* vclt - vector. 
*/ -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vclt_f32 (float32x2_t __a, float32x2_t __b) -+/* vdupd_lane */ -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupd_lane_f64 (float64x1_t __a, const int __b) - { -- return (uint32x2_t) (__a < __b); -+ __AARCH64_LANE_CHECK (__a, __b); -+ return __a[0]; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclt_f64 (float64x1_t __a, float64x1_t __b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupd_lane_s64 (int64x1_t __a, const int __b) - { -- return (uint64x1_t) (__a < __b); -+ __AARCH64_LANE_CHECK (__a, __b); -+ return __a[0]; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vclt_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupd_lane_u64 (uint64x1_t __a, const int __b) - { -- return (uint8x8_t) (__a < __b); -+ __AARCH64_LANE_CHECK (__a, __b); -+ return __a[0]; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vclt_s16 (int16x4_t __a, int16x4_t __b) -+/* vdupb_laneq */ -+__extension__ extern __inline poly8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupb_laneq_p8 (poly8x16_t __a, const int __b) - { -- return (uint16x4_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vclt_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupb_laneq_s8 (int8x16_t __a, const int __b) - { -- return (uint32x2_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclt_s64 (int64x1_t __a, int64x1_t __b) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupb_laneq_u8 (uint8x16_t __a, const int __b) - { -- return (uint64x1_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vclt_u8 (uint8x8_t __a, uint8x8_t __b) --{ -- return (__a < __b); --} -+/* vduph_laneq */ - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vclt_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_laneq_f16 (float16x8_t __a, const int __b) - { -- return (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vclt_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline poly16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_laneq_p16 (poly16x8_t __a, const int __b) - { -- return (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vclt_u64 (uint64x1_t __a, uint64x1_t __b) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_laneq_s16 (int16x8_t __a, const int __b) - { -- return (__a < __b); -+ return __aarch64_vget_lane_any (__a, 
__b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcltq_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vduph_laneq_u16 (uint16x8_t __a, const int __b) - { -- return (uint32x4_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcltq_f64 (float64x2_t __a, float64x2_t __b) -+/* vdups_laneq */ -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdups_laneq_f32 (float32x4_t __a, const int __b) - { -- return (uint64x2_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcltq_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdups_laneq_s32 (int32x4_t __a, const int __b) - { -- return (uint8x16_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vcltq_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdups_laneq_u32 (uint32x4_t __a, const int __b) - { -- return (uint16x8_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcltq_s32 (int32x4_t __a, int32x4_t __b) -+/* vdupd_laneq */ -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupd_laneq_f64 (float64x2_t __a, const int __b) - { -- return (uint32x4_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcltq_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupd_laneq_s64 (int64x2_t __a, const int __b) - { -- return (uint64x2_t) (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcltq_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupd_laneq_u64 (uint64x2_t __a, const int __b) - { -- return (__a < __b); -+ return __aarch64_vget_lane_any (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vcltq_u16 (uint16x8_t __a, uint16x8_t __b) -+/* vext */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_f16 (float16x4_t __a, float16x4_t __b, __const int __c) - { -- return (__a < __b); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4 - __c, 5 - __c, 6 - __c, 7 - __c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x4_t) {__c, __c + 1, __c + 2, __c + 3}); -+#endif - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcltq_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_f32 (float32x2_t __a, 
float32x2_t __b, __const int __c) - { -- return (__a < __b); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcltq_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) - { -- return (__a < __b); -+ __AARCH64_LANE_CHECK (__a, __c); -+ /* The only possible index to the assembler instruction returns element 0. */ -+ return __a; - } -- --/* vclt - scalar. */ -- --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vclts_f32 (float32_t __a, float32_t __b) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) - { -- return __a < __b ? -1 : 0; -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcltd_s64 (int64_t __a, int64_t __b) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) - { -- return __a < __b ? -1ll : 0ll; -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcltd_u64 (uint64_t __a, uint64_t __b) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_p64 (poly64x1_t __a, poly64x1_t __b, __const int __c) - { -- return __a < __b ? -1ll : 0ll; -+ __AARCH64_LANE_CHECK (__a, __c); -+ /* The only possible index to the assembler instruction returns element 0. */ -+ return __a; - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcltd_f64 (float64_t __a, float64_t __b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) - { -- return __a < __b ? -1ll : 0ll; -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif - } - --/* vcltz - vector. 
*/ -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcltz_f32 (float32x2_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) - { -- return (uint32x2_t) (__a < 0.0f); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcltz_f64 (float64x1_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) - { -- return (uint64x1_t) (__a < (float64x1_t) {0.0}); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcltz_s8 (int8x8_t __a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) - { -- return (uint8x8_t) (__a < 0); -+ __AARCH64_LANE_CHECK (__a, __c); -+ /* The only possible index to the assembler instruction returns element 0. */ -+ return __a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vcltz_s16 (int16x4_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) - { -- return (uint16x4_t) (__a < 0); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcltz_s32 (int32x2_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) - { -- return (uint32x2_t) (__a < 0); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcltz_s64 (int64x1_t __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) - { -- return (uint64x1_t) (__a < __AARCH64_INT64_C (0)); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); -+#endif - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcltzq_f32 (float32x4_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c) - { -- return (uint32x4_t) (__a < 0.0f); -+ __AARCH64_LANE_CHECK (__a, __c); -+ /* The only possible index to the assembler instruction returns element 0. */ -+ return __a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcltzq_f64 (float64x2_t __a) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_f16 (float16x8_t __a, float16x8_t __b, __const int __c) - { -- return (uint64x2_t) (__a < 0.0); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint16x8_t) {8 - __c, 9 - __c, 10 - __c, 11 - __c, -+ 12 - __c, 13 - __c, 14 - __c, -+ 15 - __c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c + 1, __c + 2, __c + 3, -+ __c + 4, __c + 5, __c + 6, __c + 7}); -+#endif - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcltzq_s8 (int8x16_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) - { -- return (uint8x16_t) (__a < 0); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vcltzq_s16 (int16x8_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) - { -- return (uint16x8_t) (__a < 0); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcltzq_s32 (int32x4_t __a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) - { -- return (uint32x4_t) (__a < 0); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcltzq_s64 (int64x2_t __a) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) - { -- return (uint64x2_t) (__a < __AARCH64_INT64_C (0)); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif - } - --/* vcltz - scalar. 
*/ -- --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vcltzs_f32 (float32_t __a) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_p64 (poly64x2_t __a, poly64x2_t __b, __const int __c) - { -- return __a < 0.0f ? -1 : 0; -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcltzd_s64 (int64_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) - { -- return __a < 0 ? -1ll : 0ll; -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcltzd_f64 (float64_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c) - { -- return __a < 0.0 ? -1ll : 0ll; -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif - } - --/* vcls. 
*/ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vcls_s8 (int8x8_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) - { -- return __builtin_aarch64_clrsbv8qi (__a); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vcls_s16 (int16x4_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) - { -- return __builtin_aarch64_clrsbv4hi (__a); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vcls_s32 (int32x2_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) - { -- return __builtin_aarch64_clrsbv2si (__a); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint8x16_t) -+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); -+#endif - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vclsq_s8 (int8x16_t __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) - { -- return __builtin_aarch64_clrsbv16qi (__a); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, (uint16x8_t) -+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); -+#endif - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vclsq_s16 (int16x8_t __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) - { -- return __builtin_aarch64_clrsbv8hi (__a); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, -+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); -+#endif - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vclsq_s32 (int32x4_t __a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) - { -- return __builtin_aarch64_clrsbv4si (__a); -+ __AARCH64_LANE_CHECK (__a, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__b, __a, 
(uint64x2_t) {2-__c, 3-__c}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); -+#endif - } - --/* vclz. */ -+/* vfma */ - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vclz_s8 (int8x8_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) - { -- return __builtin_aarch64_clzv8qi (__a); -+ return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vclz_s16 (int16x4_t __a) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) - { -- return __builtin_aarch64_clzv4hi (__a); -+ return __builtin_aarch64_fmav2sf (__b, __c, __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vclz_s32 (int32x2_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - { -- return __builtin_aarch64_clzv2si (__a); -+ return __builtin_aarch64_fmav4sf (__b, __c, __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vclz_u8 (uint8x8_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) - { -- return (uint8x8_t)__builtin_aarch64_clzv8qi ((int8x8_t)__a); -+ return __builtin_aarch64_fmav2df (__b, __c, __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vclz_u16 (uint16x4_t __a) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) - { -- return (uint16x4_t)__builtin_aarch64_clzv4hi ((int16x4_t)__a); -+ return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vclz_u32 (uint32x2_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c) - { -- return (uint32x2_t)__builtin_aarch64_clzv2si ((int32x2_t)__a); -+ return (float64x1_t) {__b[0] * __c + __a[0]}; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vclzq_s8 (int8x16_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) - { -- return __builtin_aarch64_clzv16qi (__a); -+ return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vclzq_s16 (int16x8_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c) - { -- return __builtin_aarch64_clzv8hi (__a); -+ return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vclzq_s32 (int32x4_t __a) -+/* vfma_lane */ -+ -+__extension__ extern __inline float32x2_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_lane_f32 (float32x2_t __a, float32x2_t __b, -+ float32x2_t __c, const int __lane) - { -- return __builtin_aarch64_clzv4si (__a); -+ return __builtin_aarch64_fmav2sf (__b, -+ __aarch64_vdup_lane_f32 (__c, __lane), -+ __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vclzq_u8 (uint8x16_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_lane_f64 (float64x1_t __a, float64x1_t __b, -+ float64x1_t __c, const int __lane) - { -- return (uint8x16_t)__builtin_aarch64_clzv16qi ((int8x16_t)__a); -+ return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vclzq_u16 (uint16x8_t __a) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmad_lane_f64 (float64_t __a, float64_t __b, -+ float64x1_t __c, const int __lane) - { -- return (uint16x8_t)__builtin_aarch64_clzv8hi ((int16x8_t)__a); -+ return __builtin_fma (__b, __c[0], __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vclzq_u32 (uint32x4_t __a) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmas_lane_f32 (float32_t __a, float32_t __b, -+ float32x2_t __c, const int __lane) - { -- return (uint32x4_t)__builtin_aarch64_clzv4si ((int32x4_t)__a); -+ return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a); - } - --/* vcnt. */ -+/* vfma_laneq */ - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vcnt_p8 (poly8x8_t __a) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_laneq_f32 (float32x2_t __a, float32x2_t __b, -+ float32x4_t __c, const int __lane) - { -- return (poly8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); -+ return __builtin_aarch64_fmav2sf (__b, -+ __aarch64_vdup_laneq_f32 (__c, __lane), -+ __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vcnt_s8 (int8x8_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_laneq_f64 (float64x1_t __a, float64x1_t __b, -+ float64x2_t __c, const int __lane) - { -- return __builtin_aarch64_popcountv8qi (__a); -+ float64_t __c0 = __aarch64_vget_lane_any (__c, __lane); -+ return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])}; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vcnt_u8 (uint8x8_t __a) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmad_laneq_f64 (float64_t __a, float64_t __b, -+ float64x2_t __c, const int __lane) - { -- return (uint8x8_t) __builtin_aarch64_popcountv8qi ((int8x8_t) __a); -+ return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vcntq_p8 (poly8x16_t __a) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmas_laneq_f32 (float32_t __a, float32_t __b, -+ float32x4_t __c, const int __lane) - { -- return (poly8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); -+ return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, 
__lane), __a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vcntq_s8 (int8x16_t __a) -+/* vfmaq_lane */ -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b, -+ float32x2_t __c, const int __lane) - { -- return __builtin_aarch64_popcountv16qi (__a); -+ return __builtin_aarch64_fmav4sf (__b, -+ __aarch64_vdupq_lane_f32 (__c, __lane), -+ __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vcntq_u8 (uint8x16_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b, -+ float64x1_t __c, const int __lane) - { -- return (uint8x16_t) __builtin_aarch64_popcountv16qi ((int8x16_t) __a); -+ return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a); - } - --/* vcvt (double -> float). */ -+/* vfmaq_laneq */ - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) --vcvt_f16_f32 (float32x4_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b, -+ float32x4_t __c, const int __lane) - { -- return __builtin_aarch64_float_truncate_lo_v4hf (__a); -+ return __builtin_aarch64_fmav4sf (__b, -+ __aarch64_vdupq_laneq_f32 (__c, __lane), -+ __a); - } - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) --vcvt_high_f16_f32 (float16x4_t __a, float32x4_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b, -+ float64x2_t __c, const int __lane) - { -- return __builtin_aarch64_float_truncate_hi_v8hf (__a, __b); -+ return __builtin_aarch64_fmav2df (__b, -+ __aarch64_vdupq_laneq_f64 (__c, __lane), -+ __a); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vcvt_f32_f64 (float64x2_t __a) -+/* vfms */ -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) - { -- return __builtin_aarch64_float_truncate_lo_v2sf (__a); -+ return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) - { -- return __builtin_aarch64_float_truncate_hi_v4sf (__a, __b); -+ return __builtin_aarch64_fmav2sf (-__b, __c, __a); - } - --/* vcvt (float -> double). 
*/ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vcvt_f32_f16 (float16x4_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - { -- return __builtin_aarch64_float_extend_lo_v4sf (__a); -+ return __builtin_aarch64_fmav4sf (-__b, __c, __a); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vcvt_f64_f32 (float32x2_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) - { -- -- return __builtin_aarch64_float_extend_lo_v2df (__a); -+ return __builtin_aarch64_fmav2df (-__b, __c, __a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vcvt_high_f32_f16 (float16x8_t __a) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) - { -- return __builtin_aarch64_vec_unpacks_hi_v8hf (__a); -+ return __builtin_aarch64_fmav2sf (-__b, vdup_n_f32 (__c), __a); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vcvt_high_f64_f32 (float32x4_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_n_f64 (float64x1_t __a, float64x1_t __b, float64_t __c) - { -- return __builtin_aarch64_vec_unpacks_hi_v4sf (__a); -+ return (float64x1_t) {-__b[0] * __c + __a[0]}; - } - --/* vcvt (<u>int -> float) */ -- --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vcvtd_f64_s64 (int64_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) - { -- return (float64_t) __a; -+ return __builtin_aarch64_fmav4sf (-__b, vdupq_n_f32 (__c), __a); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vcvtd_f64_u64 (uint64_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c) - { -- return (float64_t) __a; -+ return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c), __a); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vcvts_f32_s32 (int32_t __a) -+/* vfms_lane */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_lane_f32 (float32x2_t __a, float32x2_t __b, -+ float32x2_t __c, const int __lane) - { -- return (float32_t) __a; -+ return __builtin_aarch64_fmav2sf (-__b, -+ __aarch64_vdup_lane_f32 (__c, __lane), -+ __a); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vcvts_f32_u32 (uint32_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_lane_f64 (float64x1_t __a, float64x1_t __b, -+ float64x1_t __c, const int __lane) - { -- return (float32_t) __a; -+ return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vcvt_f32_s32 (int32x2_t __a) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vfmsd_lane_f64 (float64_t __a, float64_t __b, -+ float64x1_t __c, const int __lane) - { -- return __builtin_aarch64_floatv2siv2sf (__a); -+ return __builtin_fma (-__b, __c[0], __a); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vcvt_f32_u32 (uint32x2_t __a) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmss_lane_f32 (float32_t __a, float32_t __b, -+ float32x2_t __c, const int __lane) - { -- return __builtin_aarch64_floatunsv2siv2sf ((int32x2_t) __a); -+ return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vcvtq_f32_s32 (int32x4_t __a) -+/* vfms_laneq */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_laneq_f32 (float32x2_t __a, float32x2_t __b, -+ float32x4_t __c, const int __lane) - { -- return __builtin_aarch64_floatv4siv4sf (__a); -+ return __builtin_aarch64_fmav2sf (-__b, -+ __aarch64_vdup_laneq_f32 (__c, __lane), -+ __a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vcvtq_f32_u32 (uint32x4_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_laneq_f64 (float64x1_t __a, float64x1_t __b, -+ float64x2_t __c, const int __lane) - { -- return __builtin_aarch64_floatunsv4siv4sf ((int32x4_t) __a); -+ float64_t __c0 = __aarch64_vget_lane_any (__c, __lane); -+ return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])}; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vcvtq_f64_s64 (int64x2_t __a) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsd_laneq_f64 (float64_t __a, float64_t __b, -+ float64x2_t __c, const int __lane) - { -- return __builtin_aarch64_floatv2div2df (__a); -+ return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vcvtq_f64_u64 (uint64x2_t __a) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmss_laneq_f32 (float32_t __a, float32_t __b, -+ float32x4_t __c, const int __lane) - { -- return __builtin_aarch64_floatunsv2div2df ((int64x2_t) __a); -+ return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a); - } - --/* vcvt (float -> <u>int) */ -+/* vfmsq_lane */ - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vcvtd_s64_f64 (float64_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b, -+ float32x2_t __c, const int __lane) - { -- return (int64_t) __a; -+ return __builtin_aarch64_fmav4sf (-__b, -+ __aarch64_vdupq_lane_f32 (__c, __lane), -+ __a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcvtd_u64_f64 (float64_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b, -+ float64x1_t __c, const int __lane) - { -- return (uint64_t) __a; -+ return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a); - } - --__extension__ static __inline int32_t 
__attribute__ ((__always_inline__)) --vcvts_s32_f32 (float32_t __a) -+/* vfmsq_laneq */ -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b, -+ float32x4_t __c, const int __lane) - { -- return (int32_t) __a; -+ return __builtin_aarch64_fmav4sf (-__b, -+ __aarch64_vdupq_laneq_f32 (__c, __lane), -+ __a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vcvts_u32_f32 (float32_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b, -+ float64x2_t __c, const int __lane) - { -- return (uint32_t) __a; -+ return __builtin_aarch64_fmav2df (-__b, -+ __aarch64_vdupq_laneq_f64 (__c, __lane), -+ __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vcvt_s32_f32 (float32x2_t __a) -+/* vld1 */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_f16 (const float16_t *__a) - { -- return __builtin_aarch64_lbtruncv2sfv2si (__a); -+ return __builtin_aarch64_ld1v4hf (__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcvt_u32_f32 (float32x2_t __a) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_f32 (const float32_t *a) - { -- return __builtin_aarch64_lbtruncuv2sfv2si_us (__a); -+ return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vcvtq_s32_f32 (float32x4_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_f64 (const float64_t *a) - { -- return __builtin_aarch64_lbtruncv4sfv4si (__a); -+ return (float64x1_t) {*a}; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcvtq_u32_f32 (float32x4_t __a) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_p8 (const poly8_t *a) - { -- return __builtin_aarch64_lbtruncuv4sfv4si_us (__a); -+ return (poly8x8_t) -+ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vcvt_s64_f64 (float64x1_t __a) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_p16 (const poly16_t *a) - { -- return (int64x1_t) {vcvtd_s64_f64 (__a[0])}; -+ return (poly16x4_t) -+ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcvt_u64_f64 (float64x1_t __a) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_p64 (const poly64_t *a) - { -- return (uint64x1_t) {vcvtd_u64_f64 (__a[0])}; -+ return (poly64x1_t) {*a}; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vcvtq_s64_f64 (float64x2_t __a) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_s8 (const int8_t *a) - { -- return __builtin_aarch64_lbtruncv2dfv2di (__a); -+ return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); - } - 
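Aside: a minimal caller for two of the intrinsics this hunk redefines (vld1q_f32 and vfmaq_laneq_f32). Illustrative sketch only, not part of the patch; the function name fma_lane_demo is invented for the example.

#include <arm_neon.h>

/* Load four floats from each pointer and fused-multiply-accumulate
   against lane 1 of COEF: result[i] = a[i] + b[i] * coef[1].  */
float32x4_t
fma_lane_demo (const float32_t *acc, const float32_t *vals, float32x4_t coef)
{
  float32x4_t a = vld1q_f32 (acc);        /* vld1q_f32: 128-bit load of 4 floats */
  float32x4_t b = vld1q_f32 (vals);
  return vfmaq_laneq_f32 (a, b, coef, 1); /* lane index must be a compile-time constant */
}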
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcvtq_u64_f64 (float64x2_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_s16 (const int16_t *a) - { -- return __builtin_aarch64_lbtruncuv2dfv2di_us (__a); -+ return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); - } - --/* vcvta */ -- --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vcvtad_s64_f64 (float64_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_s32 (const int32_t *a) - { -- return __builtin_aarch64_lrounddfdi (__a); -+ return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcvtad_u64_f64 (float64_t __a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_s64 (const int64_t *a) - { -- return __builtin_aarch64_lroundudfdi_us (__a); -+ return (int64x1_t) {*a}; - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vcvtas_s32_f32 (float32_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_u8 (const uint8_t *a) - { -- return __builtin_aarch64_lroundsfsi (__a); -+ return (uint8x8_t) -+ __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vcvtas_u32_f32 (float32_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_u16 (const uint16_t *a) - { -- return __builtin_aarch64_lroundusfsi_us (__a); -+ return (uint16x4_t) -+ __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vcvta_s32_f32 (float32x2_t __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_u32 (const uint32_t *a) - { -- return __builtin_aarch64_lroundv2sfv2si (__a); -+ return (uint32x2_t) -+ __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcvta_u32_f32 (float32x2_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_u64 (const uint64_t *a) - { -- return __builtin_aarch64_lrounduv2sfv2si_us (__a); -+ return (uint64x1_t) {*a}; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vcvtaq_s32_f32 (float32x4_t __a) -+/* vld1q */ -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_f16 (const float16_t *__a) - { -- return __builtin_aarch64_lroundv4sfv4si (__a); -+ return __builtin_aarch64_ld1v8hf (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcvtaq_u32_f32 (float32x4_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_f32 (const float32_t *a) - { -- return __builtin_aarch64_lrounduv4sfv4si_us (__a); -+ return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a); - } - --__extension__ static __inline int64x1_t __attribute__ 
((__always_inline__)) --vcvta_s64_f64 (float64x1_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_f64 (const float64_t *a) - { -- return (int64x1_t) {vcvtad_s64_f64 (__a[0])}; -+ return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcvta_u64_f64 (float64x1_t __a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_p8 (const poly8_t *a) - { -- return (uint64x1_t) {vcvtad_u64_f64 (__a[0])}; -+ return (poly8x16_t) -+ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vcvtaq_s64_f64 (float64x2_t __a) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_p16 (const poly16_t *a) - { -- return __builtin_aarch64_lroundv2dfv2di (__a); -+ return (poly16x8_t) -+ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcvtaq_u64_f64 (float64x2_t __a) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_p64 (const poly64_t *a) - { -- return __builtin_aarch64_lrounduv2dfv2di_us (__a); -+ return (poly64x2_t) -+ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); - } - --/* vcvtm */ -- --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vcvtmd_s64_f64 (float64_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_s8 (const int8_t *a) - { -- return __builtin_llfloor (__a); -+ return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcvtmd_u64_f64 (float64_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_s16 (const int16_t *a) - { -- return __builtin_aarch64_lfloorudfdi_us (__a); -+ return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vcvtms_s32_f32 (float32_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_s32 (const int32_t *a) - { -- return __builtin_ifloorf (__a); -+ return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vcvtms_u32_f32 (float32_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_s64 (const int64_t *a) - { -- return __builtin_aarch64_lfloorusfsi_us (__a); -+ return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vcvtm_s32_f32 (float32x2_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_u8 (const uint8_t *a) - { -- return __builtin_aarch64_lfloorv2sfv2si (__a); -+ return (uint8x16_t) -+ __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); - } - --__extension__ static 
__inline uint32x2_t __attribute__ ((__always_inline__)) --vcvtm_u32_f32 (float32x2_t __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_u16 (const uint16_t *a) - { -- return __builtin_aarch64_lflooruv2sfv2si_us (__a); -+ return (uint16x8_t) -+ __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vcvtmq_s32_f32 (float32x4_t __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_u32 (const uint32_t *a) - { -- return __builtin_aarch64_lfloorv4sfv4si (__a); -+ return (uint32x4_t) -+ __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcvtmq_u32_f32 (float32x4_t __a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_u64 (const uint64_t *a) - { -- return __builtin_aarch64_lflooruv4sfv4si_us (__a); -+ return (uint64x2_t) -+ __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vcvtm_s64_f64 (float64x1_t __a) -+/* vld1_dup */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_f16 (const float16_t* __a) - { -- return (int64x1_t) {vcvtmd_s64_f64 (__a[0])}; -+ return vdup_n_f16 (*__a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcvtm_u64_f64 (float64x1_t __a) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_f32 (const float32_t* __a) - { -- return (uint64x1_t) {vcvtmd_u64_f64 (__a[0])}; -+ return vdup_n_f32 (*__a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vcvtmq_s64_f64 (float64x2_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_f64 (const float64_t* __a) - { -- return __builtin_aarch64_lfloorv2dfv2di (__a); -+ return vdup_n_f64 (*__a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcvtmq_u64_f64 (float64x2_t __a) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_p8 (const poly8_t* __a) - { -- return __builtin_aarch64_lflooruv2dfv2di_us (__a); -+ return vdup_n_p8 (*__a); - } - --/* vcvtn */ -- --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vcvtnd_s64_f64 (float64_t __a) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_p16 (const poly16_t* __a) - { -- return __builtin_aarch64_lfrintndfdi (__a); -+ return vdup_n_p16 (*__a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcvtnd_u64_f64 (float64_t __a) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_p64 (const poly64_t* __a) - { -- return __builtin_aarch64_lfrintnudfdi_us (__a); -+ return vdup_n_p64 (*__a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vcvtns_s32_f32 (float32_t __a) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vld1_dup_s8 (const int8_t* __a) - { -- return __builtin_aarch64_lfrintnsfsi (__a); -+ return vdup_n_s8 (*__a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vcvtns_u32_f32 (float32_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_s16 (const int16_t* __a) - { -- return __builtin_aarch64_lfrintnusfsi_us (__a); -+ return vdup_n_s16 (*__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vcvtn_s32_f32 (float32x2_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_s32 (const int32_t* __a) - { -- return __builtin_aarch64_lfrintnv2sfv2si (__a); -+ return vdup_n_s32 (*__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcvtn_u32_f32 (float32x2_t __a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_s64 (const int64_t* __a) - { -- return __builtin_aarch64_lfrintnuv2sfv2si_us (__a); -+ return vdup_n_s64 (*__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vcvtnq_s32_f32 (float32x4_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_u8 (const uint8_t* __a) - { -- return __builtin_aarch64_lfrintnv4sfv4si (__a); -+ return vdup_n_u8 (*__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcvtnq_u32_f32 (float32x4_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_u16 (const uint16_t* __a) - { -- return __builtin_aarch64_lfrintnuv4sfv4si_us (__a); -+ return vdup_n_u16 (*__a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vcvtn_s64_f64 (float64x1_t __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_u32 (const uint32_t* __a) - { -- return (int64x1_t) {vcvtnd_s64_f64 (__a[0])}; -+ return vdup_n_u32 (*__a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcvtn_u64_f64 (float64x1_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_dup_u64 (const uint64_t* __a) - { -- return (uint64x1_t) {vcvtnd_u64_f64 (__a[0])}; -+ return vdup_n_u64 (*__a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vcvtnq_s64_f64 (float64x2_t __a) -+/* vld1q_dup */ -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_f16 (const float16_t* __a) - { -- return __builtin_aarch64_lfrintnv2dfv2di (__a); -+ return vdupq_n_f16 (*__a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcvtnq_u64_f64 (float64x2_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_f32 (const float32_t* __a) - { -- return __builtin_aarch64_lfrintnuv2dfv2di_us (__a); -+ return vdupq_n_f32 (*__a); - } - --/* vcvtp */ -- --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vcvtpd_s64_f64 (float64_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_f64 (const float64_t* __a) - { -- return __builtin_llceil (__a); -+ return vdupq_n_f64 (*__a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vcvtpd_u64_f64 (float64_t __a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_p8 (const poly8_t* __a) - { -- return __builtin_aarch64_lceiludfdi_us (__a); -+ return vdupq_n_p8 (*__a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vcvtps_s32_f32 (float32_t __a) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_p16 (const poly16_t* __a) - { -- return __builtin_iceilf (__a); -+ return vdupq_n_p16 (*__a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vcvtps_u32_f32 (float32_t __a) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_p64 (const poly64_t* __a) - { -- return __builtin_aarch64_lceilusfsi_us (__a); -+ return vdupq_n_p64 (*__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vcvtp_s32_f32 (float32x2_t __a) -+ __extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_s8 (const int8_t* __a) - { -- return __builtin_aarch64_lceilv2sfv2si (__a); -+ return vdupq_n_s8 (*__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vcvtp_u32_f32 (float32x2_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_s16 (const int16_t* __a) - { -- return __builtin_aarch64_lceiluv2sfv2si_us (__a); -+ return vdupq_n_s16 (*__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vcvtpq_s32_f32 (float32x4_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_s32 (const int32_t* __a) - { -- return __builtin_aarch64_lceilv4sfv4si (__a); -+ return vdupq_n_s32 (*__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vcvtpq_u32_f32 (float32x4_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_s64 (const int64_t* __a) - { -- return __builtin_aarch64_lceiluv4sfv4si_us (__a); -+ return vdupq_n_s64 (*__a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vcvtp_s64_f64 (float64x1_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_u8 (const uint8_t* __a) - { -- return (int64x1_t) {vcvtpd_s64_f64 (__a[0])}; -+ return vdupq_n_u8 (*__a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vcvtp_u64_f64 (float64x1_t __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_u16 (const uint16_t* __a) - { -- return (uint64x1_t) {vcvtpd_u64_f64 (__a[0])}; -+ return vdupq_n_u16 (*__a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vcvtpq_s64_f64 (float64x2_t __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_u32 
(const uint32_t* __a) - { -- return __builtin_aarch64_lceilv2dfv2di (__a); -+ return vdupq_n_u32 (*__a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vcvtpq_u64_f64 (float64x2_t __a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_dup_u64 (const uint64_t* __a) - { -- return __builtin_aarch64_lceiluv2dfv2di_us (__a); -+ return vdupq_n_u64 (*__a); - } - --/* vdup_n */ -+/* vld1_lane */ - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vdup_n_f32 (float32_t __a) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane) - { -- return (float32x2_t) {__a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vdup_n_f64 (float64_t __a) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane) - { -- return (float64x1_t) {__a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vdup_n_p8 (poly8_t __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) - { -- return (poly8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vdup_n_p16 (poly16_t __a) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane) - { -- return (poly16x4_t) {__a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vdup_n_s8 (int8_t __a) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_p16 (const poly16_t *__src, poly16x4_t __vec, const int __lane) - { -- return (int8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vdup_n_s16 (int16_t __a) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_p64 (const poly64_t *__src, poly64x1_t __vec, const int __lane) - { -- return (int16x4_t) {__a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vdup_n_s32 (int32_t __a) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane) - { -- return (int32x2_t) {__a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vdup_n_s64 (int64_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane) - { -- return (int64x1_t) {__a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vdup_n_u8 (uint8_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane) - { -- return (uint8x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vdup_n_u16 (uint16_t __a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) - { -- return (uint16x4_t) {__a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vdup_n_u32 (uint32_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane) - { -- return (uint32x2_t) {__a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vdup_n_u64 (uint64_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane) - { -- return (uint64x1_t) {__a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --/* vdupq_n */ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vdupq_n_f32 (float32_t __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane) - { -- return (float32x4_t) {__a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vdupq_n_f64 (float64_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) - { -- return (float64x2_t) {__a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vdupq_n_p8 (uint32_t __a) -+/* vld1q_lane */ -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane) - { -- return (poly8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, -- __a, __a, __a, __a, __a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vdupq_n_p16 (uint32_t __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane) - { -- return (poly16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; -+ return 
__aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vdupq_n_s8 (int32_t __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane) - { -- return (int8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, -- __a, __a, __a, __a, __a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vdupq_n_s16 (int32_t __a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane) - { -- return (int16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vdupq_n_s32 (int32_t __a) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane) - { -- return (int32x4_t) {__a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vdupq_n_s64 (int64_t __a) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_p64 (const poly64_t *__src, poly64x2_t __vec, const int __lane) - { -- return (int64x2_t) {__a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vdupq_n_u8 (uint32_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane) - { -- return (uint8x16_t) {__a, __a, __a, __a, __a, __a, __a, __a, -- __a, __a, __a, __a, __a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vdupq_n_u16 (uint32_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane) - { -- return (uint16x8_t) {__a, __a, __a, __a, __a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vdupq_n_u32 (uint32_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane) - { -- return (uint32x4_t) {__a, __a, __a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vdupq_n_u64 (uint64_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane) - { -- return (uint64x2_t) {__a, __a}; -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --/* vdup_lane */ -- --__extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) --vdup_lane_f32 (float32x2_t __a, const int __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane) - { -- return __aarch64_vdup_lane_f32 (__a, __b); -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vdup_lane_f64 (float64x1_t __a, const int __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane) - { -- return __aarch64_vdup_lane_f64 (__a, __b); -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vdup_lane_p8 (poly8x8_t __a, const int __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane) - { -- return __aarch64_vdup_lane_p8 (__a, __b); -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vdup_lane_p16 (poly16x4_t __a, const int __b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane) - { -- return __aarch64_vdup_lane_p16 (__a, __b); -+ return __aarch64_vset_lane_any (*__src, __vec, __lane); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vdup_lane_s8 (int8x8_t __a, const int __b) -+/* vldn */ -+ -+__extension__ extern __inline int64x1x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_s64 (const int64_t * __a) - { -- return __aarch64_vdup_lane_s8 (__a, __b); -+ int64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -+ return ret; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vdup_lane_s16 (int16x4_t __a, const int __b) -+__extension__ extern __inline uint64x1x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_u64 (const uint64_t * __a) - { -- return __aarch64_vdup_lane_s16 (__a, __b); -+ uint64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -+ return ret; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vdup_lane_s32 (int32x2_t __a, const int __b) -+__extension__ extern __inline float64x1x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_f64 (const float64_t * __a) - { -- return __aarch64_vdup_lane_s32 (__a, __b); -+ float64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; -+ return ret; - } - --__extension__ static 
__inline int64x1_t __attribute__ ((__always_inline__)) --vdup_lane_s64 (int64x1_t __a, const int __b) -+__extension__ extern __inline int8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_s8 (const int8_t * __a) - { -- return __aarch64_vdup_lane_s64 (__a, __b); -+ int8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vdup_lane_u8 (uint8x8_t __a, const int __b) -+__extension__ extern __inline poly8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_p8 (const poly8_t * __a) - { -- return __aarch64_vdup_lane_u8 (__a, __b); -+ poly8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vdup_lane_u16 (uint16x4_t __a, const int __b) -+__extension__ extern __inline poly64x1x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_p64 (const poly64_t * __a) - { -- return __aarch64_vdup_lane_u16 (__a, __b); -+ poly64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0); -+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1); -+ return ret; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vdup_lane_u32 (uint32x2_t __a, const int __b) -+__extension__ extern __inline int16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_s16 (const int16_t * __a) - { -- return __aarch64_vdup_lane_u32 (__a, __b); -+ int16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vdup_lane_u64 (uint64x1_t __a, const int __b) -+__extension__ extern __inline poly16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_p16 (const poly16_t * __a) - { -- return __aarch64_vdup_lane_u64 (__a, __b); -+ poly16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; - } - --/* vdup_laneq */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vdup_laneq_f32 (float32x4_t __a, const int __b) -+__extension__ extern __inline int32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_s32 (const int32_t * __a) - { -- return __aarch64_vdup_laneq_f32 (__a, __b); -+ int32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); 
-+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); -+ return ret; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vdup_laneq_f64 (float64x2_t __a, const int __b) -+__extension__ extern __inline uint8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_u8 (const uint8_t * __a) - { -- return __aarch64_vdup_laneq_f64 (__a, __b); -+ uint8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vdup_laneq_p8 (poly8x16_t __a, const int __b) -+__extension__ extern __inline uint16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_u16 (const uint16_t * __a) - { -- return __aarch64_vdup_laneq_p8 (__a, __b); -+ uint16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vdup_laneq_p16 (poly16x8_t __a, const int __b) -+__extension__ extern __inline uint32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_u32 (const uint32_t * __a) - { -- return __aarch64_vdup_laneq_p16 (__a, __b); -+ uint32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); -+ return ret; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vdup_laneq_s8 (int8x16_t __a, const int __b) -+__extension__ extern __inline float16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_f16 (const float16_t * __a) - { -- return __aarch64_vdup_laneq_s8 (__a, __b); -+ float16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v4hf (__a); -+ ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0); -+ ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1); -+ return ret; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vdup_laneq_s16 (int16x8_t __a, const int __b) -+__extension__ extern __inline float32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_f32 (const float32_t * __a) - { -- return __aarch64_vdup_laneq_s16 (__a, __b); -+ float32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); -+ return ret; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vdup_laneq_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline int8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_s8 (const int8_t * __a) - { -- return __aarch64_vdup_laneq_s32 (__a, __b); -+ 
int8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vdup_laneq_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline poly8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_p8 (const poly8_t * __a) - { -- return __aarch64_vdup_laneq_s64 (__a, __b); -+ poly8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vdup_laneq_u8 (uint8x16_t __a, const int __b) -+__extension__ extern __inline int16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_s16 (const int16_t * __a) - { -- return __aarch64_vdup_laneq_u8 (__a, __b); -+ int16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vdup_laneq_u16 (uint16x8_t __a, const int __b) -+__extension__ extern __inline poly16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_p16 (const poly16_t * __a) - { -- return __aarch64_vdup_laneq_u16 (__a, __b); -+ poly16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vdup_laneq_u32 (uint32x4_t __a, const int __b) -+__extension__ extern __inline poly64x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_p64 (const poly64_t * __a) - { -- return __aarch64_vdup_laneq_u32 (__a, __b); -+ poly64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0); -+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1); -+ return ret; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vdup_laneq_u64 (uint64x2_t __a, const int __b) -+__extension__ extern __inline int32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_s32 (const int32_t * __a) - { -- return __aarch64_vdup_laneq_u64 (__a, __b); -+ int32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -+ return ret; - } - --/* vdupq_lane */ --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vdupq_lane_f32 (float32x2_t 
__a, const int __b) -+__extension__ extern __inline int64x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_s64 (const int64_t * __a) - { -- return __aarch64_vdupq_lane_f32 (__a, __b); -+ int64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vdupq_lane_f64 (float64x1_t __a, const int __b) -+__extension__ extern __inline uint8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_u8 (const uint8_t * __a) - { -- return __aarch64_vdupq_lane_f64 (__a, __b); -+ uint8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vdupq_lane_p8 (poly8x8_t __a, const int __b) -+__extension__ extern __inline uint16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_u16 (const uint16_t * __a) - { -- return __aarch64_vdupq_lane_p8 (__a, __b); -+ uint16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vdupq_lane_p16 (poly16x4_t __a, const int __b) -+__extension__ extern __inline uint32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_u32 (const uint32_t * __a) - { -- return __aarch64_vdupq_lane_p16 (__a, __b); -+ uint32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -+ return ret; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vdupq_lane_s8 (int8x8_t __a, const int __b) -+__extension__ extern __inline uint64x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_u64 (const uint64_t * __a) - { -- return __aarch64_vdupq_lane_s8 (__a, __b); -+ uint64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -+ return ret; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vdupq_lane_s16 (int16x4_t __a, const int __b) -+__extension__ extern __inline float16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_f16 (const float16_t * __a) - { -- return __aarch64_vdupq_lane_s16 (__a, __b); -+ float16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v8hf (__a); -+ ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0); -+ ret.val[1] = __builtin_aarch64_get_qregoiv8hf 
(__o, 1); -+ return ret; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vdupq_lane_s32 (int32x2_t __a, const int __b) -+__extension__ extern __inline float32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_f32 (const float32_t * __a) - { -- return __aarch64_vdupq_lane_s32 (__a, __b); -+ float32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); -+ return ret; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vdupq_lane_s64 (int64x1_t __a, const int __b) -+__extension__ extern __inline float64x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_f64 (const float64_t * __a) - { -- return __aarch64_vdupq_lane_s64 (__a, __b); -+ float64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); -+ return ret; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vdupq_lane_u8 (uint8x8_t __a, const int __b) -+__extension__ extern __inline int64x1x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_s64 (const int64_t * __a) - { -- return __aarch64_vdupq_lane_u8 (__a, __b); -+ int64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -+ return ret; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vdupq_lane_u16 (uint16x4_t __a, const int __b) -+__extension__ extern __inline uint64x1x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_u64 (const uint64_t * __a) - { -- return __aarch64_vdupq_lane_u16 (__a, __b); -+ uint64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -+ return ret; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vdupq_lane_u32 (uint32x2_t __a, const int __b) -+__extension__ extern __inline float64x1x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_f64 (const float64_t * __a) - { -- return __aarch64_vdupq_lane_u32 (__a, __b); -+ float64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; -+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; -+ return ret; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vdupq_lane_u64 (uint64x1_t __a, const int __b) -+__extension__ extern __inline int8x8x3_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_s8 (const int8_t * __a) - { -- return __aarch64_vdupq_lane_u64 (__a, __b); -+ int8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; - } - --/* vdupq_laneq */ --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vdupq_laneq_f32 (float32x4_t __a, const int __b) -+__extension__ extern __inline poly8x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_p8 (const poly8_t * __a) - { -- return __aarch64_vdupq_laneq_f32 (__a, __b); -+ poly8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vdupq_laneq_f64 (float64x2_t __a, const int __b) -+__extension__ extern __inline int16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_s16 (const int16_t * __a) - { -- return __aarch64_vdupq_laneq_f64 (__a, __b); -+ int16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vdupq_laneq_p8 (poly8x16_t __a, const int __b) -+__extension__ extern __inline poly16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_p16 (const poly16_t * __a) - { -- return __aarch64_vdupq_laneq_p8 (__a, __b); -+ poly16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vdupq_laneq_p16 (poly16x8_t __a, const int __b) -+__extension__ extern __inline int32x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_s32 (const int32_t * __a) - { -- return __aarch64_vdupq_laneq_p16 (__a, __b); -+ int32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -+ ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -+ return ret; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vdupq_laneq_s8 (int8x16_t __a, const int __b) -+__extension__ extern __inline uint8x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_u8 
(const uint8_t * __a) - { -- return __aarch64_vdupq_laneq_s8 (__a, __b); -+ uint8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vdupq_laneq_s16 (int16x8_t __a, const int __b) -+__extension__ extern __inline uint16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_u16 (const uint16_t * __a) - { -- return __aarch64_vdupq_laneq_s16 (__a, __b); -+ uint16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vdupq_laneq_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline uint32x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_u32 (const uint32_t * __a) - { -- return __aarch64_vdupq_laneq_s32 (__a, __b); -+ uint32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -+ return ret; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vdupq_laneq_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline float16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_f16 (const float16_t * __a) - { -- return __aarch64_vdupq_laneq_s64 (__a, __b); -+ float16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v4hf (__a); -+ ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0); -+ ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1); -+ ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2); -+ return ret; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vdupq_laneq_u8 (uint8x16_t __a, const int __b) -+__extension__ extern __inline float32x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_f32 (const float32_t * __a) - { -- return __aarch64_vdupq_laneq_u8 (__a, __b); -+ float32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); -+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); -+ return ret; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vdupq_laneq_u16 (uint16x8_t __a, const int __b) -+__extension__ extern __inline poly64x1x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_p64 (const poly64_t * __a) - { -- return __aarch64_vdupq_laneq_u16 (__a, __b); -+ poly64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ 
__o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0); -+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1); -+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2); -+ return ret; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vdupq_laneq_u32 (uint32x4_t __a, const int __b) -+__extension__ extern __inline int8x16x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_s8 (const int8_t * __a) - { -- return __aarch64_vdupq_laneq_u32 (__a, __b); -+ int8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vdupq_laneq_u64 (uint64x2_t __a, const int __b) -+__extension__ extern __inline poly8x16x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_p8 (const poly8_t * __a) - { -- return __aarch64_vdupq_laneq_u64 (__a, __b); -+ poly8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; - } - --/* vdupb_lane */ --__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) --vdupb_lane_p8 (poly8x8_t __a, const int __b) -+__extension__ extern __inline int16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_s16 (const int16_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ int16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vdupb_lane_s8 (int8x8_t __a, const int __b) -+__extension__ extern __inline poly16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_p16 (const poly16_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ poly16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vdupb_lane_u8 (uint8x8_t __a, const int __b) -+__extension__ extern __inline int32x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_s32 (const int32_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ int32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v4si ((const 
__builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -+ return ret; - } - --/* vduph_lane */ --__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) --vduph_lane_p16 (poly16x4_t __a, const int __b) -+__extension__ extern __inline int64x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_s64 (const int64_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ int64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -+ return ret; - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vduph_lane_s16 (int16x4_t __a, const int __b) -+__extension__ extern __inline uint8x16x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_u8 (const uint8_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ uint8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vduph_lane_u16 (uint16x4_t __a, const int __b) -+__extension__ extern __inline uint16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_u16 (const uint16_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ uint16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; - } - --/* vdups_lane */ --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vdups_lane_f32 (float32x2_t __a, const int __b) -+__extension__ extern __inline uint32x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_u32 (const uint32_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ uint32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -+ return ret; - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vdups_lane_s32 (int32x2_t __a, const int __b) -+__extension__ extern __inline uint64x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_u64 (const uint64_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ uint64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); -+ 
ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -+ return ret; - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vdups_lane_u32 (uint32x2_t __a, const int __b) -+__extension__ extern __inline float16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_f16 (const float16_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ float16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v8hf (__a); -+ ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0); -+ ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1); -+ ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2); -+ return ret; - } - --/* vdupd_lane */ --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vdupd_lane_f64 (float64x1_t __a, const int __b) -+__extension__ extern __inline float32x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_f32 (const float32_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __b); -- return __a[0]; -+ float32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); -+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); -+ return ret; - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vdupd_lane_s64 (int64x1_t __a, const int __b) -+__extension__ extern __inline float64x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_f64 (const float64_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __b); -- return __a[0]; -+ float64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); -+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); -+ return ret; - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vdupd_lane_u64 (uint64x1_t __a, const int __b) -+__extension__ extern __inline poly64x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_p64 (const poly64_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __b); -- return __a[0]; -+ poly64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0); -+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1); -+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2); -+ return ret; - } - --/* vdupb_laneq */ --__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) --vdupb_laneq_p8 (poly8x16_t __a, const int __b) -+__extension__ extern __inline int64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_s64 (const int64_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ int64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); 
-+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -+ ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -+ return ret; - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vdupb_laneq_s8 (int8x16_t __a, const int __attribute__ ((unused)) __b) -+__extension__ extern __inline uint64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_u64 (const uint64_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ uint64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -+ ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -+ return ret; - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vdupb_laneq_u8 (uint8x16_t __a, const int __b) -+__extension__ extern __inline float64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_f64 (const float64_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ float64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; -+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; -+ ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; -+ return ret; - } - --/* vduph_laneq */ --__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) --vduph_laneq_p16 (poly16x8_t __a, const int __b) -+__extension__ extern __inline int8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_s8 (const int8_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ int8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vduph_laneq_s16 (int16x8_t __a, const int __b) -+__extension__ extern __inline poly8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_p8 (const poly8_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ poly8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vduph_laneq_u16 (uint16x8_t __a, const int __b) -+__extension__ extern __inline int16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vld4_s16 (const int16_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ int16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; - } - --/* vdups_laneq */ --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vdups_laneq_f32 (float32x4_t __a, const int __b) -+__extension__ extern __inline poly16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_p16 (const poly16_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ poly16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vdups_laneq_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline int32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_s32 (const int32_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ int32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -+ ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -+ ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -+ return ret; - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vdups_laneq_u32 (uint32x4_t __a, const int __b) -+__extension__ extern __inline uint8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_u8 (const uint8_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ uint8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; - } - --/* vdupd_laneq */ --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vdupd_laneq_f64 (float64x2_t __a, const int __b) -+__extension__ extern __inline uint16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_u16 (const uint16_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ uint16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (uint16x4_t) 
__builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vdupd_laneq_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline uint32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_u32 (const uint32_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ uint32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -+ ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -+ return ret; - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vdupd_laneq_u64 (uint64x2_t __a, const int __b) -+__extension__ extern __inline float16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_f16 (const float16_t * __a) - { -- return __aarch64_vget_lane_any (__a, __b); -+ float16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v4hf (__a); -+ ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0); -+ ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1); -+ ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2); -+ ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3); -+ return ret; - } - --/* vext */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c) -+__extension__ extern __inline float32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_f32 (const float32_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); --#endif -+ float32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); -+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); -+ ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); -+ return ret; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) -+__extension__ extern __inline poly64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_p64 (const poly64_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); -- /* The only possible index to the assembler instruction returns element 0. 
*/ -- return __a; -+ poly64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0); -+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1); -+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2); -+ ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3); -+ return ret; - } --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) -+ -+__extension__ extern __inline int8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_s8 (const int8_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint8x8_t) -- {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); --#endif -+ int8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) -+__extension__ extern __inline poly8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_p8 (const poly8_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, -- (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); --#endif -+ poly8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) -+__extension__ extern __inline int16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_s16 (const int16_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint8x8_t) -- {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); --#endif -+ int16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; - } - --__extension__ static __inline int16x4_t 
__attribute__ ((__always_inline__)) --vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) -+__extension__ extern __inline poly16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_p16 (const poly16_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, -- (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); --#endif -+ poly16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) -+__extension__ extern __inline int32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_s32 (const int32_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); --#endif -+ int32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -+ ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -+ return ret; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) -+__extension__ extern __inline int64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_s64 (const int64_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); -- /* The only possible index to the assembler instruction returns element 0. 
*/ -- return __a; -+ int64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -+ ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -+ return ret; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) -+__extension__ extern __inline uint8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_u8 (const uint8_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint8x8_t) -- {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); --#endif -+ uint8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) -+__extension__ extern __inline uint16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_u16 (const uint16_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, -- (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); --#endif -+ uint16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) -+__extension__ extern __inline uint32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_u32 (const uint32_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); --#endif -+ uint32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -+ ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -+ return ret; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vext_u64 (uint64x1_t __a, uint64x1_t __b, __const 
int __c) -+__extension__ extern __inline uint64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_u64 (const uint64_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); -- /* The only possible index to the assembler instruction returns element 0. */ -- return __a; -+ uint64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -+ ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -+ return ret; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) -+__extension__ extern __inline float16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_f16 (const float16_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, -- (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); --#endif -+ float16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v8hf (__a); -+ ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0); -+ ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1); -+ ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2); -+ ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) -+__extension__ extern __inline float32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_f32 (const float32_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); --#endif -+ float32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); -+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); -+ ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); -+ return ret; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) -+__extension__ extern __inline float64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_f64 (const float64_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint8x16_t) -- {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -- 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -- __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); --#endif -+ float64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); -+ ret.val[1] = 
(float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); -+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); -+ ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); -+ return ret; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) -+__extension__ extern __inline poly64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_p64 (const poly64_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint16x8_t) -- {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); --#else -- return __builtin_shuffle (__a, __b, -- (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); --#endif -+ poly64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0); -+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1); -+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2); -+ ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3); -+ return ret; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) -+/* vldn_dup */ -+ -+__extension__ extern __inline int8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_s8 (const int8_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint8x16_t) -- {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -- 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -- __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); --#endif -+ int8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c) -+__extension__ extern __inline int16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_s16 (const int16_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint16x8_t) -- {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); --#else -- return __builtin_shuffle (__a, __b, -- (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); --#endif -+ int16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) -+__extension__ extern __inline int32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_s32 (const int32_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); 
--#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, -- (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); --#endif -+ int32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); -+ return ret; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) -+__extension__ extern __inline float16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_f16 (const float16_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); --#endif -+ float16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a); -+ ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0); -+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1); -+ return ret; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) -+__extension__ extern __inline float32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_f32 (const float32_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint8x16_t) -- {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, -- 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, -- __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); --#endif -+ float32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1); -+ return ret; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) -+__extension__ extern __inline float64x1x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_f64 (const float64_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint16x8_t) -- {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); --#else -- return __builtin_shuffle (__a, __b, -- (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); --#endif -+ float64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; -+ return ret; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) -+__extension__ extern __inline uint8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_u8 (const uint8_t * __a) - { -- 
__AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, -- (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); --#endif -+ uint8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) -+__extension__ extern __inline uint16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_u16 (const uint16_t * __a) - { -- __AARCH64_LANE_CHECK (__a, __c); --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); --#endif -+ uint16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; - } - --/* vfma */ -- --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) -+__extension__ extern __inline uint32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_u32 (const uint32_t * __a) - { -- return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; -+ uint32x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); -+ return ret; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) -+__extension__ extern __inline poly8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_p8 (const poly8_t * __a) - { -- return __builtin_aarch64_fmav2sf (__b, __c, __a); -+ poly8x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -+ return ret; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -+__extension__ extern __inline poly16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_p16 (const poly16_t * __a) - { -- return __builtin_aarch64_fmav4sf (__b, __c, __a); -+ poly16x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vfmaq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) -+__extension__ extern __inline poly64x1x2_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vld2_dup_p64 (const poly64_t * __a) - { -- return __builtin_aarch64_fmav2df (__b, __c, __a); -+ poly64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 0); -+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregoidi_pss (__o, 1); -+ return ret; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vfma_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) -+ -+__extension__ extern __inline int64x1x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_s64 (const int64_t * __a) - { -- return __builtin_aarch64_fmav2sf (__b, vdup_n_f32 (__c), __a); -+ int64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -+ return ret; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vfmaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) -+__extension__ extern __inline uint64x1x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2_dup_u64 (const uint64_t * __a) - { -- return __builtin_aarch64_fmav4sf (__b, vdupq_n_f32 (__c), __a); -+ uint64x1x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vfmaq_n_f64 (float64x2_t __a, float64x2_t __b, float64_t __c) -+__extension__ extern __inline int8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_s8 (const int8_t * __a) - { -- return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c), __a); -+ int8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; - } - --/* vfma_lane */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vfma_lane_f32 (float32x2_t __a, float32x2_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline poly8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_p8 (const poly8_t * __a) - { -- return __builtin_aarch64_fmav2sf (__b, -- __aarch64_vdup_lane_f32 (__c, __lane), -- __a); -+ poly8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vfma_lane_f64 (float64x1_t __a, float64x1_t __b, -- float64x1_t __c, const int __lane) -+__extension__ extern __inline int16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_s16 (const int16_t * __a) - { -- return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; -+ 
int16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vfmad_lane_f64 (float64_t __a, float64_t __b, -- float64x1_t __c, const int __lane) -+__extension__ extern __inline poly16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_p16 (const poly16_t * __a) - { -- return __builtin_fma (__b, __c[0], __a); -+ poly16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vfmas_lane_f32 (float32_t __a, float32_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline int32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_s32 (const int32_t * __a) - { -- return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a); -+ int32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -+ return ret; - } - --/* vfma_laneq */ -+__extension__ extern __inline int64x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_s64 (const int64_t * __a) -+{ -+ int64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -+ return ret; -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vfma_laneq_f32 (float32x2_t __a, float32x2_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline uint8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_u8 (const uint8_t * __a) - { -- return __builtin_aarch64_fmav2sf (__b, -- __aarch64_vdup_laneq_f32 (__c, __lane), -- __a); -+ uint8x16x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -+ return ret; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vfma_laneq_f64 (float64x1_t __a, float64x1_t __b, -- float64x2_t __c, const int __lane) -+__extension__ extern __inline uint16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_u16 (const uint16_t * __a) - { -- float64_t __c0 = __aarch64_vget_lane_any (__c, __lane); -- return (float64x1_t) {__builtin_fma (__b[0], __c0, __a[0])}; -+ uint16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) 
__builtin_aarch64_get_qregoiv8hi (__o, 1); -+ return ret; - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vfmad_laneq_f64 (float64_t __a, float64_t __b, -- float64x2_t __c, const int __lane) -+__extension__ extern __inline uint32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_u32 (const uint32_t * __a) - { -- return __builtin_fma (__b, __aarch64_vget_lane_any (__c, __lane), __a); -+ uint32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -+ return ret; - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vfmas_laneq_f32 (float32_t __a, float32_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline uint64x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_u64 (const uint64_t * __a) - { -- return __builtin_fmaf (__b, __aarch64_vget_lane_any (__c, __lane), __a); -+ uint64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -+ return ret; - } - --/* vfmaq_lane */ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vfmaq_lane_f32 (float32x4_t __a, float32x4_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline float16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_f16 (const float16_t * __a) - { -- return __builtin_aarch64_fmav4sf (__b, -- __aarch64_vdupq_lane_f32 (__c, __lane), -- __a); -+ float16x8x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a); -+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); -+ ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vfmaq_lane_f64 (float64x2_t __a, float64x2_t __b, -- float64x1_t __c, const int __lane) -+__extension__ extern __inline float32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_f32 (const float32_t * __a) - { -- return __builtin_aarch64_fmav2df (__b, vdupq_n_f64 (__c[0]), __a); -+ float32x4x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); -+ return ret; - } - --/* vfmaq_laneq */ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vfmaq_laneq_f32 (float32x4_t __a, float32x4_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline float64x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_f64 (const float64_t * __a) - { -- return __builtin_aarch64_fmav4sf (__b, -- __aarch64_vdupq_laneq_f32 (__c, __lane), -- __a); -+ float64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); -+ 
ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b, -- float64x2_t __c, const int __lane) -+__extension__ extern __inline poly64x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld2q_dup_p64 (const poly64_t * __a) - { -- return __builtin_aarch64_fmav2df (__b, -- __aarch64_vdupq_laneq_f64 (__c, __lane), -- __a); -+ poly64x2x2_t ret; -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 0); -+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregoiv2di_pss (__o, 1); -+ return ret; - } - --/* vfms */ -- --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) -+__extension__ extern __inline int64x1x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_s64 (const int64_t * __a) - { -- return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; -+ int64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -+ return ret; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) -+__extension__ extern __inline uint64x1x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_u64 (const uint64_t * __a) - { -- return __builtin_aarch64_fmav2sf (-__b, __c, __a); -+ uint64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -+ return ret; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) -+__extension__ extern __inline float64x1x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_f64 (const float64_t * __a) - { -- return __builtin_aarch64_fmav4sf (-__b, __c, __a); -+ float64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; -+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vfmsq_f64 (float64x2_t __a, float64x2_t __b, float64x2_t __c) -+__extension__ extern __inline int8x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_s8 (const int8_t * __a) - { -- return __builtin_aarch64_fmav2df (-__b, __c, __a); -+ int8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi 
(__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; - } - -- --/* vfms_lane */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vfms_lane_f32 (float32x2_t __a, float32x2_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline poly8x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_p8 (const poly8_t * __a) - { -- return __builtin_aarch64_fmav2sf (-__b, -- __aarch64_vdup_lane_f32 (__c, __lane), -- __a); -+ poly8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vfms_lane_f64 (float64x1_t __a, float64x1_t __b, -- float64x1_t __c, const int __lane) -+__extension__ extern __inline int16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_s16 (const int16_t * __a) - { -- return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; -+ int16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vfmsd_lane_f64 (float64_t __a, float64_t __b, -- float64x1_t __c, const int __lane) -+__extension__ extern __inline poly16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_p16 (const poly16_t * __a) - { -- return __builtin_fma (-__b, __c[0], __a); -+ poly16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vfmss_lane_f32 (float32_t __a, float32_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline int32x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_s32 (const int32_t * __a) - { -- return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a); -+ int32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -+ ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -+ return ret; - } - --/* vfms_laneq */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vfms_laneq_f32 (float32x2_t __a, float32x2_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline uint8x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vld3_dup_u8 (const uint8_t * __a) - { -- return __builtin_aarch64_fmav2sf (-__b, -- __aarch64_vdup_laneq_f32 (__c, __lane), -- __a); -+ uint8x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -+ return ret; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vfms_laneq_f64 (float64x1_t __a, float64x1_t __b, -- float64x2_t __c, const int __lane) -+__extension__ extern __inline uint16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_u16 (const uint16_t * __a) - { -- float64_t __c0 = __aarch64_vget_lane_any (__c, __lane); -- return (float64x1_t) {__builtin_fma (-__b[0], __c0, __a[0])}; -+ uint16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -+ return ret; - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vfmsd_laneq_f64 (float64_t __a, float64_t __b, -- float64x2_t __c, const int __lane) -+__extension__ extern __inline uint32x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_u32 (const uint32_t * __a) - { -- return __builtin_fma (-__b, __aarch64_vget_lane_any (__c, __lane), __a); -+ uint32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -+ return ret; - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vfmss_laneq_f32 (float32_t __a, float32_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline float16x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_f16 (const float16_t * __a) - { -- return __builtin_fmaf (-__b, __aarch64_vget_lane_any (__c, __lane), __a); -+ float16x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a); -+ ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0); -+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1); -+ ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2); -+ return ret; - } - --/* vfmsq_lane */ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vfmsq_lane_f32 (float32x4_t __a, float32x4_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline float32x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_f32 (const float32_t * __a) - { -- return __builtin_aarch64_fmav4sf (-__b, -- __aarch64_vdupq_lane_f32 (__c, __lane), -- __a); -+ float32x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) 
__builtin_aarch64_get_dregciv2sf (__o, 1); -+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vfmsq_lane_f64 (float64x2_t __a, float64x2_t __b, -- float64x1_t __c, const int __lane) -+__extension__ extern __inline poly64x1x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3_dup_p64 (const poly64_t * __a) - { -- return __builtin_aarch64_fmav2df (-__b, vdupq_n_f64 (__c[0]), __a); -+ poly64x1x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 0); -+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 1); -+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregcidi_pss (__o, 2); -+ return ret; - } - --/* vfmsq_laneq */ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vfmsq_laneq_f32 (float32x4_t __a, float32x4_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline int8x16x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_s8 (const int8_t * __a) - { -- return __builtin_aarch64_fmav4sf (-__b, -- __aarch64_vdupq_laneq_f32 (__c, __lane), -- __a); -+ int8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vfmsq_laneq_f64 (float64x2_t __a, float64x2_t __b, -- float64x2_t __c, const int __lane) -+__extension__ extern __inline poly8x16x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_p8 (const poly8_t * __a) - { -- return __builtin_aarch64_fmav2df (-__b, -- __aarch64_vdupq_laneq_f64 (__c, __lane), -- __a); -+ poly8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; - } - --/* vld1 */ -+__extension__ extern __inline int16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_s16 (const int16_t * __a) -+{ -+ int16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; -+} - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) --vld1_f16 (const float16_t *__a) -+__extension__ extern __inline poly16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_p16 (const poly16_t * __a) - { -- return __builtin_aarch64_ld1v4hf (__a); -+ poly16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) 
__builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vld1_f32 (const float32_t *a) -+__extension__ extern __inline int32x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_s32 (const int32_t * __a) - { -- return __builtin_aarch64_ld1v2sf ((const __builtin_aarch64_simd_sf *) a); -+ int32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -+ return ret; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vld1_f64 (const float64_t *a) -+__extension__ extern __inline int64x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_s64 (const int64_t * __a) - { -- return (float64x1_t) {*a}; -+ int64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -+ return ret; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vld1_p8 (const poly8_t *a) -+__extension__ extern __inline uint8x16x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_u8 (const uint8_t * __a) - { -- return (poly8x8_t) -- __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); -+ uint8x16x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -+ return ret; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vld1_p16 (const poly16_t *a) -+__extension__ extern __inline uint16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_u16 (const uint16_t * __a) - { -- return (poly16x4_t) -- __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); -+ uint16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -+ return ret; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vld1_s8 (const int8_t *a) -+__extension__ extern __inline uint32x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_u32 (const uint32_t * __a) - { -- return __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); -+ uint32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) 
__builtin_aarch64_get_qregciv4si (__o, 0); -+ ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -+ return ret; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vld1_s16 (const int16_t *a) -+__extension__ extern __inline uint64x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_u64 (const uint64_t * __a) - { -- return __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); -+ uint64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -+ return ret; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vld1_s32 (const int32_t *a) -+__extension__ extern __inline float16x8x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_f16 (const float16_t * __a) - { -- return __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); -+ float16x8x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a); -+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0); -+ ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1); -+ ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2); -+ return ret; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vld1_s64 (const int64_t *a) -+__extension__ extern __inline float32x4x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_f32 (const float32_t * __a) - { -- return (int64x1_t) {*a}; -+ float32x4x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); -+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); -+ return ret; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vld1_u8 (const uint8_t *a) -+__extension__ extern __inline float64x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_f64 (const float64_t * __a) - { -- return (uint8x8_t) -- __builtin_aarch64_ld1v8qi ((const __builtin_aarch64_simd_qi *) a); -+ float64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); -+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); -+ return ret; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vld1_u16 (const uint16_t *a) -+__extension__ extern __inline poly64x2x3_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld3q_dup_p64 (const poly64_t * __a) - { -- return (uint16x4_t) -- __builtin_aarch64_ld1v4hi ((const __builtin_aarch64_simd_hi *) a); -+ poly64x2x3_t ret; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -+ 
ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 0); -+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 1); -+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregciv2di_pss (__o, 2); -+ return ret; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vld1_u32 (const uint32_t *a) -+__extension__ extern __inline int64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_s64 (const int64_t * __a) - { -- return (uint32x2_t) -- __builtin_aarch64_ld1v2si ((const __builtin_aarch64_simd_si *) a); -+ int64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -+ ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -+ ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -+ ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -+ return ret; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vld1_u64 (const uint64_t *a) -+__extension__ extern __inline uint64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_u64 (const uint64_t * __a) - { -- return (uint64x1_t) {*a}; -+ uint64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -+ ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -+ ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -+ ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -+ return ret; - } - --/* vld1q */ -+__extension__ extern __inline float64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_f64 (const float64_t * __a) -+{ -+ float64x1x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; -+ ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; -+ ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; -+ ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; -+ return ret; -+} - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) --vld1q_f16 (const float16_t *__a) -+__extension__ extern __inline int8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_s8 (const int8_t * __a) - { -- return __builtin_aarch64_ld1v8hf (__a); -+ int8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vld1q_f32 (const float32_t *a) -+__extension__ extern __inline poly8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_p8 (const poly8_t * __a) - { -- return __builtin_aarch64_ld1v4sf ((const __builtin_aarch64_simd_sf *) a); -+ poly8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = 
__builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vld1q_f64 (const float64_t *a) -+__extension__ extern __inline int16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_s16 (const int16_t * __a) - { -- return __builtin_aarch64_ld1v2df ((const __builtin_aarch64_simd_df *) a); -+ int16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vld1q_p8 (const poly8_t *a) -+__extension__ extern __inline poly16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_p16 (const poly16_t * __a) - { -- return (poly8x16_t) -- __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); -+ poly16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vld1q_p16 (const poly16_t *a) -+__extension__ extern __inline int32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_s32 (const int32_t * __a) - { -- return (poly16x8_t) -- __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); -+ int32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -+ ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -+ ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -+ ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -+ return ret; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vld1q_s8 (const int8_t *a) -+__extension__ extern __inline uint8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_u8 (const uint8_t * __a) - { -- return __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); -+ uint8x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -+ ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -+ ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -+ ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -+ return ret; - } - --__extension__ 
static __inline int16x8_t __attribute__ ((__always_inline__)) --vld1q_s16 (const int16_t *a) -+__extension__ extern __inline uint16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_u16 (const uint16_t * __a) - { -- return __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); -+ uint16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -+ ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -+ ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -+ ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -+ return ret; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vld1q_s32 (const int32_t *a) -+__extension__ extern __inline uint32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_u32 (const uint32_t * __a) - { -- return __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); -+ uint32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -+ ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -+ ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -+ ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -+ return ret; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vld1q_s64 (const int64_t *a) -+__extension__ extern __inline float16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_f16 (const float16_t * __a) - { -- return __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); -+ float16x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a); -+ ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0); -+ ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1); -+ ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2); -+ ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3); -+ return ret; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vld1q_u8 (const uint8_t *a) -+__extension__ extern __inline float32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_f32 (const float32_t * __a) - { -- return (uint8x16_t) -- __builtin_aarch64_ld1v16qi ((const __builtin_aarch64_simd_qi *) a); -+ float32x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); -+ ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); -+ ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); -+ ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); -+ return ret; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vld1q_u16 (const uint16_t *a) -+__extension__ extern __inline poly64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4_dup_p64 (const poly64_t * __a) - { -- return (uint16x8_t) -- __builtin_aarch64_ld1v8hi ((const __builtin_aarch64_simd_hi *) a); -+ poly64x1x4_t ret; -+ 
__builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 0); -+ ret.val[1] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 1); -+ ret.val[2] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 2); -+ ret.val[3] = (poly64x1_t) __builtin_aarch64_get_dregxidi_pss (__o, 3); -+ return ret; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vld1q_u32 (const uint32_t *a) -+__extension__ extern __inline int8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_s8 (const int8_t * __a) - { -- return (uint32x4_t) -- __builtin_aarch64_ld1v4si ((const __builtin_aarch64_simd_si *) a); -+ int8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vld1q_u64 (const uint64_t *a) -+__extension__ extern __inline poly8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_p8 (const poly8_t * __a) - { -- return (uint64x2_t) -- __builtin_aarch64_ld1v2di ((const __builtin_aarch64_simd_di *) a); -+ poly8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; - } - --/* vld1_dup */ -- --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) --vld1_dup_f16 (const float16_t* __a) -+__extension__ extern __inline int16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_s16 (const int16_t * __a) - { -- float16_t __f = *__a; -- return (float16x4_t) { __f, __f, __f, __f }; -+ int16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vld1_dup_f32 (const float32_t* __a) -+__extension__ extern __inline poly16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_p16 (const poly16_t * __a) - { -- return vdup_n_f32 (*__a); -+ poly16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (poly16x8_t) 
__builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vld1_dup_f64 (const float64_t* __a) -+__extension__ extern __inline int32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_s32 (const int32_t * __a) - { -- return vdup_n_f64 (*__a); -+ int32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -+ ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -+ ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -+ ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -+ return ret; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vld1_dup_p8 (const poly8_t* __a) -+__extension__ extern __inline int64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_s64 (const int64_t * __a) - { -- return vdup_n_p8 (*__a); -+ int64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -+ ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -+ ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -+ ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -+ return ret; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vld1_dup_p16 (const poly16_t* __a) -+__extension__ extern __inline uint8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_u8 (const uint8_t * __a) - { -- return vdup_n_p16 (*__a); -+ uint8x16x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -+ ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -+ ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -+ ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -+ ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -+ return ret; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vld1_dup_s8 (const int8_t* __a) -+__extension__ extern __inline uint16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_u16 (const uint16_t * __a) - { -- return vdup_n_s8 (*__a); -+ uint16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -+ ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -+ ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -+ ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -+ ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -+ return ret; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vld1_dup_s16 (const int16_t* __a) -+__extension__ extern __inline uint32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_u32 (const uint32_t * __a) - { -- return vdup_n_s16 (*__a); -+ uint32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); -+ ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -+ ret.val[1] = 
(uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -+ ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -+ ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -+ return ret; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vld1_dup_s32 (const int32_t* __a) -+__extension__ extern __inline uint64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_u64 (const uint64_t * __a) - { -- return vdup_n_s32 (*__a); -+ uint64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -+ ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -+ ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -+ ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -+ return ret; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vld1_dup_s64 (const int64_t* __a) -+__extension__ extern __inline float16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_f16 (const float16_t * __a) - { -- return vdup_n_s64 (*__a); -+ float16x8x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a); -+ ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0); -+ ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1); -+ ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2); -+ ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3); -+ return ret; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vld1_dup_u8 (const uint8_t* __a) -+__extension__ extern __inline float32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_f32 (const float32_t * __a) - { -- return vdup_n_u8 (*__a); -+ float32x4x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a); -+ ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); -+ ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); -+ ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); -+ ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); -+ return ret; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vld1_dup_u16 (const uint16_t* __a) -+__extension__ extern __inline float64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_f64 (const float64_t * __a) - { -- return vdup_n_u16 (*__a); -+ float64x2x4_t ret; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a); -+ ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); -+ ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); -+ ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); -+ ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); -+ return ret; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vld1_dup_u32 (const uint32_t* __a) -+__extension__ extern __inline poly64x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vld4q_dup_p64 (const poly64_t * __a) - { -- return vdup_n_u32 (*__a); -+ poly64x2x4_t ret; -+ 
__builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -+ ret.val[0] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 0); -+ ret.val[1] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 1); -+ ret.val[2] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 2); -+ ret.val[3] = (poly64x2_t) __builtin_aarch64_get_qregxiv2di_pss (__o, 3); -+ return ret; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vld1_dup_u64 (const uint64_t* __a) --{ -- return vdup_n_u64 (*__a); -+/* vld2_lane */ -+ -+#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ -+ qmode, ptrmode, funcsuffix, signedtype) \ -+__extension__ extern __inline intype \ -+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -+vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_oi __o; \ -+ largetype __temp; \ -+ __temp.val[0] = \ -+ vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ -+ __temp.val[1] = \ -+ vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ -+ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ -+ (signedtype) __temp.val[0], \ -+ 0); \ -+ __o = __builtin_aarch64_set_qregoi##qmode (__o, \ -+ (signedtype) __temp.val[1], \ -+ 1); \ -+ __o = __builtin_aarch64_ld2_lane##mode ( \ -+ (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -+ __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \ -+ __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); \ -+ return __b; \ - } - --/* vld1q_dup */ -+__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf, -+ v8hf, hf, f16, float16x8_t) -+__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf, -+ sf, f32, float32x4_t) -+__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df, -+ df, f64, float64x2_t) -+__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, -+ int8x16_t) -+__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, -+ p16, int16x8_t) -+__LD2_LANE_FUNC (poly64x1x2_t, poly64x1_t, poly64x2x2_t, poly64_t, di, -+ v2di_ssps, di, p64, poly64x2_t) -+__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, -+ int8x16_t) -+__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, -+ int16x8_t) -+__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, -+ int32x4_t) -+__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64, -+ int64x2_t) -+__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, -+ int8x16_t) -+__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, -+ u16, int16x8_t) -+__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, -+ u32, int32x4_t) -+__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di, -+ u64, int64x2_t) - --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) --vld1q_dup_f16 (const float16_t* __a) --{ -- float16_t __f = *__a; -- return (float16x8_t) { __f, __f, __f, __f, __f, __f, __f, __f }; -+#undef __LD2_LANE_FUNC -+ -+/* vld2q_lane */ -+ -+#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ -+__extension__ extern __inline intype \ -+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ 
-+vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_oi __o; \ -+ intype ret; \ -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \ -+ __o = __builtin_aarch64_ld2_lane##mode ( \ -+ (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -+ ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); \ -+ ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); \ -+ return ret; \ - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vld1q_dup_f32 (const float32_t* __a) --{ -- return vdupq_n_f32 (*__a); -+__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16) -+__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32) -+__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64) -+__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8) -+__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16) -+__LD2_LANE_FUNC (poly64x2x2_t, poly64x2_t, poly64_t, v2di, di, p64) -+__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8) -+__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16) -+__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32) -+__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64) -+__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8) -+__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16) -+__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32) -+__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) -+ -+#undef __LD2_LANE_FUNC -+ -+/* vld3_lane */ -+ -+#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ -+ qmode, ptrmode, funcsuffix, signedtype) \ -+__extension__ extern __inline intype \ -+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -+vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_ci __o; \ -+ largetype __temp; \ -+ __temp.val[0] = \ -+ vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ -+ __temp.val[1] = \ -+ vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ -+ __temp.val[2] = \ -+ vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ -+ __o = __builtin_aarch64_set_qregci##qmode (__o, \ -+ (signedtype) __temp.val[0], \ -+ 0); \ -+ __o = __builtin_aarch64_set_qregci##qmode (__o, \ -+ (signedtype) __temp.val[1], \ -+ 1); \ -+ __o = __builtin_aarch64_set_qregci##qmode (__o, \ -+ (signedtype) __temp.val[2], \ -+ 2); \ -+ __o = __builtin_aarch64_ld3_lane##mode ( \ -+ (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -+ __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \ -+ __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); \ -+ __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); \ -+ return __b; \ - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vld1q_dup_f64 (const float64_t* __a) --{ -- return vdupq_n_f64 (*__a); --} -+__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf, -+ v8hf, hf, f16, float16x8_t) -+__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf, -+ sf, f32, float32x4_t) -+__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df, -+ df, f64, float64x2_t) -+__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, 
v16qi, qi, p8, -+ int8x16_t) -+__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, -+ p16, int16x8_t) -+__LD3_LANE_FUNC (poly64x1x3_t, poly64x1_t, poly64x2x3_t, poly64_t, di, -+ v2di_ssps, di, p64, poly64x2_t) -+__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, -+ int8x16_t) -+__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, -+ int16x8_t) -+__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, -+ int32x4_t) -+__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64, -+ int64x2_t) -+__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, -+ int8x16_t) -+__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, -+ u16, int16x8_t) -+__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si, -+ u32, int32x4_t) -+__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di, -+ u64, int64x2_t) - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vld1q_dup_p8 (const poly8_t* __a) --{ -- return vdupq_n_p8 (*__a); --} -+#undef __LD3_LANE_FUNC - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vld1q_dup_p16 (const poly16_t* __a) --{ -- return vdupq_n_p16 (*__a); --} -+/* vld3q_lane */ - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vld1q_dup_s8 (const int8_t* __a) --{ -- return vdupq_n_s8 (*__a); -+#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ -+__extension__ extern __inline intype \ -+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -+vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_ci __o; \ -+ intype ret; \ -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \ -+ __o = __builtin_aarch64_ld3_lane##mode ( \ -+ (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -+ ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); \ -+ ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); \ -+ ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); \ -+ return ret; \ - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vld1q_dup_s16 (const int16_t* __a) --{ -- return vdupq_n_s16 (*__a); --} -+__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16) -+__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32) -+__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64) -+__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8) -+__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16) -+__LD3_LANE_FUNC (poly64x2x3_t, poly64x2_t, poly64_t, v2di, di, p64) -+__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8) -+__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16) -+__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32) -+__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64) -+__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8) -+__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16) -+__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32) -+__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, 
uint64_t, v2di, di, u64) - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vld1q_dup_s32 (const int32_t* __a) --{ -- return vdupq_n_s32 (*__a); --} -+#undef __LD3_LANE_FUNC - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vld1q_dup_s64 (const int64_t* __a) --{ -- return vdupq_n_s64 (*__a); --} -+/* vld4_lane */ - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vld1q_dup_u8 (const uint8_t* __a) --{ -- return vdupq_n_u8 (*__a); -+#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ -+ qmode, ptrmode, funcsuffix, signedtype) \ -+__extension__ extern __inline intype \ -+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -+vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_xi __o; \ -+ largetype __temp; \ -+ __temp.val[0] = \ -+ vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ -+ __temp.val[1] = \ -+ vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ -+ __temp.val[2] = \ -+ vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ -+ __temp.val[3] = \ -+ vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \ -+ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -+ (signedtype) __temp.val[0], \ -+ 0); \ -+ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -+ (signedtype) __temp.val[1], \ -+ 1); \ -+ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -+ (signedtype) __temp.val[2], \ -+ 2); \ -+ __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -+ (signedtype) __temp.val[3], \ -+ 3); \ -+ __o = __builtin_aarch64_ld4_lane##mode ( \ -+ (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -+ __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \ -+ __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \ -+ __b.val[2] = (vectype) __builtin_aarch64_get_dregxidi (__o, 2); \ -+ __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \ -+ return __b; \ - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vld1q_dup_u16 (const uint16_t* __a) --{ -- return vdupq_n_u16 (*__a); --} -+/* vld4q_lane */ - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vld1q_dup_u32 (const uint32_t* __a) --{ -- return vdupq_n_u32 (*__a); --} -+__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf, -+ v8hf, hf, f16, float16x8_t) -+__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, -+ sf, f32, float32x4_t) -+__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, -+ df, f64, float64x2_t) -+__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, -+ int8x16_t) -+__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, -+ p16, int16x8_t) -+__LD4_LANE_FUNC (poly64x1x4_t, poly64x1_t, poly64x2x4_t, poly64_t, di, -+ v2di_ssps, di, p64, poly64x2_t) -+__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, -+ int8x16_t) -+__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, -+ int16x8_t) -+__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, -+ int32x4_t) -+__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, -+ int64x2_t) -+__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, -+ int8x16_t) -+__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, 
uint16x8x4_t, uint16_t, v4hi, v8hi, hi, -+ u16, int16x8_t) -+__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, -+ u32, int32x4_t) -+__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, -+ u64, int64x2_t) - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vld1q_dup_u64 (const uint64_t* __a) --{ -- return vdupq_n_u64 (*__a); --} -+#undef __LD4_LANE_FUNC - --/* vld1_lane */ -+/* vld4q_lane */ - --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) --vld1_lane_f16 (const float16_t *__src, float16x4_t __vec, const int __lane) --{ -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ -+__extension__ extern __inline intype \ -+__attribute__ ((__always_inline__, __gnu_inline__,__artificial__)) \ -+vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ -+{ \ -+ __builtin_aarch64_simd_xi __o; \ -+ intype ret; \ -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \ -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \ -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \ -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \ -+ __o = __builtin_aarch64_ld4_lane##mode ( \ -+ (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -+ ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \ -+ ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \ -+ ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \ -+ ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \ -+ return ret; \ - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vld1_lane_f32 (const float32_t *__src, float32x2_t __vec, const int __lane) --{ -- return __aarch64_vset_lane_any (*__src, __vec, __lane); --} -+__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) -+__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) -+__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) -+__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) -+__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) -+__LD4_LANE_FUNC (poly64x2x4_t, poly64x2_t, poly64_t, v2di, di, p64) -+__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) -+__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16) -+__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) -+__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) -+__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) -+__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) -+__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) -+__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vld1_lane_f64 (const float64_t *__src, float64x1_t __vec, const int __lane) --{ -- return __aarch64_vset_lane_any (*__src, __vec, __lane); --} -+#undef __LD4_LANE_FUNC - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vld1_lane_p8 (const poly8_t *__src, poly8x8_t __vec, const int __lane) --{ -- return __aarch64_vset_lane_any (*__src, __vec, __lane); --} -+/* vmax */ - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vld1_lane_p16 (const poly16_t 
*__src, poly16x4_t __vec, const int __lane) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_f32 (float32x2_t __a, float32x2_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smax_nanv2sf (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vld1_lane_s8 (const int8_t *__src, int8x8_t __vec, const int __lane) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_f64 (float64x1_t __a, float64x1_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return (float64x1_t) -+ { __builtin_aarch64_smax_nandf (vget_lane_f64 (__a, 0), -+ vget_lane_f64 (__b, 0)) }; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vld1_lane_s16 (const int16_t *__src, int16x4_t __vec, const int __lane) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_s8 (int8x8_t __a, int8x8_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smaxv8qi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vld1_lane_s32 (const int32_t *__src, int32x2_t __vec, const int __lane) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_s16 (int16x4_t __a, int16x4_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smaxv4hi (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vld1_lane_s64 (const int64_t *__src, int64x1_t __vec, const int __lane) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_s32 (int32x2_t __a, int32x2_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smaxv2si (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vld1_lane_u8 (const uint8_t *__src, uint8x8_t __vec, const int __lane) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a, -+ (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vld1_lane_u16 (const uint16_t *__src, uint16x4_t __vec, const int __lane) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a, -+ (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vld1_lane_u32 (const uint32_t *__src, uint32x2_t __vec, const int __lane) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a, -+ (int32x2_t) __b); - } - --__extension__ static __inline uint64x1_t __attribute__ 
((__always_inline__)) --vld1_lane_u64 (const uint64_t *__src, uint64x1_t __vec, const int __lane) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_f32 (float32x4_t __a, float32x4_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smax_nanv4sf (__a, __b); - } - --/* vld1q_lane */ -- --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) --vld1q_lane_f16 (const float16_t *__src, float16x8_t __vec, const int __lane) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_f64 (float64x2_t __a, float64x2_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smax_nanv2df (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vld1q_lane_f32 (const float32_t *__src, float32x4_t __vec, const int __lane) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_s8 (int8x16_t __a, int8x16_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smaxv16qi (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vld1q_lane_f64 (const float64_t *__src, float64x2_t __vec, const int __lane) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_s16 (int16x8_t __a, int16x8_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smaxv8hi (__a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vld1q_lane_p8 (const poly8_t *__src, poly8x16_t __vec, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_s32 (int32x4_t __a, int32x4_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return __builtin_aarch64_smaxv4si (__a, __b); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vld1q_lane_p16 (const poly16_t *__src, poly16x8_t __vec, const int __lane) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a, -+ (int8x16_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vld1q_lane_s8 (const int8_t *__src, int8x16_t __vec, const int __lane) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a, -+ (int16x8_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vld1q_lane_s16 (const int16_t *__src, int16x8_t __vec, const int __lane) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- return __aarch64_vset_lane_any (*__src, __vec, __lane); -+ return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a, -+ (int32x4_t) __b); - } -+/* vmulx 
*/
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vld1q_lane_s32 (const int32_t *__src, int32x4_t __vec, const int __lane)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulx_f32 (float32x2_t __a, float32x2_t __b)
- {
-- return __aarch64_vset_lane_any (*__src, __vec, __lane);
-+ return __builtin_aarch64_fmulxv2sf (__a, __b);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vld1q_lane_s64 (const int64_t *__src, int64x2_t __vec, const int __lane)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- return __aarch64_vset_lane_any (*__src, __vec, __lane);
-+ return __builtin_aarch64_fmulxv4sf (__a, __b);
- }
- 
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vld1q_lane_u8 (const uint8_t *__src, uint8x16_t __vec, const int __lane)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulx_f64 (float64x1_t __a, float64x1_t __b)
- {
-- return __aarch64_vset_lane_any (*__src, __vec, __lane);
-+ return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])};
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vld1q_lane_u16 (const uint16_t *__src, uint16x8_t __vec, const int __lane)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxq_f64 (float64x2_t __a, float64x2_t __b)
- {
-- return __aarch64_vset_lane_any (*__src, __vec, __lane);
-+ return __builtin_aarch64_fmulxv2df (__a, __b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vld1q_lane_u32 (const uint32_t *__src, uint32x4_t __vec, const int __lane)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxs_f32 (float32_t __a, float32_t __b)
- {
-- return __aarch64_vset_lane_any (*__src, __vec, __lane);
-+ return __builtin_aarch64_fmulxsf (__a, __b);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vld1q_lane_u64 (const uint64_t *__src, uint64x2_t __vec, const int __lane)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxd_f64 (float64_t __a, float64_t __b)
- {
-- return __aarch64_vset_lane_any (*__src, __vec, __lane);
-+ return __builtin_aarch64_fmulxdf (__a, __b);
- }
- 
--/* vldn */
--
--__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
--vld2_s64 (const int64_t * __a)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane)
- {
-- int64x1x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-- ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-- return ret;
-+ return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane));
- }
- 
--__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
--vld2_u64 (const uint64_t * __a)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulx_lane_f64 (float64x1_t
__a, float64x1_t __v, const int __lane)
- {
-- uint64x1x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0);
-- ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1);
-- return ret;
-+ return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane));
- }
- 
--__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__))
--vld2_f64 (const float64_t * __a)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane)
- {
-- float64x1x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2df ((const __builtin_aarch64_simd_df *) __a);
-- ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)};
-- ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)};
-- return ret;
-+ return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane));
- }
- 
--__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
--vld2_s8 (const int8_t * __a)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane)
- {
-- int8x8x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-- ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-- return ret;
-+ return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane));
- }
- 
--__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
--vld2_p8 (const poly8_t * __a)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane)
- {
-- poly8x8x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-- ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-- return ret;
-+ return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane));
- }
- 
--__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
--vld2_s16 (const int16_t * __a)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane)
- {
-- int16x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-- ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-- return ret;
-+ return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane));
- }
- 
--__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
--vld2_p16 (const poly16_t * __a)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane)
- {
-- poly16x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (poly16x4_t)
__builtin_aarch64_get_dregoiv4hi (__o, 0);
-- ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-- return ret;
-+ return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane));
- }
- 
--__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
--vld2_s32 (const int32_t * __a)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane)
- {
-- int32x2x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-- ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-- return ret;
-+ return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane));
- }
- 
--__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
--vld2_u8 (const uint8_t * __a)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane)
- {
-- uint8x8x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-- ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-- return ret;
-+ return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
- }
- 
--__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
--vld2_u16 (const uint16_t * __a)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane)
- {
-- uint16x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-- ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-- return ret;
-+ return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane));
- }
- 
--__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
--vld2_u32 (const uint32_t * __a)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane)
- {
-- uint32x2x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v2si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-- ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-- return ret;
-+ return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
- }
- 
--__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
--vld2_f16 (const float16_t * __a)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane)
- {
-- float16x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v4hf (__a);
-- ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
-- ret.val[1] = __builtin_aarch64_get_dregoiv4hf (__o, 1);
-- return ret;
-+ return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane));
- }
- 
--__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
--vld2_f32 (const float32_t * __a)
-+/* vpmax */
-+
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmax_s8 (int8x8_t a, int8x8_t b)
- {
-- float32x2x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v2sf ((const __builtin_aarch64_simd_sf *) __a);
-- ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
-- ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
-- return ret;
-+ return __builtin_aarch64_smaxpv8qi (a, b);
- }
- 
--__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
--vld2q_s8 (const int8_t * __a)
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmax_s16 (int16x4_t a, int16x4_t b)
- {
-- int8x16x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-- ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-- return ret;
-+ return __builtin_aarch64_smaxpv4hi (a, b);
- }
- 
--__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
--vld2q_p8 (const poly8_t * __a)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmax_s32 (int32x2_t a, int32x2_t b)
- {
-- poly8x16x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-- ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-- return ret;
-+ return __builtin_aarch64_smaxpv2si (a, b);
- }
- 
--__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
--vld2q_s16 (const int16_t * __a)
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmax_u8 (uint8x8_t a, uint8x8_t b)
- {
-- int16x8x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-- ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-- return ret;
-+ return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a,
-+ (int8x8_t) b);
- }
- 
--__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
--vld2q_p16 (const poly16_t * __a)
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmax_u16 (uint16x4_t a, uint16x4_t b)
- {
-- poly16x8x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-- ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-- return ret;
-+ return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a,
-+ (int16x4_t) b);
- }
- 
--__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
--vld2q_s32 (const int32_t * __a)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmax_u32 (uint32x2_t a, uint32x2_t b)
- {
-- int32x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o,
0);
-- ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-- return ret;
-+ return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a,
-+ (int32x2_t) b);
- }
- 
--__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__))
--vld2q_s64 (const int64_t * __a)
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxq_s8 (int8x16_t a, int8x16_t b)
- {
-- int64x2x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-- ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-- return ret;
-+ return __builtin_aarch64_smaxpv16qi (a, b);
- }
- 
--__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
--vld2q_u8 (const uint8_t * __a)
--{
-- uint8x16x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0);
-- ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1);
-- return ret;
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxq_s16 (int16x8_t a, int16x8_t b)
-+{
-+ return __builtin_aarch64_smaxpv8hi (a, b);
- }
- 
--__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
--vld2q_u16 (const uint16_t * __a)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxq_s32 (int32x4_t a, int32x4_t b)
- {
-- uint16x8x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0);
-- ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1);
-- return ret;
-+ return __builtin_aarch64_smaxpv4si (a, b);
- }
- 
--__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
--vld2q_u32 (const uint32_t * __a)
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxq_u8 (uint8x16_t a, uint8x16_t b)
- {
-- uint32x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0);
-- ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1);
-- return ret;
-+ return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a,
-+ (int8x16_t) b);
- }
- 
--__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__))
--vld2q_u64 (const uint64_t * __a)
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxq_u16 (uint16x8_t a, uint16x8_t b)
- {
-- uint64x2x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0);
-- ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1);
-- return ret;
-+ return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a,
-+ (int16x8_t) b);
- }
- 
--__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
--vld2q_f16 (const float16_t * __a)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__,
__gnu_inline__, __artificial__))
-+vpmaxq_u32 (uint32x4_t a, uint32x4_t b)
- {
-- float16x8x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v8hf (__a);
-- ret.val[0] = __builtin_aarch64_get_qregoiv8hf (__o, 0);
-- ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1);
-- return ret;
-+ return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a,
-+ (int32x4_t) b);
- }
- 
--__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
--vld2q_f32 (const float32_t * __a)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmax_f32 (float32x2_t a, float32x2_t b)
- {
-- float32x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a);
-- ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0);
-- ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1);
-- return ret;
-+ return __builtin_aarch64_smax_nanpv2sf (a, b);
- }
- 
--__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__))
--vld2q_f64 (const float64_t * __a)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxq_f32 (float32x4_t a, float32x4_t b)
- {
-- float64x2x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a);
-- ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0);
-- ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1);
-- return ret;
-+ return __builtin_aarch64_smax_nanpv4sf (a, b);
- }
- 
--__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
--vld3_s64 (const int64_t * __a)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxq_f64 (float64x2_t a, float64x2_t b)
- {
-- int64x1x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-- ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-- ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_smax_nanpv2df (a, b);
- }
- 
--__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
--vld3_u64 (const uint64_t * __a)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxqd_f64 (float64x2_t a)
- {
-- uint64x1x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0);
-- ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1);
-- ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_nan_scal_v2df (a);
- }
- 
--__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__))
--vld3_f64 (const float64_t * __a)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxs_f32 (float32x2_t a)
- {
-- float64x1x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3df ((const __builtin_aarch64_simd_df *) __a);
-- ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)};
-- ret.val[1] = (float64x1_t)
{__builtin_aarch64_get_dregcidf (__o, 1)};
-- ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)};
-- return ret;
-+ return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a);
- }
- 
--__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
--vld3_s8 (const int8_t * __a)
-+/* vpmaxnm */
-+
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxnm_f32 (float32x2_t a, float32x2_t b)
- {
-- int8x8x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-- ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-- ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_smaxpv2sf (a, b);
- }
- 
--__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
--vld3_p8 (const poly8_t * __a)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxnmq_f32 (float32x4_t a, float32x4_t b)
- {
-- poly8x8x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-- ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-- ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_smaxpv4sf (a, b);
- }
- 
--__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
--vld3_s16 (const int16_t * __a)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxnmq_f64 (float64x2_t a, float64x2_t b)
- {
-- int16x4x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-- ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-- ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_smaxpv2df (a, b);
- }
- 
--__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
--vld3_p16 (const poly16_t * __a)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxnmqd_f64 (float64x2_t a)
- {
-- poly16x4x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-- ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-- ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v2df (a);
- }
- 
--__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
--vld3_s32 (const int32_t * __a)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmaxnms_f32 (float32x2_t a)
- {
-- int32x2x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-- ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-- ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si
(__o, 2);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v2sf (a);
- }
- 
--__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
--vld3_u8 (const uint8_t * __a)
-+/* vpmin */
-+
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmin_s8 (int8x8_t a, int8x8_t b)
- {
-- uint8x8x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0);
-- ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1);
-- ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_sminpv8qi (a, b);
- }
- 
--__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
--vld3_u16 (const uint16_t * __a)
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmin_s16 (int16x4_t a, int16x4_t b)
- {
-- uint16x4x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0);
-- ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1);
-- ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_sminpv4hi (a, b);
- }
- 
--__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
--vld3_u32 (const uint32_t * __a)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmin_s32 (int32x2_t a, int32x2_t b)
- {
-- uint32x2x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v2si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0);
-- ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1);
-- ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2);
-- return ret;
-+ return __builtin_aarch64_sminpv2si (a, b);
- }
- 
--__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
--vld3_f16 (const float16_t * __a)
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmin_u8 (uint8x8_t a, uint8x8_t b)
- {
-- float16x4x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v4hf (__a);
-- ret.val[0] = __builtin_aarch64_get_dregciv4hf (__o, 0);
-- ret.val[1] = __builtin_aarch64_get_dregciv4hf (__o, 1);
-- ret.val[2] = __builtin_aarch64_get_dregciv4hf (__o, 2);
-- return ret;
-+ return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a,
-+ (int8x8_t) b);
- }
- 
--__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
--vld3_f32 (const float32_t * __a)
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmin_u16 (uint16x4_t a, uint16x4_t b)
- {
-- float32x2x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v2sf ((const __builtin_aarch64_simd_sf *) __a);
-- ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0);
-- ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1);
-- ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2);
-- return ret;
-+ return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a,
-+ (int16x4_t) b);
- }
- 
--__extension__
static __inline int8x16x3_t __attribute__ ((__always_inline__))
--vld3q_s8 (const int8_t * __a)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmin_u32 (uint32x2_t a, uint32x2_t b)
- {
-- int8x16x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-- ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-- ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-- return ret;
-+ return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a,
-+ (int32x2_t) b);
- }
- 
--__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
--vld3q_p8 (const poly8_t * __a)
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminq_s8 (int8x16_t a, int8x16_t b)
- {
-- poly8x16x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-- ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-- ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_sminpv16qi (a, b);
- }
- 
--__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
--vld3q_s16 (const int16_t * __a)
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminq_s16 (int16x8_t a, int16x8_t b)
- {
-- int16x8x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-- ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-- ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_sminpv8hi (a, b);
- }
- 
--__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
--vld3q_p16 (const poly16_t * __a)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminq_s32 (int32x4_t a, int32x4_t b)
- {
-- poly16x8x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-- ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-- ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_sminpv4si (a, b);
- }
- 
--__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
--vld3q_s32 (const int32_t * __a)
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminq_u8 (uint8x16_t a, uint8x16_t b)
- {
-- int32x4x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-- ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-- ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-- return ret;
-+ return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a,
-+ (int8x16_t) b);
- }
- 
--__extension__ static __inline int64x2x3_t __attribute__
((__always_inline__))
--vld3q_s64 (const int64_t * __a)
--{
-- int64x2x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-- ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-- ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-- return ret;
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminq_u16 (uint16x8_t a, uint16x8_t b)
-+{
-+ return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a,
-+ (int16x8_t) b);
- }
- 
--__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
--vld3q_u8 (const uint8_t * __a)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminq_u32 (uint32x4_t a, uint32x4_t b)
- {
-- uint8x16x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0);
-- ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1);
-- ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2);
-- return ret;
-+ return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a,
-+ (int32x4_t) b);
- }
- 
--__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
--vld3q_u16 (const uint16_t * __a)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmin_f32 (float32x2_t a, float32x2_t b)
- {
-- uint16x8x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0);
-- ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1);
-- ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2);
-- return ret;
-+ return __builtin_aarch64_smin_nanpv2sf (a, b);
- }
- 
--__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
--vld3q_u32 (const uint32_t * __a)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminq_f32 (float32x4_t a, float32x4_t b)
- {
-- uint32x4x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0);
-- ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1);
-- ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2);
-- return ret;
-+ return __builtin_aarch64_smin_nanpv4sf (a, b);
- }
- 
--__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__))
--vld3q_u64 (const uint64_t * __a)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminq_f64 (float64x2_t a, float64x2_t b)
- {
-- uint64x2x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0);
-- ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1);
-- ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2);
-- return ret;
-+ return __builtin_aarch64_smin_nanpv2df (a, b);
- }
- 
--__extension__ static __inline float16x8x3_t
__attribute__ ((__always_inline__))
--vld3q_f16 (const float16_t * __a)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminqd_f64 (float64x2_t a)
- {
-- float16x8x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v8hf (__a);
-- ret.val[0] = __builtin_aarch64_get_qregciv8hf (__o, 0);
-- ret.val[1] = __builtin_aarch64_get_qregciv8hf (__o, 1);
-- ret.val[2] = __builtin_aarch64_get_qregciv8hf (__o, 2);
-- return ret;
-+ return __builtin_aarch64_reduc_smin_nan_scal_v2df (a);
- }
- 
--__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
--vld3q_f32 (const float32_t * __a)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpmins_f32 (float32x2_t a)
- {
-- float32x4x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a);
-- ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0);
-- ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1);
-- ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2);
-- return ret;
-+ return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a);
- }
- 
--__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__))
--vld3q_f64 (const float64_t * __a)
-+/* vpminnm */
-+
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminnm_f32 (float32x2_t a, float32x2_t b)
- {
-- float64x2x3_t ret;
-- __builtin_aarch64_simd_ci __o;
-- __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a);
-- ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0);
-- ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1);
-- ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2);
-- return ret;
-+ return __builtin_aarch64_sminpv2sf (a, b);
- }
- 
--__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__))
--vld4_s64 (const int64_t * __a)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminnmq_f32 (float32x4_t a, float32x4_t b)
- {
-- int64x1x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
-- ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
-- ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
-- ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_sminpv4sf (a, b);
- }
- 
--__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__))
--vld4_u64 (const uint64_t * __a)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminnmq_f64 (float64x2_t a, float64x2_t b)
- {
-- uint64x1x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0);
-- ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1);
-- ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2);
-- ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_sminpv2df (a, b);
- }
- 
--__extension__ static __inline float64x1x4_t __attribute__
((__always_inline__))
--vld4_f64 (const float64_t * __a)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminnmqd_f64 (float64x2_t a)
- {
-- float64x1x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4df ((const __builtin_aarch64_simd_df *) __a);
-- ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)};
-- ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)};
-- ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)};
-- ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)};
-- return ret;
-+ return __builtin_aarch64_reduc_smin_scal_v2df (a);
- }
- 
--__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
--vld4_s8 (const int8_t * __a)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vpminnms_f32 (float32x2_t a)
- {
-- int8x8x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-- ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-- ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-- ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smin_scal_v2sf (a);
- }
- 
--__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__))
--vld4_p8 (const poly8_t * __a)
-+/* vmaxnm */
-+
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnm_f32 (float32x2_t __a, float32x2_t __b)
- {
-- poly8x8x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-- ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-- ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-- ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_fmaxv2sf (__a, __b);
- }
- 
--__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
--vld4_s16 (const int16_t * __a)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnm_f64 (float64x1_t __a, float64x1_t __b)
- {
-- int16x4x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-- ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-- ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-- ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-- return ret;
-+ return (float64x1_t)
-+ { __builtin_aarch64_fmaxdf (vget_lane_f64 (__a, 0),
-+ vget_lane_f64 (__b, 0)) };
- }
- 
--__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__))
--vld4_p16 (const poly16_t * __a)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnmq_f32 (float32x4_t __a, float32x4_t __b)
- {
-- poly16x4x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-- ret.val[1]
= (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-- ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-- ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_fmaxv4sf (__a, __b);
- }
- 
--__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
--vld4_s32 (const int32_t * __a)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
- {
-- int32x2x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
-- ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
-- ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
-- ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
-- return ret;
-+ return __builtin_aarch64_fmaxv2df (__a, __b);
- }
- 
--__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
--vld4_u8 (const uint8_t * __a)
-+/* vmaxv */
-+
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxv_f32 (float32x2_t __a)
- {
-- uint8x8x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0);
-- ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1);
-- ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2);
-- ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a);
- }
- 
--__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
--vld4_u16 (const uint16_t * __a)
-+__extension__ extern __inline int8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxv_s8 (int8x8_t __a)
- {
-- uint16x4x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0);
-- ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1);
-- ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2);
-- ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v8qi (__a);
- }
- 
--__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
--vld4_u32 (const uint32_t * __a)
-+__extension__ extern __inline int16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxv_s16 (int16x4_t __a)
- {
-- uint32x2x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v2si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0);
-- ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1);
-- ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2);
-- ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v4hi (__a);
- }
- 
--__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
--vld4_f16 (const float16_t * __a)
-+__extension__ extern __inline int32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxv_s32 (int32x2_t __a)
- {
-- float16x4x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v4hf (__a);
-- ret.val[0] = __builtin_aarch64_get_dregxiv4hf (__o, 0);
-- ret.val[1] = __builtin_aarch64_get_dregxiv4hf (__o, 1);
-- ret.val[2] = __builtin_aarch64_get_dregxiv4hf (__o, 2);
-- ret.val[3] = __builtin_aarch64_get_dregxiv4hf (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v2si (__a);
- }
- 
--__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
--vld4_f32 (const float32_t * __a)
-+__extension__ extern __inline uint8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxv_u8 (uint8x8_t __a)
- {
-- float32x2x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v2sf ((const __builtin_aarch64_simd_sf *) __a);
-- ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0);
-- ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1);
-- ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2);
-- ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a);
- }
- 
--__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__))
--vld4q_s8 (const int8_t * __a)
-+__extension__ extern __inline uint16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxv_u16 (uint16x4_t __a)
- {
-- int8x16x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-- ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-- ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-- ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a);
- }
- 
--__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__))
--vld4q_p8 (const poly8_t * __a)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxv_u32 (uint32x2_t __a)
- {
-- poly8x16x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-- ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-- ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-- ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a);
- }
- 
--__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__))
--vld4q_s16 (const int16_t * __a)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxvq_f32 (float32x4_t __a)
- {
-- int16x8x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-- ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-- ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-- ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a);
- }
- 
--__extension__ static __inline poly16x8x4_t __attribute__
((__always_inline__))
--vld4q_p16 (const poly16_t * __a)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxvq_f64 (float64x2_t __a)
- {
-- poly16x8x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-- ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-- ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2);
-- ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a);
- }
- 
--__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__))
--vld4q_s32 (const int32_t * __a)
-+__extension__ extern __inline int8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxvq_s8 (int8x16_t __a)
- {
-- int32x4x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
-- ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
-- ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
-- ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v16qi (__a);
- }
- 
--__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__))
--vld4q_s64 (const int64_t * __a)
-+__extension__ extern __inline int16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxvq_s16 (int16x8_t __a)
- {
-- int64x2x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
-- ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
-- ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
-- ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v8hi (__a);
- }
- 
--__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__))
--vld4q_u8 (const uint8_t * __a)
-+__extension__ extern __inline int32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxvq_s32 (int32x4_t __a)
- {
-- uint8x16x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0);
-- ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1);
-- ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2);
-- ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v4si (__a);
- }
- 
--__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__))
--vld4q_u16 (const uint16_t * __a)
-+__extension__ extern __inline uint8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxvq_u8 (uint8x16_t __a)
- {
-- uint16x8x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0);
-- ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1);
-- ret.val[2] = (uint16x8_t)
__builtin_aarch64_get_qregxiv8hi (__o, 2);
-- ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a);
- }
- 
--__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__))
--vld4q_u32 (const uint32_t * __a)
-+__extension__ extern __inline uint16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxvq_u16 (uint16x8_t __a)
- {
-- uint32x4x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0);
-- ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1);
-- ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2);
-- ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a);
- }
- 
--__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__))
--vld4q_u64 (const uint64_t * __a)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxvq_u32 (uint32x4_t __a)
- {
-- uint64x2x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a);
-- ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0);
-- ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1);
-- ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2);
-- ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a);
- }
- 
--__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__))
--vld4q_f16 (const float16_t * __a)
-+/* vmaxnmv */
-+
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnmv_f32 (float32x2_t __a)
- {
-- float16x8x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v8hf (__a);
-- ret.val[0] = __builtin_aarch64_get_qregxiv8hf (__o, 0);
-- ret.val[1] = __builtin_aarch64_get_qregxiv8hf (__o, 1);
-- ret.val[2] = __builtin_aarch64_get_qregxiv8hf (__o, 2);
-- ret.val[3] = __builtin_aarch64_get_qregxiv8hf (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v2sf (__a);
- }
- 
--__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__))
--vld4q_f32 (const float32_t * __a)
-+__extension__ extern __inline float32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnmvq_f32 (float32x4_t __a)
- {
-- float32x4x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a);
-- ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0);
-- ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1);
-- ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2);
-- ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v4sf (__a);
- }
- 
--__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__))
--vld4q_f64 (const float64_t * __a)
-+__extension__ extern __inline float64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnmvq_f64 (float64x2_t __a)
- {
-- float64x2x4_t ret;
-- __builtin_aarch64_simd_xi __o;
-- __o = __builtin_aarch64_ld4v2df ((const
__builtin_aarch64_simd_df *) __a);
-- ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0);
-- ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1);
-- ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2);
-- ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3);
-- return ret;
-+ return __builtin_aarch64_reduc_smax_scal_v2df (__a);
- }
- 
--/* vldn_dup */
-+/* vmin */
- 
--__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
--vld2_dup_s8 (const int8_t * __a)
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmin_f32 (float32x2_t __a, float32x2_t __b)
- {
-- int8x8x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a);
-- ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
-- ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);
-- return ret;
-+ return __builtin_aarch64_smin_nanv2sf (__a, __b);
- }
- 
--__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
--vld2_dup_s16 (const int16_t * __a)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmin_f64 (float64x1_t __a, float64x1_t __b)
- {
-- int16x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a);
-- ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0);
-- ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1);
-- return ret;
-+ return (float64x1_t)
-+ { __builtin_aarch64_smin_nandf (vget_lane_f64 (__a, 0),
-+ vget_lane_f64 (__b, 0)) };
- }
- 
--__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
--vld2_dup_s32 (const int32_t * __a)
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmin_s8 (int8x8_t __a, int8x8_t __b)
- {
-- int32x2x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a);
-- ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
-- ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);
-- return ret;
-+ return __builtin_aarch64_sminv8qi (__a, __b);
- }
- 
--__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
--vld2_dup_f16 (const float16_t * __a)
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmin_s16 (int16x4_t __a, int16x4_t __b)
- {
-- float16x4x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2rv4hf ((const __builtin_aarch64_simd_hf *) __a);
-- ret.val[0] = __builtin_aarch64_get_dregoiv4hf (__o, 0);
-- ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregoiv4hf (__o, 1);
-- return ret;
-+ return __builtin_aarch64_sminv4hi (__a, __b);
- }
- 
--__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
--vld2_dup_f32 (const float32_t * __a)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmin_s32 (int32x2_t __a, int32x2_t __b)
- {
-- float32x2x2_t ret;
-- __builtin_aarch64_simd_oi __o;
-- __o = __builtin_aarch64_ld2rv2sf ((const __builtin_aarch64_simd_sf *) __a);
-- ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 0);
-- ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregoiv2sf (__o, 1);
-- return
ret; -+ return __builtin_aarch64_sminv2si (__a, __b); - } - --__extension__ static __inline float64x1x2_t __attribute__ ((__always_inline__)) --vld2_dup_f64 (const float64_t * __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmin_u8 (uint8x8_t __a, uint8x8_t __b) - { -- float64x1x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rdf ((const __builtin_aarch64_simd_df *) __a); -- ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 0)}; -- ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregoidf (__o, 1)}; -- return ret; -+ return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a, -+ (int8x8_t) __b); - } - --__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) --vld2_dup_u8 (const uint8_t * __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmin_u16 (uint16x4_t __a, uint16x4_t __b) - { -- uint8x8x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -- ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -- return ret; -+ return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a, -+ (int16x4_t) __b); - } - --__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) --vld2_dup_u16 (const uint16_t * __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmin_u32 (uint32x2_t __a, uint32x2_t __b) - { -- uint16x4x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -- ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -- return ret; -+ return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a, -+ (int32x2_t) __b); - } - --__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) --vld2_dup_u32 (const uint32_t * __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_f32 (float32x4_t __a, float32x4_t __b) - { -- uint32x2x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv2si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0); -- ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1); -- return ret; -+ return __builtin_aarch64_smin_nanv4sf (__a, __b); - } - --__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) --vld2_dup_p8 (const poly8_t * __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_f64 (float64x2_t __a, float64x2_t __b) - { -- poly8x8x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv8qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0); -- ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1); -- return ret; -+ return __builtin_aarch64_smin_nanv2df (__a, __b); - } - --__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) --vld2_dup_p16 (const poly16_t * __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vminq_s8 (int8x16_t __a, int8x16_t __b) - { -- poly16x4x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv4hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 0); -- ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregoiv4hi (__o, 1); -- return ret; -+ return __builtin_aarch64_sminv16qi (__a, __b); - } - --__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__)) --vld2_dup_s64 (const int64_t * __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_s16 (int16x8_t __a, int16x8_t __b) - { -- int64x1x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -- ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -- return ret; -+ return __builtin_aarch64_sminv8hi (__a, __b); - } - --__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__)) --vld2_dup_u64 (const uint64_t * __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_s32 (int32x4_t __a, int32x4_t __b) - { -- uint64x1x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rdi ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 0); -- ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregoidi (__o, 1); -- return ret; -+ return __builtin_aarch64_sminv4si (__a, __b); - } - --__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) --vld2q_dup_s8 (const int8_t * __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- int8x16x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -- ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -- return ret; -+ return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a, -+ (int8x16_t) __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_u16 (uint16x8_t __a, uint16x8_t __b) -+{ -+ return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a, -+ (int16x8_t) __b); - } - --__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) --vld2q_dup_p8 (const poly8_t * __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_u32 (uint32x4_t __a, uint32x4_t __b) -+{ -+ return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a, -+ (int32x4_t) __b); -+} -+ -+/* vminnm */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnm_f32 (float32x2_t __a, float32x2_t __b) - { -- poly8x16x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -- ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -- return ret; -+ return __builtin_aarch64_fminv2sf (__a, __b); - } - --__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) 
--vld2q_dup_s16 (const int16_t * __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnm_f64 (float64x1_t __a, float64x1_t __b) - { -- int16x8x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -- ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -- return ret; -+ return (float64x1_t) -+ { __builtin_aarch64_fmindf (vget_lane_f64 (__a, 0), -+ vget_lane_f64 (__b, 0)) }; - } - --__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) --vld2q_dup_p16 (const poly16_t * __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmq_f32 (float32x4_t __a, float32x4_t __b) - { -- poly16x8x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); -- ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -- return ret; -+ return __builtin_aarch64_fminv4sf (__a, __b); - } - --__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) --vld2q_dup_s32 (const int32_t * __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmq_f64 (float64x2_t __a, float64x2_t __b) - { -- int32x4x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -- ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -- return ret; -+ return __builtin_aarch64_fminv2df (__a, __b); - } - --__extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__)) --vld2q_dup_s64 (const int64_t * __a) -+/* vminv */ -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminv_f32 (float32x2_t __a) - { -- int64x2x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -- ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -- return ret; -+ return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a); - } - --__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) --vld2q_dup_u8 (const uint8_t * __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminv_s8 (int8x8_t __a) - { -- uint8x16x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); -- ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v8qi (__a); - } - --__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) --vld2q_dup_u16 (const uint16_t * __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminv_s16 (int16x4_t __a) - { -- uint16x8x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv8hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (uint16x8_t) 
__builtin_aarch64_get_qregoiv8hi (__o, 0); -- ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v4hi (__a); - } - --__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) --vld2q_dup_u32 (const uint32_t * __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminv_s32 (int32x2_t __a) - { -- uint32x4x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv4si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); -- ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v2si (__a); - } - --__extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__)) --vld2q_dup_u64 (const uint64_t * __a) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminv_u8 (uint8x8_t __a) - { -- uint64x2x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv2di ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); -- ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); -- return ret; -+ return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a); - } - --__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__)) --vld2q_dup_f16 (const float16_t * __a) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminv_u16 (uint16x4_t __a) - { -- float16x8x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv8hf ((const __builtin_aarch64_simd_hf *) __a); -- ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregoiv8hf (__o, 0); -- ret.val[1] = __builtin_aarch64_get_qregoiv8hf (__o, 1); -- return ret; -+ return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a); - } - --__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) --vld2q_dup_f32 (const float32_t * __a) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminv_u32 (uint32x2_t __a) - { -- float32x4x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv4sf ((const __builtin_aarch64_simd_sf *) __a); -- ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); -- ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); -- return ret; -+ return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a); - } - --__extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__)) --vld2q_dup_f64 (const float64_t * __a) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_f32 (float32x4_t __a) - { -- float64x2x2_t ret; -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_ld2rv2df ((const __builtin_aarch64_simd_df *) __a); -- ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); -- ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); -- return ret; -+ return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a); - } - --__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) --vld3_dup_s64 (const int64_t * __a) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_f64 (float64x2_t 
__a) - { -- int64x1x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); -- ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -- ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a); - } - --__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__)) --vld3_dup_u64 (const uint64_t * __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_s8 (int8x16_t __a) - { -- uint64x1x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rdi ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 0); -- ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 1); -- ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregcidi (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v16qi (__a); - } - --__extension__ static __inline float64x1x3_t __attribute__ ((__always_inline__)) --vld3_dup_f64 (const float64_t * __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_s16 (int16x8_t __a) - { -- float64x1x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rdf ((const __builtin_aarch64_simd_df *) __a); -- ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 0)}; -- ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 1)}; -- ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregcidf (__o, 2)}; -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v8hi (__a); - } - --__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__)) --vld3_dup_s8 (const int8_t * __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_s32 (int32x4_t __a) - { -- int8x8x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -- ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -- ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v4si (__a); - } - --__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__)) --vld3_dup_p8 (const poly8_t * __a) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_u8 (uint8x16_t __a) - { -- poly8x8x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -- ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -- ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a); - } - --__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__)) --vld3_dup_s16 (const int16_t * __a) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_u16 (uint16x8_t __a) - { -- int16x4x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) 
__a); -- ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -- ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -- ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a); - } - --__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__)) --vld3_dup_p16 (const poly16_t * __a) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_u32 (uint32x4_t __a) - { -- poly16x4x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -- ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -- ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a); - } - --__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__)) --vld3_dup_s32 (const int32_t * __a) -+/* vminnmv */ -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmv_f32 (float32x2_t __a) - { -- int32x2x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -- ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -- ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v2sf (__a); - } - --__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__)) --vld3_dup_u8 (const uint8_t * __a) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmvq_f32 (float32x4_t __a) - { -- uint8x8x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv8qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 0); -- ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 1); -- ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregciv8qi (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v4sf (__a); - } - --__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__)) --vld3_dup_u16 (const uint16_t * __a) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmvq_f64 (float64x2_t __a) - { -- uint16x4x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv4hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 0); -- ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 1); -- ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregciv4hi (__o, 2); -- return ret; -+ return __builtin_aarch64_reduc_smin_scal_v2df (__a); - } - --__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__)) --vld3_dup_u32 (const uint32_t * __a) -+/* vmla */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) - { -- uint32x2x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv2si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = 
(uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 0); -- ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 1); -- ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregciv2si (__o, 2); -- return ret; -+ return a + b * c; - } - --__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__)) --vld3_dup_f16 (const float16_t * __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) - { -- float16x4x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv4hf ((const __builtin_aarch64_simd_hf *) __a); -- ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 0); -- ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 1); -- ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregciv4hf (__o, 2); -- return ret; -+ return __a + __b * __c; - } - --__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__)) --vld3_dup_f32 (const float32_t * __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) - { -- float32x2x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv2sf ((const __builtin_aarch64_simd_sf *) __a); -- ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 0); -- ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 1); -- ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregciv2sf (__o, 2); -- return ret; -+ return a + b * c; - } - --__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) --vld3q_dup_s8 (const int8_t * __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) - { -- int8x16x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -- ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -- ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -- return ret; -+ return a + b * c; - } - --__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) --vld3q_dup_p8 (const poly8_t * __a) -+/* vmla_lane */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_lane_f32 (float32x2_t __a, float32x2_t __b, -+ float32x2_t __c, const int __lane) - { -- poly8x16x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -- ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -- ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) --vld3q_dup_s16 (const int16_t * __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_lane_s16 (int16x4_t __a, int16x4_t __b, -+ int16x4_t __c, const int __lane) - { -- int16x8x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) 
__a); -- ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -- ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -- ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) --vld3q_dup_p16 (const poly16_t * __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_lane_s32 (int32x2_t __a, int32x2_t __b, -+ int32x2_t __c, const int __lane) - { -- poly16x8x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -- ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -- ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) --vld3q_dup_s32 (const int32_t * __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, -+ uint16x4_t __c, const int __lane) - { -- int32x4x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -- ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -- ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__)) --vld3q_dup_s64 (const int64_t * __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, -+ uint32x2_t __c, const int __lane) - { -- int64x2x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -- ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -- ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) --vld3q_dup_u8 (const uint8_t * __a) -+/* vmla_laneq */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_laneq_f32 (float32x2_t __a, float32x2_t __b, -+ float32x4_t __c, const int __lane) - { -- uint8x16x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); -- ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); -- ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) --vld3q_dup_u16 (const uint16_t * __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vmla_laneq_s16 (int16x4_t __a, int16x4_t __b, -+ int16x8_t __c, const int __lane) - { -- uint16x8x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv8hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); -- ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); -- ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) --vld3q_dup_u32 (const uint32_t * __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_laneq_s32 (int32x2_t __a, int32x2_t __b, -+ int32x4_t __c, const int __lane) - { -- uint32x4x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv4si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); -- ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); -- ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__)) --vld3q_dup_u64 (const uint64_t * __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b, -+ uint16x8_t __c, const int __lane) - { -- uint64x2x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv2di ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); -- ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); -- ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__)) --vld3q_dup_f16 (const float16_t * __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b, -+ uint32x4_t __c, const int __lane) - { -- float16x8x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv8hf ((const __builtin_aarch64_simd_hf *) __a); -- ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 0); -- ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 1); -- ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregciv8hf (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) --vld3q_dup_f32 (const float32_t * __a) -+/* vmlaq_lane */ -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, -+ float32x2_t __c, const int __lane) - { -- float32x4x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv4sf ((const __builtin_aarch64_simd_sf *) __a); -- ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); -- ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); -- ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); -- return 
ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__)) --vld3q_dup_f64 (const float64_t * __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, -+ int16x4_t __c, const int __lane) - { -- float64x2x3_t ret; -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_ld3rv2df ((const __builtin_aarch64_simd_df *) __a); -- ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); -- ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); -- ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) --vld4_dup_s64 (const int64_t * __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, -+ int32x2_t __c, const int __lane) - { -- int64x1x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -- ret.val[1] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -- ret.val[2] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -- ret.val[3] = (int64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) --vld4_dup_u64 (const uint64_t * __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, -+ uint16x4_t __c, const int __lane) - { -- uint64x1x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rdi ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 0); -- ret.val[1] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 1); -- ret.val[2] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 2); -- ret.val[3] = (uint64x1_t) __builtin_aarch64_get_dregxidi (__o, 3); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline float64x1x4_t __attribute__ ((__always_inline__)) --vld4_dup_f64 (const float64_t * __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, -+ uint32x2_t __c, const int __lane) - { -- float64x1x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rdf ((const __builtin_aarch64_simd_df *) __a); -- ret.val[0] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 0)}; -- ret.val[1] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 1)}; -- ret.val[2] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 2)}; -- ret.val[3] = (float64x1_t) {__builtin_aarch64_get_dregxidf (__o, 3)}; -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) --vld4_dup_s8 (const int8_t * __a) -+ /* vmlaq_laneq */ -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b, -+ float32x4_t __c, const int __lane) - { -- int8x8x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -- ret.val[1] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -- ret.val[2] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -- ret.val[3] = (int8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) --vld4_dup_p8 (const poly8_t * __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b, -+ int16x8_t __c, const int __lane) - { -- poly8x8x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -- ret.val[1] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -- ret.val[2] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -- ret.val[3] = (poly8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) --vld4_dup_s16 (const int16_t * __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b, -+ int32x4_t __c, const int __lane) - { -- int16x4x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -- ret.val[1] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -- ret.val[2] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -- ret.val[3] = (int16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) --vld4_dup_p16 (const poly16_t * __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, -+ uint16x8_t __c, const int __lane) - { -- poly16x4x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -- ret.val[1] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -- ret.val[2] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -- ret.val[3] = (poly16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) --vld4_dup_s32 (const int32_t * __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, -+ uint32x4_t __c, const int __lane) - { -- int32x2x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = 
(int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -- ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -- ret.val[2] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -- ret.val[3] = (int32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -- return ret; -+ return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) --vld4_dup_u8 (const uint8_t * __a) -+/* vmls */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) - { -- uint8x8x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv8qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 0); -- ret.val[1] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 1); -- ret.val[2] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 2); -- ret.val[3] = (uint8x8_t) __builtin_aarch64_get_dregxiv8qi (__o, 3); -- return ret; -+ return a - b * c; - } - --__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) --vld4_dup_u16 (const uint16_t * __a) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) - { -- uint16x4x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv4hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 0); -- ret.val[1] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 1); -- ret.val[2] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 2); -- ret.val[3] = (uint16x4_t) __builtin_aarch64_get_dregxiv4hi (__o, 3); -- return ret; -+ return __a - __b * __c; - } - --__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) --vld4_dup_u32 (const uint32_t * __a) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) - { -- uint32x2x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv2si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 0); -- ret.val[1] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 1); -- ret.val[2] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 2); -- ret.val[3] = (uint32x2_t) __builtin_aarch64_get_dregxiv2si (__o, 3); -- return ret; -+ return a - b * c; - } - --__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__)) --vld4_dup_f16 (const float16_t * __a) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) - { -- float16x4x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv4hf ((const __builtin_aarch64_simd_hf *) __a); -- ret.val[0] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 0); -- ret.val[1] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 1); -- ret.val[2] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 2); -- ret.val[3] = (float16x4_t) __builtin_aarch64_get_dregxiv4hf (__o, 3); -- return ret; -+ return a - b * c; - } - --__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) --vld4_dup_f32 (const float32_t * __a) -+/* vmls_lane */ 
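
Editor's aside, not patch text: the plain vmls* definitions above are open-coded as C arithmetic, so vmls_f32 (a, b, c) is exactly a - b * c per lane, and the expression participates in normal mid-end optimisation. A minimal usage sketch; the function name main and the check values are illustrative assumptions, not from the GCC testsuite:

  #include <arm_neon.h>
  #include <assert.h>

  int
  main (void)
  {
    /* Illustrative values, not from the patch.  */
    float32x2_t a = vdup_n_f32 (10.0f);
    float32x2_t b = vdup_n_f32 (2.0f);
    float32x2_t c = vdup_n_f32 (3.0f);
    /* Element-wise multiply-subtract: each lane is 10 - 2 * 3 = 4.  */
    float32x2_t r = vmls_f32 (a, b, c);
    assert (vget_lane_f32 (r, 0) == 4.0f);
    assert (vget_lane_f32 (r, 1) == 4.0f);
    return 0;
  }
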
-+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_lane_f32 (float32x2_t __a, float32x2_t __b, -+ float32x2_t __c, const int __lane) - { -- float32x2x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv2sf ((const __builtin_aarch64_simd_sf *) __a); -- ret.val[0] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 0); -- ret.val[1] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 1); -- ret.val[2] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 2); -- ret.val[3] = (float32x2_t) __builtin_aarch64_get_dregxiv2sf (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) --vld4q_dup_s8 (const int8_t * __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_lane_s16 (int16x4_t __a, int16x4_t __b, -+ int16x4_t __c, const int __lane) - { -- int8x16x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -- ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -- ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -- ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) --vld4q_dup_p8 (const poly8_t * __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_lane_s32 (int32x2_t __a, int32x2_t __b, -+ int32x2_t __c, const int __lane) - { -- poly8x16x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -- ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -- ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -- ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) --vld4q_dup_s16 (const int16_t * __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, -+ uint16x4_t __c, const int __lane) - { -- int16x8x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -- ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -- ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -- ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) --vld4q_dup_p16 (const poly16_t * __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, -+ uint32x2_t __c, const int __lane) - { -- poly16x8x4_t ret; -- 
__builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -- ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); -- ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -- ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) --vld4q_dup_s32 (const int32_t * __a) -+/* vmls_laneq */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_laneq_f32 (float32x2_t __a, float32x2_t __b, -+ float32x4_t __c, const int __lane) - { -- int32x4x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -- ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -- ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -- ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__)) --vld4q_dup_s64 (const int64_t * __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_laneq_s16 (int16x4_t __a, int16x4_t __b, -+ int16x8_t __c, const int __lane) - { -- int64x2x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -- ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -- ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -- ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) --vld4q_dup_u8 (const uint8_t * __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_laneq_s32 (int32x2_t __a, int32x2_t __b, -+ int32x4_t __c, const int __lane) - { -- uint8x16x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv16qi ((const __builtin_aarch64_simd_qi *) __a); -- ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); -- ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); -- ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); -- ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) --vld4q_dup_u16 (const uint16_t * __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b, -+ uint16x8_t __c, const int __lane) - { -- uint16x8x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv8hi ((const __builtin_aarch64_simd_hi *) __a); -- ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); -- ret.val[1] = (uint16x8_t) 
__builtin_aarch64_get_qregxiv8hi (__o, 1); -- ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); -- ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) --vld4q_dup_u32 (const uint32_t * __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b, -+ uint32x4_t __c, const int __lane) - { -- uint32x4x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv4si ((const __builtin_aarch64_simd_si *) __a); -- ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); -- ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); -- ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); -- ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__)) --vld4q_dup_u64 (const uint64_t * __a) -+/* vmlsq_lane */ -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, -+ float32x2_t __c, const int __lane) - { -- uint64x2x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv2di ((const __builtin_aarch64_simd_di *) __a); -- ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); -- ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); -- ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); -- ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__)) --vld4q_dup_f16 (const float16_t * __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, -+ int16x4_t __c, const int __lane) - { -- float16x8x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv8hf ((const __builtin_aarch64_simd_hf *) __a); -- ret.val[0] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 0); -- ret.val[1] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 1); -- ret.val[2] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 2); -- ret.val[3] = (float16x8_t) __builtin_aarch64_get_qregxiv8hf (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) --vld4q_dup_f32 (const float32_t * __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, -+ int32x2_t __c, const int __lane) - { -- float32x4x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv4sf ((const __builtin_aarch64_simd_sf *) __a); -- ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); -- ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); -- ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); -- ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); -- 
return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__)) --vld4q_dup_f64 (const float64_t * __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, -+ uint16x4_t __c, const int __lane) - { -- float64x2x4_t ret; -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_ld4rv2df ((const __builtin_aarch64_simd_df *) __a); -- ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); -- ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); -- ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); -- ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); -- return ret; -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --/* vld2_lane */ -- --#define __LD2_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ -- qmode, ptrmode, funcsuffix, signedtype) \ --__extension__ static __inline intype __attribute__ ((__always_inline__)) \ --vld2_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_oi __o; \ -- largetype __temp; \ -- __temp.val[0] = \ -- vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ -- __temp.val[1] = \ -- vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ -- __o = __builtin_aarch64_set_qregoi##qmode (__o, \ -- (signedtype) __temp.val[0], \ -- 0); \ -- __o = __builtin_aarch64_set_qregoi##qmode (__o, \ -- (signedtype) __temp.val[1], \ -- 1); \ -- __o = __builtin_aarch64_ld2_lane##mode ( \ -- (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -- __b.val[0] = (vectype) __builtin_aarch64_get_dregoidi (__o, 0); \ -- __b.val[1] = (vectype) __builtin_aarch64_get_dregoidi (__o, 1); \ -- return __b; \ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, -+ uint32x2_t __c, const int __lane) -+{ -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__LD2_LANE_FUNC (float16x4x2_t, float16x4_t, float16x8x2_t, float16_t, v4hf, -- v8hf, hf, f16, float16x8_t) --__LD2_LANE_FUNC (float32x2x2_t, float32x2_t, float32x4x2_t, float32_t, v2sf, v4sf, -- sf, f32, float32x4_t) --__LD2_LANE_FUNC (float64x1x2_t, float64x1_t, float64x2x2_t, float64_t, df, v2df, -- df, f64, float64x2_t) --__LD2_LANE_FUNC (poly8x8x2_t, poly8x8_t, poly8x16x2_t, poly8_t, v8qi, v16qi, qi, p8, -- int8x16_t) --__LD2_LANE_FUNC (poly16x4x2_t, poly16x4_t, poly16x8x2_t, poly16_t, v4hi, v8hi, hi, -- p16, int16x8_t) --__LD2_LANE_FUNC (int8x8x2_t, int8x8_t, int8x16x2_t, int8_t, v8qi, v16qi, qi, s8, -- int8x16_t) --__LD2_LANE_FUNC (int16x4x2_t, int16x4_t, int16x8x2_t, int16_t, v4hi, v8hi, hi, s16, -- int16x8_t) --__LD2_LANE_FUNC (int32x2x2_t, int32x2_t, int32x4x2_t, int32_t, v2si, v4si, si, s32, -- int32x4_t) --__LD2_LANE_FUNC (int64x1x2_t, int64x1_t, int64x2x2_t, int64_t, di, v2di, di, s64, -- int64x2_t) --__LD2_LANE_FUNC (uint8x8x2_t, uint8x8_t, uint8x16x2_t, uint8_t, v8qi, v16qi, qi, u8, -- int8x16_t) --__LD2_LANE_FUNC (uint16x4x2_t, uint16x4_t, uint16x8x2_t, uint16_t, v4hi, v8hi, hi, -- u16, int16x8_t) --__LD2_LANE_FUNC (uint32x2x2_t, uint32x2_t, uint32x4x2_t, uint32_t, v2si, v4si, si, -- u32, int32x4_t) --__LD2_LANE_FUNC (uint64x1x2_t, uint64x1_t, uint64x2x2_t, uint64_t, di, v2di, di, -- u64, int64x2_t) -+ /* vmlsq_laneq */ -+ 
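
Editor's aside, not patch text: every *_lane and *_laneq variant above shares one shape; __aarch64_vget_lane_any extracts lane __lane of __c, and that scalar is implicitly broadcast in the vector expression. The _laneq forms differ only in taking a 128-bit __c, widening the legal lane range. A sketch of the resulting semantics; the values and the function name main are my own assumptions, not patch text:

  #include <arm_neon.h>
  #include <assert.h>

  int
  main (void)
  {
    int16x4_t acc = vdup_n_s16 (100);
    int16x4_t b = vdup_n_s16 (2);
    int16x4_t c = {10, 20, 30, 40};	/* GCC vector-literal syntax.  */
    /* Lane 2 of c (== 30) is broadcast: each lane is 100 - 2 * 30.  */
    int16x4_t r = vmls_lane_s16 (acc, b, c, 2);
    assert (vget_lane_s16 (r, 0) == 40);
    assert (vget_lane_s16 (r, 3) == 40);
    return 0;
  }
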
-+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b, -+ float32x4_t __c, const int __lane) -+{ -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+} - --#undef __LD2_LANE_FUNC -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b, -+ int16x8_t __c, const int __lane) -+{ -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+} - --/* vld2q_lane */ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b, -+ int32x4_t __c, const int __lane) -+{ -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+} -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, -+ uint16x8_t __c, const int __lane) -+{ -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+} - --#define __LD2_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ --__extension__ static __inline intype __attribute__ ((__always_inline__)) \ --vld2q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_oi __o; \ -- intype ret; \ -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[0], 0); \ -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) __b.val[1], 1); \ -- __o = __builtin_aarch64_ld2_lane##mode ( \ -- (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -- ret.val[0] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 0); \ -- ret.val[1] = (vtype) __builtin_aarch64_get_qregoiv4si (__o, 1); \ -- return ret; \ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, -+ uint32x4_t __c, const int __lane) -+{ -+ return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); - } - --__LD2_LANE_FUNC (float16x8x2_t, float16x8_t, float16_t, v8hf, hf, f16) --__LD2_LANE_FUNC (float32x4x2_t, float32x4_t, float32_t, v4sf, sf, f32) --__LD2_LANE_FUNC (float64x2x2_t, float64x2_t, float64_t, v2df, df, f64) --__LD2_LANE_FUNC (poly8x16x2_t, poly8x16_t, poly8_t, v16qi, qi, p8) --__LD2_LANE_FUNC (poly16x8x2_t, poly16x8_t, poly16_t, v8hi, hi, p16) --__LD2_LANE_FUNC (int8x16x2_t, int8x16_t, int8_t, v16qi, qi, s8) --__LD2_LANE_FUNC (int16x8x2_t, int16x8_t, int16_t, v8hi, hi, s16) --__LD2_LANE_FUNC (int32x4x2_t, int32x4_t, int32_t, v4si, si, s32) --__LD2_LANE_FUNC (int64x2x2_t, int64x2_t, int64_t, v2di, di, s64) --__LD2_LANE_FUNC (uint8x16x2_t, uint8x16_t, uint8_t, v16qi, qi, u8) --__LD2_LANE_FUNC (uint16x8x2_t, uint16x8_t, uint16_t, v8hi, hi, u16) --__LD2_LANE_FUNC (uint32x4x2_t, uint32x4_t, uint32_t, v4si, si, u32) --__LD2_LANE_FUNC (uint64x2x2_t, uint64x2_t, uint64_t, v2di, di, u64) -+/* vmov_n_ */ - --#undef __LD2_LANE_FUNC -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_f16 (float16_t __a) -+{ -+ return vdup_n_f16 (__a); -+} - --/* vld3_lane */ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_f32 (float32_t __a) -+{ -+ return vdup_n_f32 (__a); -+} - --#define __LD3_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ -- qmode, ptrmode, 
funcsuffix, signedtype) \ --__extension__ static __inline intype __attribute__ ((__always_inline__)) \ --vld3_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_ci __o; \ -- largetype __temp; \ -- __temp.val[0] = \ -- vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ -- __temp.val[1] = \ -- vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ -- __temp.val[2] = \ -- vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ -- __o = __builtin_aarch64_set_qregci##qmode (__o, \ -- (signedtype) __temp.val[0], \ -- 0); \ -- __o = __builtin_aarch64_set_qregci##qmode (__o, \ -- (signedtype) __temp.val[1], \ -- 1); \ -- __o = __builtin_aarch64_set_qregci##qmode (__o, \ -- (signedtype) __temp.val[2], \ -- 2); \ -- __o = __builtin_aarch64_ld3_lane##mode ( \ -- (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -- __b.val[0] = (vectype) __builtin_aarch64_get_dregcidi (__o, 0); \ -- __b.val[1] = (vectype) __builtin_aarch64_get_dregcidi (__o, 1); \ -- __b.val[2] = (vectype) __builtin_aarch64_get_dregcidi (__o, 2); \ -- return __b; \ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_f64 (float64_t __a) -+{ -+ return (float64x1_t) {__a}; - } - --__LD3_LANE_FUNC (float16x4x3_t, float16x4_t, float16x8x3_t, float16_t, v4hf, -- v8hf, hf, f16, float16x8_t) --__LD3_LANE_FUNC (float32x2x3_t, float32x2_t, float32x4x3_t, float32_t, v2sf, v4sf, -- sf, f32, float32x4_t) --__LD3_LANE_FUNC (float64x1x3_t, float64x1_t, float64x2x3_t, float64_t, df, v2df, -- df, f64, float64x2_t) --__LD3_LANE_FUNC (poly8x8x3_t, poly8x8_t, poly8x16x3_t, poly8_t, v8qi, v16qi, qi, p8, -- int8x16_t) --__LD3_LANE_FUNC (poly16x4x3_t, poly16x4_t, poly16x8x3_t, poly16_t, v4hi, v8hi, hi, -- p16, int16x8_t) --__LD3_LANE_FUNC (int8x8x3_t, int8x8_t, int8x16x3_t, int8_t, v8qi, v16qi, qi, s8, -- int8x16_t) --__LD3_LANE_FUNC (int16x4x3_t, int16x4_t, int16x8x3_t, int16_t, v4hi, v8hi, hi, s16, -- int16x8_t) --__LD3_LANE_FUNC (int32x2x3_t, int32x2_t, int32x4x3_t, int32_t, v2si, v4si, si, s32, -- int32x4_t) --__LD3_LANE_FUNC (int64x1x3_t, int64x1_t, int64x2x3_t, int64_t, di, v2di, di, s64, -- int64x2_t) --__LD3_LANE_FUNC (uint8x8x3_t, uint8x8_t, uint8x16x3_t, uint8_t, v8qi, v16qi, qi, u8, -- int8x16_t) --__LD3_LANE_FUNC (uint16x4x3_t, uint16x4_t, uint16x8x3_t, uint16_t, v4hi, v8hi, hi, -- u16, int16x8_t) --__LD3_LANE_FUNC (uint32x2x3_t, uint32x2_t, uint32x4x3_t, uint32_t, v2si, v4si, si, -- u32, int32x4_t) --__LD3_LANE_FUNC (uint64x1x3_t, uint64x1_t, uint64x2x3_t, uint64_t, di, v2di, di, -- u64, int64x2_t) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_p8 (poly8_t __a) -+{ -+ return vdup_n_p8 (__a); -+} - --#undef __LD3_LANE_FUNC -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_p16 (poly16_t __a) -+{ -+ return vdup_n_p16 (__a); -+} - --/* vld3q_lane */ -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_p64 (poly64_t __a) -+{ -+ return vdup_n_p64 (__a); -+} - --#define __LD3_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ --__extension__ static __inline intype __attribute__ ((__always_inline__)) \ --vld3q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_ci __o; \ -- intype ret; \ -- __o = 
__builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[0], 0); \ -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[1], 1); \ -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) __b.val[2], 2); \ -- __o = __builtin_aarch64_ld3_lane##mode ( \ -- (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -- ret.val[0] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 0); \ -- ret.val[1] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 1); \ -- ret.val[2] = (vtype) __builtin_aarch64_get_qregciv4si (__o, 2); \ -- return ret; \ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_s8 (int8_t __a) -+{ -+ return vdup_n_s8 (__a); - } - --__LD3_LANE_FUNC (float16x8x3_t, float16x8_t, float16_t, v8hf, hf, f16) --__LD3_LANE_FUNC (float32x4x3_t, float32x4_t, float32_t, v4sf, sf, f32) --__LD3_LANE_FUNC (float64x2x3_t, float64x2_t, float64_t, v2df, df, f64) --__LD3_LANE_FUNC (poly8x16x3_t, poly8x16_t, poly8_t, v16qi, qi, p8) --__LD3_LANE_FUNC (poly16x8x3_t, poly16x8_t, poly16_t, v8hi, hi, p16) --__LD3_LANE_FUNC (int8x16x3_t, int8x16_t, int8_t, v16qi, qi, s8) --__LD3_LANE_FUNC (int16x8x3_t, int16x8_t, int16_t, v8hi, hi, s16) --__LD3_LANE_FUNC (int32x4x3_t, int32x4_t, int32_t, v4si, si, s32) --__LD3_LANE_FUNC (int64x2x3_t, int64x2_t, int64_t, v2di, di, s64) --__LD3_LANE_FUNC (uint8x16x3_t, uint8x16_t, uint8_t, v16qi, qi, u8) --__LD3_LANE_FUNC (uint16x8x3_t, uint16x8_t, uint16_t, v8hi, hi, u16) --__LD3_LANE_FUNC (uint32x4x3_t, uint32x4_t, uint32_t, v4si, si, u32) --__LD3_LANE_FUNC (uint64x2x3_t, uint64x2_t, uint64_t, v2di, di, u64) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_s16 (int16_t __a) -+{ -+ return vdup_n_s16 (__a); -+} - --#undef __LD3_LANE_FUNC -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_s32 (int32_t __a) -+{ -+ return vdup_n_s32 (__a); -+} - --/* vld4_lane */ -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_s64 (int64_t __a) -+{ -+ return (int64x1_t) {__a}; -+} - --#define __LD4_LANE_FUNC(intype, vectype, largetype, ptrtype, mode, \ -- qmode, ptrmode, funcsuffix, signedtype) \ --__extension__ static __inline intype __attribute__ ((__always_inline__)) \ --vld4_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_xi __o; \ -- largetype __temp; \ -- __temp.val[0] = \ -- vcombine_##funcsuffix (__b.val[0], vcreate_##funcsuffix (0)); \ -- __temp.val[1] = \ -- vcombine_##funcsuffix (__b.val[1], vcreate_##funcsuffix (0)); \ -- __temp.val[2] = \ -- vcombine_##funcsuffix (__b.val[2], vcreate_##funcsuffix (0)); \ -- __temp.val[3] = \ -- vcombine_##funcsuffix (__b.val[3], vcreate_##funcsuffix (0)); \ -- __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -- (signedtype) __temp.val[0], \ -- 0); \ -- __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -- (signedtype) __temp.val[1], \ -- 1); \ -- __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -- (signedtype) __temp.val[2], \ -- 2); \ -- __o = __builtin_aarch64_set_qregxi##qmode (__o, \ -- (signedtype) __temp.val[3], \ -- 3); \ -- __o = __builtin_aarch64_ld4_lane##mode ( \ -- (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -- __b.val[0] = (vectype) __builtin_aarch64_get_dregxidi (__o, 0); \ -- __b.val[1] = (vectype) __builtin_aarch64_get_dregxidi (__o, 1); \ -- __b.val[2] = (vectype) 
__builtin_aarch64_get_dregxidi (__o, 2); \ -- __b.val[3] = (vectype) __builtin_aarch64_get_dregxidi (__o, 3); \ -- return __b; \ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_u8 (uint8_t __a) -+{ -+ return vdup_n_u8 (__a); - } - --/* vld4q_lane */ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_u16 (uint16_t __a) -+{ -+ return vdup_n_u16 (__a); -+} - --__LD4_LANE_FUNC (float16x4x4_t, float16x4_t, float16x8x4_t, float16_t, v4hf, -- v8hf, hf, f16, float16x8_t) --__LD4_LANE_FUNC (float32x2x4_t, float32x2_t, float32x4x4_t, float32_t, v2sf, v4sf, -- sf, f32, float32x4_t) --__LD4_LANE_FUNC (float64x1x4_t, float64x1_t, float64x2x4_t, float64_t, df, v2df, -- df, f64, float64x2_t) --__LD4_LANE_FUNC (poly8x8x4_t, poly8x8_t, poly8x16x4_t, poly8_t, v8qi, v16qi, qi, p8, -- int8x16_t) --__LD4_LANE_FUNC (poly16x4x4_t, poly16x4_t, poly16x8x4_t, poly16_t, v4hi, v8hi, hi, -- p16, int16x8_t) --__LD4_LANE_FUNC (int8x8x4_t, int8x8_t, int8x16x4_t, int8_t, v8qi, v16qi, qi, s8, -- int8x16_t) --__LD4_LANE_FUNC (int16x4x4_t, int16x4_t, int16x8x4_t, int16_t, v4hi, v8hi, hi, s16, -- int16x8_t) --__LD4_LANE_FUNC (int32x2x4_t, int32x2_t, int32x4x4_t, int32_t, v2si, v4si, si, s32, -- int32x4_t) --__LD4_LANE_FUNC (int64x1x4_t, int64x1_t, int64x2x4_t, int64_t, di, v2di, di, s64, -- int64x2_t) --__LD4_LANE_FUNC (uint8x8x4_t, uint8x8_t, uint8x16x4_t, uint8_t, v8qi, v16qi, qi, u8, -- int8x16_t) --__LD4_LANE_FUNC (uint16x4x4_t, uint16x4_t, uint16x8x4_t, uint16_t, v4hi, v8hi, hi, -- u16, int16x8_t) --__LD4_LANE_FUNC (uint32x2x4_t, uint32x2_t, uint32x4x4_t, uint32_t, v2si, v4si, si, -- u32, int32x4_t) --__LD4_LANE_FUNC (uint64x1x4_t, uint64x1_t, uint64x2x4_t, uint64_t, di, v2di, di, -- u64, int64x2_t) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_u32 (uint32_t __a) -+{ -+ return vdup_n_u32 (__a); -+} -+ -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_u64 (uint64_t __a) -+{ -+ return (uint64x1_t) {__a}; -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_f16 (float16_t __a) -+{ -+ return vdupq_n_f16 (__a); -+} -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_f32 (float32_t __a) -+{ -+ return vdupq_n_f32 (__a); -+} -+ -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_f64 (float64_t __a) -+{ -+ return vdupq_n_f64 (__a); -+} - --#undef __LD4_LANE_FUNC -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_p8 (poly8_t __a) -+{ -+ return vdupq_n_p8 (__a); -+} - --/* vld4q_lane */ -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_p16 (poly16_t __a) -+{ -+ return vdupq_n_p16 (__a); -+} - --#define __LD4_LANE_FUNC(intype, vtype, ptrtype, mode, ptrmode, funcsuffix) \ --__extension__ static __inline intype __attribute__ ((__always_inline__)) \ --vld4q_lane_##funcsuffix (const ptrtype * __ptr, intype __b, const int __c) \ --{ \ -- __builtin_aarch64_simd_xi __o; \ -- intype ret; \ -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[0], 0); \ -- __o = 
__builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[1], 1); \ -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[2], 2); \ -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) __b.val[3], 3); \ -- __o = __builtin_aarch64_ld4_lane##mode ( \ -- (__builtin_aarch64_simd_##ptrmode *) __ptr, __o, __c); \ -- ret.val[0] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 0); \ -- ret.val[1] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 1); \ -- ret.val[2] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 2); \ -- ret.val[3] = (vtype) __builtin_aarch64_get_qregxiv4si (__o, 3); \ -- return ret; \ -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_p64 (poly64_t __a) -+{ -+ return vdupq_n_p64 (__a); - } - --__LD4_LANE_FUNC (float16x8x4_t, float16x8_t, float16_t, v8hf, hf, f16) --__LD4_LANE_FUNC (float32x4x4_t, float32x4_t, float32_t, v4sf, sf, f32) --__LD4_LANE_FUNC (float64x2x4_t, float64x2_t, float64_t, v2df, df, f64) --__LD4_LANE_FUNC (poly8x16x4_t, poly8x16_t, poly8_t, v16qi, qi, p8) --__LD4_LANE_FUNC (poly16x8x4_t, poly16x8_t, poly16_t, v8hi, hi, p16) --__LD4_LANE_FUNC (int8x16x4_t, int8x16_t, int8_t, v16qi, qi, s8) --__LD4_LANE_FUNC (int16x8x4_t, int16x8_t, int16_t, v8hi, hi, s16) --__LD4_LANE_FUNC (int32x4x4_t, int32x4_t, int32_t, v4si, si, s32) --__LD4_LANE_FUNC (int64x2x4_t, int64x2_t, int64_t, v2di, di, s64) --__LD4_LANE_FUNC (uint8x16x4_t, uint8x16_t, uint8_t, v16qi, qi, u8) --__LD4_LANE_FUNC (uint16x8x4_t, uint16x8_t, uint16_t, v8hi, hi, u16) --__LD4_LANE_FUNC (uint32x4x4_t, uint32x4_t, uint32_t, v4si, si, u32) --__LD4_LANE_FUNC (uint64x2x4_t, uint64x2_t, uint64_t, v2di, di, u64) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_s8 (int8_t __a) -+{ -+ return vdupq_n_s8 (__a); -+} - --#undef __LD4_LANE_FUNC -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_s16 (int16_t __a) -+{ -+ return vdupq_n_s16 (__a); -+} - --/* vmax */ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_s32 (int32_t __a) -+{ -+ return vdupq_n_s32 (__a); -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmax_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_s64 (int64_t __a) - { -- return __builtin_aarch64_smax_nanv2sf (__a, __b); -+ return vdupq_n_s64 (__a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vmax_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_u8 (uint8_t __a) - { -- return __builtin_aarch64_smaxv8qi (__a, __b); -+ return vdupq_n_u8 (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmax_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_u16 (uint16_t __a) - { -- return __builtin_aarch64_smaxv4hi (__a, __b); -+ return vdupq_n_u16 (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmax_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vmovq_n_u32 (uint32_t __a) - { -- return __builtin_aarch64_smaxv2si (__a, __b); -+ return vdupq_n_u32 (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vmax_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_u64 (uint64_t __a) - { -- return (uint8x8_t) __builtin_aarch64_umaxv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return vdupq_n_u64 (__a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmax_u16 (uint16x4_t __a, uint16x4_t __b) -+/* vmul_lane */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane) - { -- return (uint16x4_t) __builtin_aarch64_umaxv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmax_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane) - { -- return (uint32x2_t) __builtin_aarch64_umaxv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __a * __b; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmaxq_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane) - { -- return __builtin_aarch64_smax_nanv4sf (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmaxq_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane) - { -- return __builtin_aarch64_smax_nanv2df (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vmaxq_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane) - { -- return __builtin_aarch64_smaxv16qi (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmaxq_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane) - { -- return __builtin_aarch64_smaxv8hi (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmaxq_s32 (int32x4_t __a, int32x4_t __b) -+/* vmuld_lane */ -+ -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane) - { -- return __builtin_aarch64_smaxv4si (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ 
static __inline uint8x16_t __attribute__ ((__always_inline__)) --vmaxq_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane) - { -- return (uint8x16_t) __builtin_aarch64_umaxv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmaxq_u16 (uint16x8_t __a, uint16x8_t __b) -+/* vmuls_lane */ -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane) - { -- return (uint16x8_t) __builtin_aarch64_umaxv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmaxq_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane) - { -- return (uint32x4_t) __builtin_aarch64_umaxv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } --/* vmulx */ - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmulx_f32 (float32x2_t __a, float32x2_t __b) -+/* vmul_laneq */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane) - { -- return __builtin_aarch64_fmulxv2sf (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmulxq_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane) - { -- return __builtin_aarch64_fmulxv4sf (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmulx_f64 (float64x1_t __a, float64x1_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane) - { -- return (float64x1_t) {__builtin_aarch64_fmulxdf (__a[0], __b[0])}; -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmulxq_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane) - { -- return __builtin_aarch64_fmulxv2df (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vmulxs_f32 (float32_t __a, float32_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane) - { -- return __builtin_aarch64_fmulxsf (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - 
--__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vmulxd_f64 (float64_t __a, float64_t __b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane) - { -- return __builtin_aarch64_fmulxdf (__a, __b); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmulx_lane_f32 (float32x2_t __a, float32x2_t __v, const int __lane) -+/* vmul_n */ -+ -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_n_f64 (float64x1_t __a, float64_t __b) - { -- return vmulx_f32 (__a, __aarch64_vdup_lane_f32 (__v, __lane)); -+ return (float64x1_t) { vget_lane_f64 (__a, 0) * __b }; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmulx_lane_f64 (float64x1_t __a, float64x1_t __v, const int __lane) -+/* vmulq_lane */ -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane) - { -- return vmulx_f64 (__a, __aarch64_vdup_lane_f64 (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmulxq_lane_f32 (float32x4_t __a, float32x2_t __v, const int __lane) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane) - { -- return vmulxq_f32 (__a, __aarch64_vdupq_lane_f32 (__v, __lane)); -+ __AARCH64_LANE_CHECK (__a, __lane); -+ return __a * __b[0]; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmulxq_lane_f64 (float64x2_t __a, float64x1_t __v, const int __lane) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane) - { -- return vmulxq_f64 (__a, __aarch64_vdupq_lane_f64 (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmulx_laneq_f32 (float32x2_t __a, float32x4_t __v, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane) - { -- return vmulx_f32 (__a, __aarch64_vdup_laneq_f32 (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmulx_laneq_f64 (float64x1_t __a, float64x2_t __v, const int __lane) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane) - { -- return vmulx_f64 (__a, __aarch64_vdup_laneq_f64 (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmulxq_laneq_f32 (float32x4_t __a, float32x4_t __v, const int __lane) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int 
__lane) - { -- return vmulxq_f32 (__a, __aarch64_vdupq_laneq_f32 (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmulxq_laneq_f64 (float64x2_t __a, float64x2_t __v, const int __lane) -+/* vmulq_laneq */ -+ -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane) - { -- return vmulxq_f64 (__a, __aarch64_vdupq_laneq_f64 (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vmulxs_lane_f32 (float32_t __a, float32x2_t __v, const int __lane) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane) - { -- return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vmulxs_laneq_f32 (float32_t __a, float32x4_t __v, const int __lane) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane) - { -- return vmulxs_f32 (__a, __aarch64_vget_lane_any (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vmulxd_lane_f64 (float64_t __a, float64x1_t __v, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane) - { -- return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vmulxd_laneq_f64 (float64_t __a, float64x2_t __v, const int __lane) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane) - { -- return vmulxd_f64 (__a, __aarch64_vget_lane_any (__v, __lane)); -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --/* vpmax */ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane) -+{ -+ return __a * __aarch64_vget_lane_any (__b, __lane); -+} - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vpmax_s8 (int8x8_t a, int8x8_t b) -+/* vmul_n. 
*/ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_n_f32 (float32x2_t __a, float32_t __b) - { -- return __builtin_aarch64_smaxpv8qi (a, b); -+ return __a * __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vpmax_s16 (int16x4_t a, int16x4_t b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_n_f32 (float32x4_t __a, float32_t __b) - { -- return __builtin_aarch64_smaxpv4hi (a, b); -+ return __a * __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vpmax_s32 (int32x2_t a, int32x2_t b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_n_f64 (float64x2_t __a, float64_t __b) - { -- return __builtin_aarch64_smaxpv2si (a, b); -+ return __a * __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vpmax_u8 (uint8x8_t a, uint8x8_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_n_s16 (int16x4_t __a, int16_t __b) - { -- return (uint8x8_t) __builtin_aarch64_umaxpv8qi ((int8x8_t) a, -- (int8x8_t) b); -+ return __a * __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vpmax_u16 (uint16x4_t a, uint16x4_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_n_s16 (int16x8_t __a, int16_t __b) - { -- return (uint16x4_t) __builtin_aarch64_umaxpv4hi ((int16x4_t) a, -- (int16x4_t) b); -+ return __a * __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vpmax_u32 (uint32x2_t a, uint32x2_t b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_n_s32 (int32x2_t __a, int32_t __b) - { -- return (uint32x2_t) __builtin_aarch64_umaxpv2si ((int32x2_t) a, -- (int32x2_t) b); -+ return __a * __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vpmaxq_s8 (int8x16_t a, int8x16_t b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_n_s32 (int32x4_t __a, int32_t __b) - { -- return __builtin_aarch64_smaxpv16qi (a, b); -+ return __a * __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vpmaxq_s16 (int16x8_t a, int16x8_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_n_u16 (uint16x4_t __a, uint16_t __b) - { -- return __builtin_aarch64_smaxpv8hi (a, b); -+ return __a * __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vpmaxq_s32 (int32x4_t a, int32x4_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_n_u16 (uint16x8_t __a, uint16_t __b) -+{ -+ return __a * __b; -+} -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_n_u32 (uint32x2_t __a, uint32_t __b) -+{ -+ return __a * __b; -+} -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_n_u32 (uint32x4_t __a, uint32_t __b) -+{ -+ return __a * __b; -+} -+ -+/* vmvn */ -+ -+__extension__ extern 
__inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvn_p8 (poly8x8_t __a) - { -- return __builtin_aarch64_smaxpv4si (a, b); -+ return (poly8x8_t) ~((int8x8_t) __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vpmaxq_u8 (uint8x16_t a, uint8x16_t b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvn_s8 (int8x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_umaxpv16qi ((int8x16_t) a, -- (int8x16_t) b); -+ return ~__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vpmaxq_u16 (uint16x8_t a, uint16x8_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvn_s16 (int16x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_umaxpv8hi ((int16x8_t) a, -- (int16x8_t) b); -+ return ~__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vpmaxq_u32 (uint32x4_t a, uint32x4_t b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvn_s32 (int32x2_t __a) - { -- return (uint32x4_t) __builtin_aarch64_umaxpv4si ((int32x4_t) a, -- (int32x4_t) b); -+ return ~__a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vpmax_f32 (float32x2_t a, float32x2_t b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvn_u8 (uint8x8_t __a) - { -- return __builtin_aarch64_smax_nanpv2sf (a, b); -+ return ~__a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vpmaxq_f32 (float32x4_t a, float32x4_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvn_u16 (uint16x4_t __a) - { -- return __builtin_aarch64_smax_nanpv4sf (a, b); -+ return ~__a; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vpmaxq_f64 (float64x2_t a, float64x2_t b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvn_u32 (uint32x2_t __a) - { -- return __builtin_aarch64_smax_nanpv2df (a, b); -+ return ~__a; - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vpmaxqd_f64 (float64x2_t a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvnq_p8 (poly8x16_t __a) - { -- return __builtin_aarch64_reduc_smax_nan_scal_v2df (a); -+ return (poly8x16_t) ~((int8x16_t) __a); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vpmaxs_f32 (float32x2_t a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvnq_s8 (int8x16_t __a) - { -- return __builtin_aarch64_reduc_smax_nan_scal_v2sf (a); -+ return ~__a; - } - --/* vpmaxnm */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vpmaxnm_f32 (float32x2_t a, float32x2_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvnq_s16 (int16x8_t __a) - { -- return __builtin_aarch64_smaxpv2sf (a, b); -+ return ~__a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vpmaxnmq_f32 (float32x4_t a, float32x4_t b) -+__extension__ extern __inline 
int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvnq_s32 (int32x4_t __a) - { -- return __builtin_aarch64_smaxpv4sf (a, b); -+ return ~__a; - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vpmaxnmq_f64 (float64x2_t a, float64x2_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvnq_u8 (uint8x16_t __a) - { -- return __builtin_aarch64_smaxpv2df (a, b); -+ return ~__a; - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vpmaxnmqd_f64 (float64x2_t a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvnq_u16 (uint16x8_t __a) - { -- return __builtin_aarch64_reduc_smax_scal_v2df (a); -+ return ~__a; - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vpmaxnms_f32 (float32x2_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmvnq_u32 (uint32x4_t __a) - { -- return __builtin_aarch64_reduc_smax_scal_v2sf (a); -+ return ~__a; - } - --/* vpmin */ -+/* vneg */ - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vpmin_s8 (int8x8_t a, int8x8_t b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vneg_f32 (float32x2_t __a) - { -- return __builtin_aarch64_sminpv8qi (a, b); -+ return -__a; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vpmin_s16 (int16x4_t a, int16x4_t b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vneg_f64 (float64x1_t __a) - { -- return __builtin_aarch64_sminpv4hi (a, b); -+ return -__a; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vpmin_s32 (int32x2_t a, int32x2_t b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vneg_s8 (int8x8_t __a) - { -- return __builtin_aarch64_sminpv2si (a, b); -+ return -__a; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vpmin_u8 (uint8x8_t a, uint8x8_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vneg_s16 (int16x4_t __a) - { -- return (uint8x8_t) __builtin_aarch64_uminpv8qi ((int8x8_t) a, -- (int8x8_t) b); -+ return -__a; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vpmin_u16 (uint16x4_t a, uint16x4_t b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vneg_s32 (int32x2_t __a) - { -- return (uint16x4_t) __builtin_aarch64_uminpv4hi ((int16x4_t) a, -- (int16x4_t) b); -+ return -__a; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vpmin_u32 (uint32x2_t a, uint32x2_t b) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vneg_s64 (int64x1_t __a) - { -- return (uint32x2_t) __builtin_aarch64_uminpv2si ((int32x2_t) a, -- (int32x2_t) b); -+ return -__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vpminq_s8 (int8x16_t a, int8x16_t b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vnegq_f32 
(float32x4_t __a) - { -- return __builtin_aarch64_sminpv16qi (a, b); -+ return -__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vpminq_s16 (int16x8_t a, int16x8_t b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vnegq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_sminpv8hi (a, b); -+ return -__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vpminq_s32 (int32x4_t a, int32x4_t b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vnegq_s8 (int8x16_t __a) - { -- return __builtin_aarch64_sminpv4si (a, b); -+ return -__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vpminq_u8 (uint8x16_t a, uint8x16_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vnegq_s16 (int16x8_t __a) - { -- return (uint8x16_t) __builtin_aarch64_uminpv16qi ((int8x16_t) a, -- (int8x16_t) b); -+ return -__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vpminq_u16 (uint16x8_t a, uint16x8_t b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vnegq_s32 (int32x4_t __a) - { -- return (uint16x8_t) __builtin_aarch64_uminpv8hi ((int16x8_t) a, -- (int16x8_t) b); -+ return -__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vpminq_u32 (uint32x4_t a, uint32x4_t b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vnegq_s64 (int64x2_t __a) - { -- return (uint32x4_t) __builtin_aarch64_uminpv4si ((int32x4_t) a, -- (int32x4_t) b); -+ return -__a; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vpmin_f32 (float32x2_t a, float32x2_t b) -+/* vpadd */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_f32 (float32x2_t __a, float32x2_t __b) - { -- return __builtin_aarch64_smin_nanpv2sf (a, b); -+ return __builtin_aarch64_faddpv2sf (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vpminq_f32 (float32x4_t a, float32x4_t b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_f32 (float32x4_t __a, float32x4_t __b) - { -- return __builtin_aarch64_smin_nanpv4sf (a, b); -+ return __builtin_aarch64_faddpv4sf (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vpminq_f64 (float64x2_t a, float64x2_t b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_f64 (float64x2_t __a, float64x2_t __b) - { -- return __builtin_aarch64_smin_nanpv2df (a, b); -+ return __builtin_aarch64_faddpv2df (__a, __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vpminqd_f64 (float64x2_t a) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_s8 (int8x8_t __a, int8x8_t __b) - { -- return __builtin_aarch64_reduc_smin_nan_scal_v2df (a); -+ return __builtin_aarch64_addpv8qi (__a, __b); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vpmins_f32 
(float32x2_t a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_s16 (int16x4_t __a, int16x4_t __b) - { -- return __builtin_aarch64_reduc_smin_nan_scal_v2sf (a); -+ return __builtin_aarch64_addpv4hi (__a, __b); - } - --/* vpminnm */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vpminnm_f32 (float32x2_t a, float32x2_t b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_s32 (int32x2_t __a, int32x2_t __b) - { -- return __builtin_aarch64_sminpv2sf (a, b); -+ return __builtin_aarch64_addpv2si (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vpminnmq_f32 (float32x4_t a, float32x4_t b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return __builtin_aarch64_sminpv4sf (a, b); -+ return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, -+ (int8x8_t) __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vpminnmq_f64 (float64x2_t a, float64x2_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return __builtin_aarch64_sminpv2df (a, b); -+ return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, -+ (int16x4_t) __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vpminnmqd_f64 (float64x2_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return __builtin_aarch64_reduc_smin_scal_v2df (a); -+ return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, -+ (int32x2_t) __b); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vpminnms_f32 (float32x2_t a) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadds_f32 (float32x2_t __a) - { -- return __builtin_aarch64_reduc_smin_scal_v2sf (a); -+ return __builtin_aarch64_reduc_plus_scal_v2sf (__a); - } - --/* vmaxnm */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmaxnm_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddd_f64 (float64x2_t __a) - { -- return __builtin_aarch64_fmaxv2sf (__a, __b); -+ return __builtin_aarch64_reduc_plus_scal_v2df (__a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddd_s64 (int64x2_t __a) - { -- return __builtin_aarch64_fmaxv4sf (__a, __b); -+ return __builtin_aarch64_addpdi (__a); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddd_u64 (uint64x2_t __a) - { -- return __builtin_aarch64_fmaxv2df (__a, __b); -+ return __builtin_aarch64_addpdi ((int64x2_t) __a); - } - --/* vmaxv */ -+/* vqabs */ - --__extension__ static __inline 
float32_t __attribute__ ((__always_inline__)) --vmaxv_f32 (float32x2_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqabsq_s64 (int64x2_t __a) - { -- return __builtin_aarch64_reduc_smax_nan_scal_v2sf (__a); -+ return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vmaxv_s8 (int8x8_t __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqabsb_s8 (int8_t __a) - { -- return __builtin_aarch64_reduc_smax_scal_v8qi (__a); -+ return (int8_t) __builtin_aarch64_sqabsqi (__a); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vmaxv_s16 (int16x4_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqabsh_s16 (int16_t __a) - { -- return __builtin_aarch64_reduc_smax_scal_v4hi (__a); -+ return (int16_t) __builtin_aarch64_sqabshi (__a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vmaxv_s32 (int32x2_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqabss_s32 (int32_t __a) - { -- return __builtin_aarch64_reduc_smax_scal_v2si (__a); -+ return (int32_t) __builtin_aarch64_sqabssi (__a); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vmaxv_u8 (uint8x8_t __a) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqabsd_s64 (int64_t __a) - { -- return __builtin_aarch64_reduc_umax_scal_v8qi_uu (__a); -+ return __builtin_aarch64_sqabsdi (__a); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vmaxv_u16 (uint16x4_t __a) --{ -- return __builtin_aarch64_reduc_umax_scal_v4hi_uu (__a); --} -+/* vqadd */ - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vmaxv_u32 (uint32x2_t __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqaddb_s8 (int8_t __a, int8_t __b) - { -- return __builtin_aarch64_reduc_umax_scal_v2si_uu (__a); -+ return (int8_t) __builtin_aarch64_sqaddqi (__a, __b); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vmaxvq_f32 (float32x4_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqaddh_s16 (int16_t __a, int16_t __b) - { -- return __builtin_aarch64_reduc_smax_nan_scal_v4sf (__a); -+ return (int16_t) __builtin_aarch64_sqaddhi (__a, __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vmaxvq_f64 (float64x2_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqadds_s32 (int32_t __a, int32_t __b) - { -- return __builtin_aarch64_reduc_smax_nan_scal_v2df (__a); -+ return (int32_t) __builtin_aarch64_sqaddsi (__a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vmaxvq_s8 (int8x16_t __a) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqaddd_s64 (int64_t __a, int64_t __b) - { -- return __builtin_aarch64_reduc_smax_scal_v16qi (__a); -+ return __builtin_aarch64_sqadddi (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) 
--vmaxvq_s16 (int16x8_t __a) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqaddb_u8 (uint8_t __a, uint8_t __b) - { -- return __builtin_aarch64_reduc_smax_scal_v8hi (__a); -+ return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vmaxvq_s32 (int32x4_t __a) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqaddh_u16 (uint16_t __a, uint16_t __b) - { -- return __builtin_aarch64_reduc_smax_scal_v4si (__a); -+ return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vmaxvq_u8 (uint8x16_t __a) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqadds_u32 (uint32_t __a, uint32_t __b) - { -- return __builtin_aarch64_reduc_umax_scal_v16qi_uu (__a); -+ return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vmaxvq_u16 (uint16x8_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqaddd_u64 (uint64_t __a, uint64_t __b) - { -- return __builtin_aarch64_reduc_umax_scal_v8hi_uu (__a); -+ return __builtin_aarch64_uqadddi_uuu (__a, __b); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vmaxvq_u32 (uint32x4_t __a) -+/* vqdmlal */ -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { -- return __builtin_aarch64_reduc_umax_scal_v4si_uu (__a); -+ return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c); - } - --/* vmaxnmv */ -- --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vmaxnmv_f32 (float32x2_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) - { -- return __builtin_aarch64_reduc_smax_scal_v2sf (__a); -+ return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vmaxnmvq_f32 (float32x4_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, -+ int const __d) - { -- return __builtin_aarch64_reduc_smax_scal_v4sf (__a); -+ return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vmaxnmvq_f64 (float64x2_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, -+ int const __d) - { -- return __builtin_aarch64_reduc_smax_scal_v2df (__a); -+ return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d); - } - --/* vmin */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmin_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) - { -- return 
__builtin_aarch64_smin_nanv2sf (__a, __b); -+ return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vmin_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) - { -- return __builtin_aarch64_sminv8qi (__a, __b); -+ return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmin_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) - { -- return __builtin_aarch64_sminv4hi (__a, __b); -+ return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmin_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) - { -- return __builtin_aarch64_sminv2si (__a, __b); -+ return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vmin_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { -- return (uint8x8_t) __builtin_aarch64_uminv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return __builtin_aarch64_sqdmlalv2si (__a, __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmin_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) - { -- return (uint16x4_t) __builtin_aarch64_uminv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmin_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, -+ int const __d) - { -- return (uint32x2_t) __builtin_aarch64_uminv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vminq_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, -+ int const __d) - { -- return __builtin_aarch64_smin_nanv4sf (__a, __b); -+ return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vminq_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) - { -- 
return __builtin_aarch64_smin_nanv2df (__a, __b); -+ return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vminq_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) - { -- return __builtin_aarch64_sminv16qi (__a, __b); -+ return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vminq_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) - { -- return __builtin_aarch64_sminv8hi (__a, __b); -+ return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vminq_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - { -- return __builtin_aarch64_sminv4si (__a, __b); -+ return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vminq_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c) - { -- return (uint8x16_t) __builtin_aarch64_uminv16qi ((int8x16_t) __a, -- (int8x16_t) __b); -+ return __builtin_aarch64_sqdmlalhi (__a, __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vminq_u16 (uint16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) - { -- return (uint16x8_t) __builtin_aarch64_uminv8hi ((int16x8_t) __a, -- (int16x8_t) __b); -+ return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vminq_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) - { -- return (uint32x4_t) __builtin_aarch64_uminv4si ((int32x4_t) __a, -- (int32x4_t) __b); -+ return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d); - } - --/* vminnm */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vminnm_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c) - { -- return __builtin_aarch64_fminv2sf (__a, __b); -+ return __builtin_aarch64_sqdmlalsi (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vminnmq_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d) - { -- 
return __builtin_aarch64_fminv4sf (__a, __b); -+ return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vminnmq_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) - { -- return __builtin_aarch64_fminv2df (__a, __b); -+ return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d); - } - --/* vminv */ -+/* vqdmlsl */ - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vminv_f32 (float32x2_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { -- return __builtin_aarch64_reduc_smin_nan_scal_v2sf (__a); -+ return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vminv_s8 (int8x8_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) - { -- return __builtin_aarch64_reduc_smin_scal_v8qi (__a); -+ return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vminv_s16 (int16x4_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, -+ int const __d) - { -- return __builtin_aarch64_reduc_smin_scal_v4hi (__a); -+ return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vminv_s32 (int32x2_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, -+ int const __d) - { -- return __builtin_aarch64_reduc_smin_scal_v2si (__a); -+ return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vminv_u8 (uint8x8_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) - { -- return __builtin_aarch64_reduc_umin_scal_v8qi_uu (__a); -+ return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vminv_u16 (uint16x4_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) - { -- return __builtin_aarch64_reduc_umin_scal_v4hi_uu (__a); -+ return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vminv_u32 (uint32x2_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) - { -- return __builtin_aarch64_reduc_umin_scal_v2si_uu (__a); -+ return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d); - } 
- --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vminvq_f32 (float32x4_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) - { -- return __builtin_aarch64_reduc_smin_nan_scal_v4sf (__a); -+ return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vminvq_f64 (float64x2_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { -- return __builtin_aarch64_reduc_smin_nan_scal_v2df (__a); -+ return __builtin_aarch64_sqdmlslv2si (__a, __b, __c); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vminvq_s8 (int8x16_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) - { -- return __builtin_aarch64_reduc_smin_scal_v16qi (__a); -+ return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vminvq_s16 (int16x8_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, -+ int const __d) - { -- return __builtin_aarch64_reduc_smin_scal_v8hi (__a); -+ return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vminvq_s32 (int32x4_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, -+ int const __d) - { -- return __builtin_aarch64_reduc_smin_scal_v4si (__a); -+ return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vminvq_u8 (uint8x16_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) - { -- return __builtin_aarch64_reduc_umin_scal_v16qi_uu (__a); -+ return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vminvq_u16 (uint16x8_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) - { -- return __builtin_aarch64_reduc_umin_scal_v8hi_uu (__a); -+ return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vminvq_u32 (uint32x4_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) - { -- return __builtin_aarch64_reduc_umin_scal_v4si_uu (__a); -+ return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d); - } - --/* vminnmv */ -- --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vminnmv_f32 (float32x2_t __a) -+__extension__ extern __inline 
int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - { -- return __builtin_aarch64_reduc_smin_scal_v2sf (__a); -+ return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vminnmvq_f32 (float32x4_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c) - { -- return __builtin_aarch64_reduc_smin_scal_v4sf (__a); -+ return __builtin_aarch64_sqdmlslhi (__a, __b, __c); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vminnmvq_f64 (float64x2_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) - { -- return __builtin_aarch64_reduc_smin_scal_v2df (__a); -+ return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); - } - --/* vmla */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) - { -- return a + b * c; -+ return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c) - { -- return __a + __b * __c; -+ return __builtin_aarch64_sqdmlslsi (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d) - { -- return a + b * c; -+ return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmlaq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) - { -- return a + b * c; -+ return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d); - } - --/* vmla_lane */ -+/* vqdmulh */ - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmla_lane_f32 (float32x2_t __a, float32x2_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmla_lane_s16 (int16x4_t __a, int16x4_t __b, -- int16x4_t __c, const int __lane) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmla_lane_s32 (int32x2_t __a, int32x2_t __b, -- int32x2_t __c, const int __lane) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, -- uint16x4_t __c, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, -- uint32x2_t __c, const int __lane) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhh_s16 (int16_t __a, int16_t __b) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b); - } - --/* vmla_laneq */ -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) -+{ -+ return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmla_laneq_f32 (float32x2_t __a, float32x2_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmla_laneq_s16 (int16x4_t __a, int16x4_t __b, -- int16x8_t __c, const int __lane) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhs_s32 (int32_t __a, int32_t __b) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmla_laneq_s32 (int32x2_t __a, int32x2_t __b, -- int32x4_t __c, const int __lane) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmla_laneq_u16 (uint16x4_t __a, uint16x4_t __b, -- uint16x8_t __c, const int __lane) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmla_laneq_u32 (uint32x2_t __a, uint32x2_t __b, -- uint32x4_t __c, const int __lane) -+/* vqdmull */ -+ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_s16 (int16x4_t __a, int16x4_t __b) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmullv4hi (__a, __b); - } - --/* vmlaq_lane */ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) -+{ -+ return __builtin_aarch64_sqdmull2v8hi (__a, __b); -+} - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, -- int16x4_t __c, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, -- int32x2_t __c, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull2_nv8hi (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, -- uint16x4_t __c, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, -- uint32x2_t __c, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c); - } - -- /* vmlaq_laneq */ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_n_s16 (int16x4_t __a, int16_t __b) -+{ -+ return __builtin_aarch64_sqdmull_nv4hi (__a, __b); 
-+} - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmlaq_laneq_f32 (float32x4_t __a, float32x4_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_s32 (int32x2_t __a, int32x2_t __b) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmullv2si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlaq_laneq_s16 (int16x8_t __a, int16x8_t __b, -- int16x8_t __c, const int __lane) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull2v4si (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlaq_laneq_s32 (int32x4_t __a, int32x4_t __b, -- int32x4_t __c, const int __lane) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlaq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, -- uint16x8_t __c, const int __lane) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlaq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, -- uint32x4_t __c, const int __lane) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) - { -- return (__a + (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull2_nv4si (__a, __b); - } - --/* vmls */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) - { -- return a - b * c; -+ return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c) - { -- return __a - __b * __c; -+ return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmull_n_s32 (int32x2_t __a, int32_t __b) - { -- return a - b * c; -+ return __builtin_aarch64_sqdmull_nv2si 
(__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmlsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmullh_s16 (int16_t __a, int16_t __b) - { -- return a - b * c; -+ return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b); - } - --/* vmls_lane */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmls_lane_f32 (float32x2_t __a, float32x2_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmls_lane_s16 (int16x4_t __a, int16x4_t __b, -- int16x4_t __c, const int __lane) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmls_lane_s32 (int32x2_t __a, int32x2_t __b, -- int32x2_t __c, const int __lane) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulls_s32 (int32_t __a, int32_t __b) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmullsi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, -- uint16x4_t __c, const int __lane) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, -- uint32x2_t __c, const int __lane) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c); - } - --/* vmls_laneq */ -+/* vqmovn */ - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmls_laneq_f32 (float32x2_t __a, float32x2_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_s16 (int16x8_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmls_laneq_s16 (int16x4_t __a, int16x4_t __b, -- int16x8_t __c, const int __lane) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_s32 (int32x4_t __a) - { -- return 
(__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmls_laneq_s32 (int32x2_t __a, int32x2_t __b, -- int32x4_t __c, const int __lane) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_s64 (int64x2_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmls_laneq_u16 (uint16x4_t __a, uint16x4_t __b, -- uint16x8_t __c, const int __lane) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_u16 (uint16x8_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmls_laneq_u32 (uint32x2_t __a, uint32x2_t __b, -- uint32x4_t __c, const int __lane) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_u32 (uint32x4_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a); - } - --/* vmlsq_lane */ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, -- float32x2_t __c, const int __lane) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovn_u64 (uint64x2_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, -- int16x4_t __c, const int __lane) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovnh_s16 (int16_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (int8_t) __builtin_aarch64_sqmovnhi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, -- int32x2_t __c, const int __lane) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovns_s32 (int32_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (int16_t) __builtin_aarch64_sqmovnsi (__a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, -- uint16x4_t __c, const int __lane) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovnd_s64 (int64_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (int32_t) __builtin_aarch64_sqmovndi (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, -- uint32x2_t __c, const int __lane) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovnh_u16 (uint16_t __a) - { -- return 
(__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint8_t) __builtin_aarch64_uqmovnhi (__a); - } - -- /* vmlsq_laneq */ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmlsq_laneq_f32 (float32x4_t __a, float32x4_t __b, -- float32x4_t __c, const int __lane) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovns_u32 (uint32_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint16_t) __builtin_aarch64_uqmovnsi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmlsq_laneq_s16 (int16x8_t __a, int16x8_t __b, -- int16x8_t __c, const int __lane) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovnd_u64 (uint64_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint32_t) __builtin_aarch64_uqmovndi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmlsq_laneq_s32 (int32x4_t __a, int32x4_t __b, -- int32x4_t __c, const int __lane) -+/* vqmovun */ -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovun_s16 (int16x8_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a); - } --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmlsq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, -- uint16x8_t __c, const int __lane) -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovun_s32 (int32x4_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmlsq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, -- uint32x4_t __c, const int __lane) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovun_s64 (int64x2_t __a) - { -- return (__a - (__b * __aarch64_vget_lane_any (__c, __lane))); -+ return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); - } - --/* vmov_n_ */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmov_n_f32 (float32_t __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovunh_s16 (int16_t __a) - { -- return vdup_n_f32 (__a); -+ return (int8_t) __builtin_aarch64_sqmovunhi (__a); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmov_n_f64 (float64_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovuns_s32 (int32_t __a) - { -- return (float64x1_t) {__a}; -+ return (int16_t) __builtin_aarch64_sqmovunsi (__a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vmov_n_p8 (poly8_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqmovund_s64 (int64_t __a) - { -- return vdup_n_p8 (__a); -+ return (int32_t) __builtin_aarch64_sqmovundi (__a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vmov_n_p16 (poly16_t __a) -+/* vqneg */ -+ -+__extension__ extern __inline 
int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqnegq_s64 (int64x2_t __a) - { -- return vdup_n_p16 (__a); -+ return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vmov_n_s8 (int8_t __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqnegb_s8 (int8_t __a) - { -- return vdup_n_s8 (__a); -+ return (int8_t) __builtin_aarch64_sqnegqi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmov_n_s16 (int16_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqnegh_s16 (int16_t __a) - { -- return vdup_n_s16 (__a); -+ return (int16_t) __builtin_aarch64_sqneghi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmov_n_s32 (int32_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqnegs_s32 (int32_t __a) - { -- return vdup_n_s32 (__a); -+ return (int32_t) __builtin_aarch64_sqnegsi (__a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vmov_n_s64 (int64_t __a) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqnegd_s64 (int64_t __a) - { -- return (int64x1_t) {__a}; -+ return __builtin_aarch64_sqnegdi (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vmov_n_u8 (uint8_t __a) -+/* vqrdmulh */ -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { -- return vdup_n_u8 (__a); -+ return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmov_n_u16 (uint16_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { -- return vdup_n_u16 (__a); -+ return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmov_n_u32 (uint32_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) - { -- return vdup_n_u32 (__a); -+ return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vmov_n_u64 (uint64_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) - { -- return (uint64x1_t) {__a}; -+ return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmovq_n_f32 (float32_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhh_s16 (int16_t __a, int16_t __b) - { -- return vdupq_n_f32 (__a); -+ return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmovq_n_f64 (float64_t 
__a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) - { -- return vdupq_n_f64 (__a); -+ return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vmovq_n_p8 (poly8_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) - { -- return vdupq_n_p8 (__a); -+ return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vmovq_n_p16 (poly16_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhs_s32 (int32_t __a, int32_t __b) - { -- return vdupq_n_p16 (__a); -+ return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vmovq_n_s8 (int8_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) - { -- return vdupq_n_s8 (__a); -+ return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmovq_n_s16 (int16_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) - { -- return vdupq_n_s16 (__a); -+ return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmovq_n_s32 (int32_t __a) -+/* vqrshl */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshl_s8 (int8x8_t __a, int8x8_t __b) - { -- return vdupq_n_s32 (__a); -+ return __builtin_aarch64_sqrshlv8qi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vmovq_n_s64 (int64_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshl_s16 (int16x4_t __a, int16x4_t __b) - { -- return vdupq_n_s64 (__a); -+ return __builtin_aarch64_sqrshlv4hi (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vmovq_n_u8 (uint8_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshl_s32 (int32x2_t __a, int32x2_t __b) - { -- return vdupq_n_u8 (__a); -+ return __builtin_aarch64_sqrshlv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmovq_n_u16 (uint16_t __a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshl_s64 (int64x1_t __a, int64x1_t __b) - { -- return vdupq_n_u16 (__a); -+ return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])}; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmovq_n_u32 (uint32_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return vdupq_n_u32 (__a); -+ return 
__builtin_aarch64_uqrshlv8qi_uus ( __a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vmovq_n_u64 (uint64_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return vdupq_n_u64 (__a); -+ return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b); - } - --/* vmul_lane */ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshl_u32 (uint32x2_t __a, int32x2_t __b) -+{ -+ return __builtin_aarch64_uqrshlv2si_uus ( __a, __b); -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __lane) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])}; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmul_lane_f64 (float64x1_t __a, float64x1_t __b, const int __lane) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlq_s8 (int8x16_t __a, int8x16_t __b) - { -- return __a * __b; -+ return __builtin_aarch64_sqrshlv16qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __lane) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlq_s16 (int16x8_t __a, int16x8_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_sqrshlv8hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlq_s32 (int32x4_t __a, int32x4_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_sqrshlv4si (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __lane) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlq_s64 (int64x2_t __a, int64x2_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_sqrshlv2di (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b); - } - --/* vmuld_lane */ -- --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ 
return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshlv4si_uus ( __a, __b); - } - --/* vmuls_lane */ -- --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshlv2di_uus ( __a, __b); - } - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlb_s8 (int8_t __a, int8_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_sqrshlqi (__a, __b); - } - --/* vmul_laneq */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vmul_laneq_f32 (float32x2_t __a, float32x4_t __b, const int __lane) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlh_s16 (int16_t __a, int16_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_sqrshlhi (__a, __b); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmul_laneq_f64 (float64x1_t __a, float64x2_t __b, const int __lane) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshls_s32 (int32_t __a, int32_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_sqrshlsi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vmul_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __lane) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshld_s64 (int64_t __a, int64_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_sqrshldi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vmul_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __lane) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlb_u8 (uint8_t __a, uint8_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshlqi_uus (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vmul_laneq_u16 (uint16x4_t __a, uint16x8_t __b, const int __lane) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshlh_u16 (uint16_t __a, uint16_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshlhi_uus (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vmul_laneq_u32 (uint32x2_t __a, 
uint32x4_t __b, const int __lane) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshls_u32 (uint32_t __a, uint32_t __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshlsi_uus (__a, __b); - } - --/* vmul_n */ -- --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vmul_n_f64 (float64x1_t __a, float64_t __b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshld_u64 (uint64_t __a, uint64_t __b) - { -- return (float64x1_t) { vget_lane_f64 (__a, 0) * __b }; -+ return __builtin_aarch64_uqrshldi_uus (__a, __b); - } - --/* vmulq_lane */ -+/* vqrshrn */ - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __lane) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrn_n_s16 (int16x8_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmulq_lane_f64 (float64x2_t __a, float64x1_t __b, const int __lane) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrn_n_s32 (int32x4_t __a, const int __b) - { -- __AARCH64_LANE_CHECK (__a, __lane); -- return __a * __b[0]; -+ return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __lane) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrn_n_s64 (int64x2_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __lane) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrn_n_u16 (uint16x8_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __lane) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrn_n_u32 (uint32x4_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __lane) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrn_n_u64 (uint64x2_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b); - } - --/* vmulq_laneq */ -- --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vmulq_laneq_f32 (float32x4_t __a, float32x4_t __b, const int __lane) 
-+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrnh_n_s16 (int16_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vmulq_laneq_f64 (float64x2_t __a, float64x2_t __b, const int __lane) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrns_n_s32 (int32_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vmulq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __lane) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrnd_n_s64 (int64_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vmulq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __lane) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrnh_n_u16 (uint16_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vmulq_laneq_u16 (uint16x8_t __a, uint16x8_t __b, const int __lane) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrns_n_u32 (uint32_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrnd_n_u64 (uint64_t __a, const int __b) - { -- return __a * __aarch64_vget_lane_any (__b, __lane); -+ return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b); - } - --/* vneg */ -+/* vqrshrun */ - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vneg_f32 (float32x2_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrun_n_s16 (int16x8_t __a, const int __b) - { -- return -__a; -+ return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vneg_f64 (float64x1_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrun_n_s32 (int32x4_t __a, const int __b) - { -- return -__a; -+ return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vneg_s8 (int8x8_t __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrun_n_s64 (int64x2_t __a, const int __b) - { -- return -__a; -+ return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); - } - --__extension__ 
static __inline int16x4_t __attribute__ ((__always_inline__)) --vneg_s16 (int16x4_t __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrunh_n_s16 (int16_t __a, const int __b) - { -- return -__a; -+ return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vneg_s32 (int32x2_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshruns_n_s32 (int32_t __a, const int __b) - { -- return -__a; -+ return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vneg_s64 (int64x1_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqrshrund_n_s64 (int64_t __a, const int __b) - { -- return -__a; -+ return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vnegq_f32 (float32x4_t __a) -+/* vqshl */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_s8 (int8x8_t __a, int8x8_t __b) - { -- return -__a; -+ return __builtin_aarch64_sqshlv8qi (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vnegq_f64 (float64x2_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_s16 (int16x4_t __a, int16x4_t __b) - { -- return -__a; -+ return __builtin_aarch64_sqshlv4hi (__a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vnegq_s8 (int8x16_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_s32 (int32x2_t __a, int32x2_t __b) - { -- return -__a; -+ return __builtin_aarch64_sqshlv2si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vnegq_s16 (int16x8_t __a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_s64 (int64x1_t __a, int64x1_t __b) - { -- return -__a; -+ return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])}; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vnegq_s32 (int32x4_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return -__a; -+ return __builtin_aarch64_uqshlv8qi_uus ( __a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vnegq_s64 (int64x2_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return -__a; -+ return __builtin_aarch64_uqshlv4hi_uus ( __a, __b); - } - --/* vpadd */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vpadd_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return __builtin_aarch64_addpv8qi (__a, __b); -+ return __builtin_aarch64_uqshlv2si_uus ( __a, __b); - } - --__extension__ static __inline int16x4_t 
__attribute__ ((__always_inline__)) --vpadd_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return __builtin_aarch64_addpv4hi (__a, __b); -+ return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])}; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vpadd_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_s8 (int8x16_t __a, int8x16_t __b) - { -- return __builtin_aarch64_addpv2si (__a, __b); -+ return __builtin_aarch64_sqshlv16qi (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vpadd_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_s16 (int16x8_t __a, int16x8_t __b) - { -- return (uint8x8_t) __builtin_aarch64_addpv8qi ((int8x8_t) __a, -- (int8x8_t) __b); -+ return __builtin_aarch64_sqshlv8hi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vpadd_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (uint16x4_t) __builtin_aarch64_addpv4hi ((int16x4_t) __a, -- (int16x4_t) __b); -+ return __builtin_aarch64_sqshlv4si (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vpadd_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_addpv2si ((int32x2_t) __a, -- (int32x2_t) __b); -+ return __builtin_aarch64_sqshlv2di (__a, __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vpaddd_f64 (float64x2_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return __builtin_aarch64_reduc_plus_scal_v2df (__a); -+ return __builtin_aarch64_uqshlv16qi_uus ( __a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vpaddd_s64 (int64x2_t __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return __builtin_aarch64_addpdi (__a); -+ return __builtin_aarch64_uqshlv8hi_uus ( __a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vpaddd_u64 (uint64x2_t __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return __builtin_aarch64_addpdi ((int64x2_t) __a); -+ return __builtin_aarch64_uqshlv4si_uus ( __a, __b); - } - --/* vqabs */ -- --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqabsq_s64 (int64x2_t __a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return (int64x2_t) __builtin_aarch64_sqabsv2di (__a); -+ return 
__builtin_aarch64_uqshlv2di_uus ( __a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqabsb_s8 (int8_t __a) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlb_s8 (int8_t __a, int8_t __b) - { -- return (int8_t) __builtin_aarch64_sqabsqi (__a); -+ return __builtin_aarch64_sqshlqi (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqabsh_s16 (int16_t __a) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlh_s16 (int16_t __a, int16_t __b) - { -- return (int16_t) __builtin_aarch64_sqabshi (__a); -+ return __builtin_aarch64_sqshlhi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqabss_s32 (int32_t __a) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshls_s32 (int32_t __a, int32_t __b) - { -- return (int32_t) __builtin_aarch64_sqabssi (__a); -+ return __builtin_aarch64_sqshlsi (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqabsd_s64 (int64_t __a) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshld_s64 (int64_t __a, int64_t __b) - { -- return __builtin_aarch64_sqabsdi (__a); -+ return __builtin_aarch64_sqshldi (__a, __b); - } - --/* vqadd */ -- --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqaddb_s8 (int8_t __a, int8_t __b) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlb_u8 (uint8_t __a, uint8_t __b) - { -- return (int8_t) __builtin_aarch64_sqaddqi (__a, __b); -+ return __builtin_aarch64_uqshlqi_uus (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqaddh_s16 (int16_t __a, int16_t __b) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlh_u16 (uint16_t __a, uint16_t __b) - { -- return (int16_t) __builtin_aarch64_sqaddhi (__a, __b); -+ return __builtin_aarch64_uqshlhi_uus (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqadds_s32 (int32_t __a, int32_t __b) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshls_u32 (uint32_t __a, uint32_t __b) - { -- return (int32_t) __builtin_aarch64_sqaddsi (__a, __b); -+ return __builtin_aarch64_uqshlsi_uus (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqaddd_s64 (int64_t __a, int64_t __b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshld_u64 (uint64_t __a, uint64_t __b) - { -- return __builtin_aarch64_sqadddi (__a, __b); -+ return __builtin_aarch64_uqshldi_uus (__a, __b); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vqaddb_u8 (uint8_t __a, uint8_t __b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_n_s8 (int8x8_t __a, const int __b) - { -- return (uint8_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); -+ return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vqaddh_u16 (uint16_t __a, 
uint16_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_n_s16 (int16x4_t __a, const int __b) - { -- return (uint16_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); -+ return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vqadds_u32 (uint32_t __a, uint32_t __b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_n_s32 (int32x2_t __a, const int __b) - { -- return (uint32_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); -+ return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vqaddd_u64 (uint64_t __a, uint64_t __b) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_n_s64 (int64x1_t __a, const int __b) - { -- return __builtin_aarch64_uqadddi_uuu (__a, __b); -+ return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)}; - } - --/* vqdmlal */ -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_n_u8 (uint8x8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlalv4hi (__a, __b, __c); -+ return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlal_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_n_u16 (uint16x4_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal2v8hi (__a, __b, __c); -+ return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlal_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, -- int const __d) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_n_u32 (uint32x2_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal2_lanev8hi (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshl_nv2si_uus (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlal_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, -- int const __d) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshl_n_u64 (uint64x1_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal2_laneqv8hi (__a, __b, __c, __d); -+ return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)}; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlal_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_n_s8 (int8x16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal2_nv8hi (__a, __b, __c); -+ return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) -+__extension__ extern __inline 
int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_n_s16 (int16x8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_lanev4hi (__a, __b, __c, __d); -+ return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlal_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_n_s32 (int32x4_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_laneqv4hi (__a, __b, __c, __d); -+ return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_n_s64 (int64x2_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_nv4hi (__a, __b, __c); -+ return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_n_u8 (uint8x16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlalv2si (__a, __b, __c); -+ return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlal_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_n_u16 (uint16x8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal2v4si (__a, __b, __c); -+ return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlal_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, -- int const __d) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_n_u32 (uint32x4_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal2_lanev4si (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshl_nv4si_uus (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlal_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, -- int const __d) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlq_n_u64 (uint64x2_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal2_laneqv4si (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshl_nv2di_uus (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlal_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlb_n_s8 (int8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal2_nv4si (__a, __b, __c); -+ return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) -+__extension__ extern __inline int16_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlh_n_s16 (int16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_lanev2si (__a, __b, __c, __d); -+ return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlal_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshls_n_s32 (int32_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_laneqv2si (__a, __b, __c, __d); -+ return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshld_n_s64 (int64_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_nv2si (__a, __b, __c); -+ return __builtin_aarch64_sqshl_ndi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmlalh_s16 (int32_t __a, int16_t __b, int16_t __c) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlb_n_u8 (uint8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlalhi (__a, __b, __c); -+ return __builtin_aarch64_uqshl_nqi_uus (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmlalh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlh_n_u16 (uint16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshl_nhi_uus (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshls_n_u32 (uint32_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshl_nsi_uus (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmlals_s32 (int64_t __a, int32_t __b, int32_t __c) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshld_n_u64 (uint64_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlalsi (__a, __b, __c); -+ return __builtin_aarch64_uqshl_ndi_uus (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmlals_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d) -+/* vqshlu */ -+ -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlu_n_s8 (int8x8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlal_lanesi (__a, __b, __c, __d); -+ return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlu_n_s16 (int16x4_t 
__a, const int __b) - { -- return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d); -+ return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b); - } - --/* vqdmlsl */ -- --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlu_n_s32 (int32x2_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlslv4hi (__a, __b, __c); -+ return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlsl_high_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlu_n_s64 (int64x1_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl2v8hi (__a, __b, __c); -+ return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)}; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlsl_high_lane_s16 (int32x4_t __a, int16x8_t __b, int16x4_t __c, -- int const __d) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshluq_n_s8 (int8x16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl2_lanev8hi (__a, __b, __c, __d); -+ return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlsl_high_laneq_s16 (int32x4_t __a, int16x8_t __b, int16x8_t __c, -- int const __d) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshluq_n_s16 (int16x8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl2_laneqv8hi (__a, __b, __c, __d); -+ return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlsl_high_n_s16 (int32x4_t __a, int16x8_t __b, int16_t __c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshluq_n_s32 (int32x4_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl2_nv8hi (__a, __b, __c); -+ return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, int const __d) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshluq_n_s64 (int64x2_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_lanev4hi (__a, __b, __c, __d); -+ return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlsl_laneq_s16 (int32x4_t __a, int16x4_t __b, int16x8_t __c, int const __d) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlub_n_s8 (int8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_laneqv4hi (__a, __b, __c, __d); -+ return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vqshluh_n_s16 (int16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_nv4hi (__a, __b, __c); -+ return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlus_n_s32 (int32_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlslv2si (__a, __b, __c); -+ return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlsl_high_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshlud_n_s64 (int64_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl2v4si (__a, __b, __c); -+ return __builtin_aarch64_sqshlu_ndi_uss (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlsl_high_lane_s32 (int64x2_t __a, int32x4_t __b, int32x2_t __c, -- int const __d) -+/* vqshrn */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrn_n_s16 (int16x8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl2_lanev4si (__a, __b, __c, __d); -+ return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlsl_high_laneq_s32 (int64x2_t __a, int32x4_t __b, int32x4_t __c, -- int const __d) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrn_n_s32 (int32x4_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl2_laneqv4si (__a, __b, __c, __d); -+ return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlsl_high_n_s32 (int64x2_t __a, int32x4_t __b, int32_t __c) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrn_n_s64 (int64x2_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl2_nv4si (__a, __b, __c); -+ return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, int const __d) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrn_n_u16 (uint16x8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_lanev2si (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlsl_laneq_s32 (int64x2_t __a, int32x2_t __b, int32x4_t __c, int const __d) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrn_n_u32 (uint32x4_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_laneqv2si (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vqshrn_n_u64 (uint64x2_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_nv2si (__a, __b, __c); -+ return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmlslh_s16 (int32_t __a, int16_t __b, int16_t __c) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrnh_n_s16 (int16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlslhi (__a, __b, __c); -+ return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmlslh_lane_s16 (int32_t __a, int16_t __b, int16x4_t __c, const int __d) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrns_n_s32 (int32_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d); -+ return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrnd_n_s64 (int64_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d); -+ return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmlsls_s32 (int64_t __a, int32_t __b, int32_t __c) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrnh_n_u16 (uint16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlslsi (__a, __b, __c); -+ return __builtin_aarch64_uqshrn_nhi_uus (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmlsls_lane_s32 (int64_t __a, int32_t __b, int32x2_t __c, const int __d) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrns_n_u32 (uint32_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_lanesi (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshrn_nsi_uus (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrnd_n_u64 (uint64_t __a, const int __b) - { -- return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d); -+ return __builtin_aarch64_uqshrn_ndi_uus (__a, __b); - } - --/* vqdmulh */ -+/* vqshrun */ - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrun_n_s16 (int16x8_t __a, const int __b) - { -- return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c); -+ return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrun_n_s32 (int32x4_t __a, const int 
__b) - { -- return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c); -+ return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrun_n_s64 (int64x2_t __a, const int __b) - { -- return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c); -+ return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrunh_n_s16 (int16_t __a, const int __b) - { -- return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c); -+ return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqdmulhh_s16 (int16_t __a, int16_t __b) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshruns_n_s32 (int32_t __a, const int __b) - { -- return (int16_t) __builtin_aarch64_sqdmulhhi (__a, __b); -+ return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqshrund_n_s64 (int64_t __a, const int __b) - { -- return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c); -+ return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) -+/* vqsub */ -+ -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqsubb_s8 (int8_t __a, int8_t __b) - { -- return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c); -+ return (int8_t) __builtin_aarch64_sqsubqi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmulhs_s32 (int32_t __a, int32_t __b) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqsubh_s16 (int16_t __a, int16_t __b) - { -- return (int32_t) __builtin_aarch64_sqdmulhsi (__a, __b); -+ return (int16_t) __builtin_aarch64_sqsubhi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqsubs_s32 (int32_t __a, int32_t __b) - { -- return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c); -+ return (int32_t) __builtin_aarch64_sqsubsi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqsubd_s64 (int64_t __a, int64_t __b) - { -- return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c); -+ return __builtin_aarch64_sqsubdi (__a, __b); - } - --/* vqdmull */ -- --__extension__ static 
__inline int32x4_t __attribute__ ((__always_inline__)) --vqdmull_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqsubb_u8 (uint8_t __a, uint8_t __b) - { -- return __builtin_aarch64_sqdmullv4hi (__a, __b); -+ return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmull_high_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqsubh_u16 (uint16_t __a, uint16_t __b) - { -- return __builtin_aarch64_sqdmull2v8hi (__a, __b); -+ return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmull_high_lane_s16 (int16x8_t __a, int16x4_t __b, int const __c) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqsubs_u32 (uint32_t __a, uint32_t __b) - { -- return __builtin_aarch64_sqdmull2_lanev8hi (__a, __b,__c); -+ return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmull_high_laneq_s16 (int16x8_t __a, int16x8_t __b, int const __c) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqsubd_u64 (uint64_t __a, uint64_t __b) - { -- return __builtin_aarch64_sqdmull2_laneqv8hi (__a, __b,__c); -+ return __builtin_aarch64_uqsubdi_uuu (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmull_high_n_s16 (int16x8_t __a, int16_t __b) -+/* vqtbl2 */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) - { -- return __builtin_aarch64_sqdmull2_nv8hi (__a, __b); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -+ return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, int const __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) - { -- return __builtin_aarch64_sqdmull_lanev4hi (__a, __b, __c); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqdmull_laneq_s16 (int16x4_t __a, int16x8_t __b, int const __c) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) - { -- return __builtin_aarch64_sqdmull_laneqv4hi (__a, __b, __c); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) --vqdmull_n_s16 (int16x4_t __a, int16_t __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmull_nv4hi (__a, __b); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmull_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmullv2si (__a, __b); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmull_high_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmull2v4si (__a, __b); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmull_high_lane_s32 (int32x4_t __a, int32x2_t __b, int const __c) -+/* vqtbl3 */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) - { -- return __builtin_aarch64_sqdmull2_lanev4si (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmull_high_laneq_s32 (int32x4_t __a, int32x4_t __b, int const __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) - { -- return __builtin_aarch64_sqdmull2_laneqv4si (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmull_high_n_s32 (int32x4_t __a, int32_t __b) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) - { -- return __builtin_aarch64_sqdmull2_nv4si (__a, __b); -+ 
__builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, int const __c) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmull_lanev2si (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmull_laneq_s32 (int32x2_t __a, int32x4_t __b, int const __c) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmull_laneqv2si (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqdmull_n_s32 (int32x2_t __a, int32_t __b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmull_nv2si (__a, __b); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmullh_s16 (int16_t __a, int16_t __b) -+/* vqtbl4 */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) - { -- return (int32_t) __builtin_aarch64_sqdmullhi (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmullh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) - { -- return __builtin_aarch64_sqdmull_lanehi (__a, __b, __c); 
-+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqdmullh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) - { -- return __builtin_aarch64_sqdmull_laneqhi (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmulls_s32 (int32_t __a, int32_t __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmullsi (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmulls_lane_s32 (int32_t __a, int32x2_t __b, const int __c) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmull_lanesi (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqdmulls_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) - { -- return __builtin_aarch64_sqdmull_laneqsi (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); - } - --/* vqmovn */ - --__extension__ static __inline 
int8x8_t __attribute__ ((__always_inline__)) --vqmovn_s16 (int16x8_t __a) -+/* vqtbx2 */ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) - { -- return (int8x8_t) __builtin_aarch64_sqmovnv8hi (__a); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -+ return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqmovn_s32 (int32x4_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) - { -- return (int16x4_t) __builtin_aarch64_sqmovnv4si (__a); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, -+ (int8x8_t)idx); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqmovn_s64 (int64x2_t __a) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) - { -- return (int32x2_t) __builtin_aarch64_sqmovnv2di (__a); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, -+ (int8x8_t)idx); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqmovn_u16 (uint16x8_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) - { -- return (uint8x8_t) __builtin_aarch64_uqmovnv8hi ((int16x8_t) __a); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -+ return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqmovn_u32 (uint32x4_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) - { -- return (uint16x4_t) __builtin_aarch64_uqmovnv4si ((int32x4_t) __a); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, -+ (int8x16_t)idx); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqmovn_u64 (uint64x2_t __a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) - { -- return (uint32x2_t) __builtin_aarch64_uqmovnv2di ((int64x2_t) __a); -+ __builtin_aarch64_simd_oi __o; -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, 
(int8x16_t)tab.val[1], 1); -+ return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, -+ (int8x16_t)idx); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqmovnh_s16 (int16_t __a) -+/* vqtbx3 */ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) - { -- return (int8_t) __builtin_aarch64_sqmovnhi (__a); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); -+ return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqmovns_s32 (int32_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) - { -- return (int16_t) __builtin_aarch64_sqmovnsi (__a); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, -+ (int8x8_t)idx); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqmovnd_s64 (int64_t __a) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) - { -- return (int32_t) __builtin_aarch64_sqmovndi (__a); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, -+ (int8x8_t)idx); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vqmovnh_u16 (uint16_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) - { -- return (uint8_t) __builtin_aarch64_uqmovnhi (__a); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); -+ return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vqmovns_u32 (uint32_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) - { -- return (uint16_t) __builtin_aarch64_uqmovnsi (__a); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, -+ (int8x16_t)idx); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) 
--vqmovnd_u64 (uint64_t __a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) - { -- return (uint32_t) __builtin_aarch64_uqmovndi (__a); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -+ return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, -+ (int8x16_t)idx); - } - --/* vqmovun */ -+/* vqtbx4 */ - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqmovun_s16 (int16x8_t __a) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) - { -- return (uint8x8_t) __builtin_aarch64_sqmovunv8hi (__a); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); -+ return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqmovun_s32 (int32x4_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) - { -- return (uint16x4_t) __builtin_aarch64_sqmovunv4si (__a); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, -+ (int8x8_t)idx); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqmovun_s64 (int64x2_t __a) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) - { -- return (uint32x2_t) __builtin_aarch64_sqmovunv2di (__a); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, -+ (int8x8_t)idx); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqmovunh_s16 (int16_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) - { -- return (int8_t) __builtin_aarch64_sqmovunhi (__a); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); -+ return 
__builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqmovuns_s32 (int32_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) - { -- return (int16_t) __builtin_aarch64_sqmovunsi (__a); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, -+ (int8x16_t)idx); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqmovund_s64 (int64_t __a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) - { -- return (int32_t) __builtin_aarch64_sqmovundi (__a); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -+ return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, -+ (int8x16_t)idx); - } - --/* vqneg */ -+/* vrbit */ - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqnegq_s64 (int64x2_t __a) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrbit_p8 (poly8x8_t __a) - { -- return (int64x2_t) __builtin_aarch64_sqnegv2di (__a); -+ return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqnegb_s8 (int8_t __a) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrbit_s8 (int8x8_t __a) - { -- return (int8_t) __builtin_aarch64_sqnegqi (__a); -+ return __builtin_aarch64_rbitv8qi (__a); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqnegh_s16 (int16_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrbit_u8 (uint8x8_t __a) - { -- return (int16_t) __builtin_aarch64_sqneghi (__a); -+ return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqnegs_s32 (int32_t __a) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrbitq_p8 (poly8x16_t __a) - { -- return (int32_t) __builtin_aarch64_sqnegsi (__a); -+ return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqnegd_s64 (int64_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrbitq_s8 (int8x16_t __a) - { -- return __builtin_aarch64_sqnegdi (__a); -+ return __builtin_aarch64_rbitv16qi (__a); - } - --/* vqrdmulh */ -- --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) 
--vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrbitq_u8 (uint8x16_t __a) - { -- return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c); -+ return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c) -+/* vrecpe */ -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpe_u32 (uint32x2_t __a) - { -- return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c); -+ return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpeq_u32 (uint32x4_t __a) - { -- return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c); -+ return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpes_f32 (float32_t __a) - { -- return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c); -+ return __builtin_aarch64_frecpesf (__a); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrdmulhh_s16 (int16_t __a, int16_t __b) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecped_f64 (float64_t __a) - { -- return (int16_t) __builtin_aarch64_sqrdmulhhi (__a, __b); -+ return __builtin_aarch64_frecpedf (__a); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrdmulhh_lane_s16 (int16_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpe_f32 (float32x2_t __a) - { -- return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c); -+ return __builtin_aarch64_frecpev2sf (__a); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpe_f64 (float64x1_t __a) - { -- return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c); -+ return (float64x1_t) { vrecped_f64 (vget_lane_f64 (__a, 0)) }; - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqrdmulhs_s32 (int32_t __a, int32_t __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpeq_f32 (float32x4_t __a) - { -- return (int32_t) __builtin_aarch64_sqrdmulhsi (__a, __b); -+ return __builtin_aarch64_frecpev4sf (__a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqrdmulhs_lane_s32 (int32_t __a, int32x2_t __b, const int __c) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpeq_f64 (float64x2_t __a) - { -- return 
__builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c); -+ return __builtin_aarch64_frecpev2df (__a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) -+/* vrecps */ -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpss_f32 (float32_t __a, float32_t __b) - { -- return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c); -+ return __builtin_aarch64_frecpssf (__a, __b); - } - --/* vqrshl */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqrshl_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpsd_f64 (float64_t __a, float64_t __b) - { -- return __builtin_aarch64_sqrshlv8qi (__a, __b); -+ return __builtin_aarch64_frecpsdf (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrshl_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecps_f32 (float32x2_t __a, float32x2_t __b) - { -- return __builtin_aarch64_sqrshlv4hi (__a, __b); -+ return __builtin_aarch64_frecpsv2sf (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrshl_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecps_f64 (float64x1_t __a, float64x1_t __b) - { -- return __builtin_aarch64_sqrshlv2si (__a, __b); -+ return (float64x1_t) { vrecpsd_f64 (vget_lane_f64 (__a, 0), -+ vget_lane_f64 (__b, 0)) }; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vqrshl_s64 (int64x1_t __a, int64x1_t __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpsq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (int64x1_t) {__builtin_aarch64_sqrshldi (__a[0], __b[0])}; -+ return __builtin_aarch64_frecpsv4sf (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqrshl_u8 (uint8x8_t __a, int8x8_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpsq_f64 (float64x2_t __a, float64x2_t __b) - { -- return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b); -+ return __builtin_aarch64_frecpsv2df (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqrshl_u16 (uint16x4_t __a, int16x4_t __b) -+/* vrecpx */ -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpxs_f32 (float32_t __a) - { -- return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b); -+ return __builtin_aarch64_frecpxsf (__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqrshl_u32 (uint32x2_t __a, int32x2_t __b) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpxd_f64 (float64_t __a) - { -- return __builtin_aarch64_uqrshlv2si_uus ( __a, __b); -+ return __builtin_aarch64_frecpxdf (__a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vqrshl_u64 (uint64x1_t __a, int64x1_t __b) -+ -+/* vrev */ -+ -+__extension__ extern __inline poly8x8_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev16_p8 (poly8x8_t a) - { -- return (uint64x1_t) {__builtin_aarch64_uqrshldi_uus (__a[0], __b[0])}; -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqrshlq_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev16_s8 (int8x8_t a) - { -- return __builtin_aarch64_sqrshlv16qi (__a, __b); -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqrshlq_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev16_u8 (uint8x8_t a) - { -- return __builtin_aarch64_sqrshlv8hi (__a, __b); -+ return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqrshlq_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev16q_p8 (poly8x16_t a) - { -- return __builtin_aarch64_sqrshlv4si (__a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqrshlq_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev16q_s8 (int8x16_t a) - { -- return __builtin_aarch64_sqrshlv2di (__a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev16q_u8 (uint8x16_t a) - { -- return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32_p8 (poly8x8_t a) - { -- return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b); -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32_p16 (poly16x4_t a) - { -- return __builtin_aarch64_uqrshlv4si_uus ( __a, __b); -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32_s8 (int8x8_t a) - { -- return __builtin_aarch64_uqrshlv2di_uus ( __a, __b); -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ 
static __inline int8_t __attribute__ ((__always_inline__)) --vqrshlb_s8 (int8_t __a, int8_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32_s16 (int16x4_t a) - { -- return __builtin_aarch64_sqrshlqi (__a, __b); -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrshlh_s16 (int16_t __a, int16_t __b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32_u8 (uint8x8_t a) - { -- return __builtin_aarch64_sqrshlhi (__a, __b); -+ return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqrshls_s32 (int32_t __a, int32_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32_u16 (uint16x4_t a) - { -- return __builtin_aarch64_sqrshlsi (__a, __b); -+ return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqrshld_s64 (int64_t __a, int64_t __b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32q_p8 (poly8x16_t a) - { -- return __builtin_aarch64_sqrshldi (__a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vqrshlb_u8 (uint8_t __a, uint8_t __b) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32q_p16 (poly16x8_t a) - { -- return __builtin_aarch64_uqrshlqi_uus (__a, __b); -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vqrshlh_u16 (uint16_t __a, uint16_t __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32q_s8 (int8x16_t a) - { -- return __builtin_aarch64_uqrshlhi_uus (__a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vqrshls_u32 (uint32_t __a, uint32_t __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32q_s16 (int16x8_t a) - { -- return __builtin_aarch64_uqrshlsi_uus (__a, __b); -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vqrshld_u64 (uint64_t __a, uint64_t __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32q_u8 (uint8x16_t a) - { -- return __builtin_aarch64_uqrshldi_uus (__a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - --/* vqrshrn */ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev32q_u16 (uint16x8_t a) -+{ -+ return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+} - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqrshrn_n_s16 (int16x8_t __a, 
const int __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_f16 (float16x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_sqrshrn_nv8hi (__a, __b); -+ return __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqrshrn_n_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_f32 (float32x2_t a) - { -- return (int16x4_t) __builtin_aarch64_sqrshrn_nv4si (__a, __b); -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqrshrn_n_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_p8 (poly8x8_t a) - { -- return (int32x2_t) __builtin_aarch64_sqrshrn_nv2di (__a, __b); -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqrshrn_n_u16 (uint16x8_t __a, const int __b) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_p16 (poly16x4_t a) - { -- return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b); -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqrshrn_n_u32 (uint32x4_t __a, const int __b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_s8 (int8x8_t a) - { -- return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b); -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqrshrn_n_u64 (uint64x2_t __a, const int __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_s16 (int16x4_t a) - { -- return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b); -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqrshrnh_n_s16 (int16_t __a, const int __b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_s32 (int32x2_t a) - { -- return (int8_t) __builtin_aarch64_sqrshrn_nhi (__a, __b); -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrshrns_n_s32 (int32_t __a, const int __b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_u8 (uint8x8_t a) - { -- return (int16_t) __builtin_aarch64_sqrshrn_nsi (__a, __b); -+ return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqrshrnd_n_s64 (int64_t __a, const int __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_u16 (uint16x4_t a) - { -- return (int32_t) __builtin_aarch64_sqrshrn_ndi (__a, __b); -+ return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); - } - --__extension__ static __inline uint8_t __attribute__ 
((__always_inline__)) --vqrshrnh_n_u16 (uint16_t __a, const int __b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_u32 (uint32x2_t a) - { -- return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b); -+ return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vqrshrns_n_u32 (uint32_t __a, const int __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_f16 (float16x8_t __a) - { -- return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b); -+ return __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vqrshrnd_n_u64 (uint64_t __a, const int __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_f32 (float32x4_t a) - { -- return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b); -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); - } - --/* vqrshrun */ -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqrshrun_n_s16 (int16x8_t __a, const int __b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_p8 (poly8x16_t a) - { -- return (uint8x8_t) __builtin_aarch64_sqrshrun_nv8hi (__a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqrshrun_n_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_p16 (poly16x8_t a) - { -- return (uint16x4_t) __builtin_aarch64_sqrshrun_nv4si (__a, __b); -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqrshrun_n_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_s8 (int8x16_t a) - { -- return (uint32x2_t) __builtin_aarch64_sqrshrun_nv2di (__a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqrshrunh_n_s16 (int16_t __a, const int __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_s16 (int16x8_t a) - { -- return (int8_t) __builtin_aarch64_sqrshrun_nhi (__a, __b); -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqrshruns_n_s32 (int32_t __a, const int __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_s32 (int32x4_t a) - { -- return (int16_t) __builtin_aarch64_sqrshrun_nsi (__a, __b); -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqrshrund_n_s64 (int64_t __a, const int __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_u8 (uint8x16_t a) - 
{ -- return (int32_t) __builtin_aarch64_sqrshrun_ndi (__a, __b); -+ return __builtin_shuffle (a, -+ (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - --/* vqshl */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqshl_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_u16 (uint16x8_t a) - { -- return __builtin_aarch64_sqshlv8qi (__a, __b); -+ return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqshl_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_u32 (uint32x4_t a) - { -- return __builtin_aarch64_sqshlv4hi (__a, __b); -+ return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqshl_s32 (int32x2_t __a, int32x2_t __b) -+/* vrnd */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd_f32 (float32x2_t __a) - { -- return __builtin_aarch64_sqshlv2si (__a, __b); -+ return __builtin_aarch64_btruncv2sf (__a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vqshl_s64 (int64x1_t __a, int64x1_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd_f64 (float64x1_t __a) - { -- return (int64x1_t) {__builtin_aarch64_sqshldi (__a[0], __b[0])}; -+ return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqshl_u8 (uint8x8_t __a, int8x8_t __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_uqshlv8qi_uus ( __a, __b); -+ return __builtin_aarch64_btruncv4sf (__a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqshl_u16 (uint16x4_t __a, int16x4_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_uqshlv4hi_uus ( __a, __b); -+ return __builtin_aarch64_btruncv2df (__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqshl_u32 (uint32x2_t __a, int32x2_t __b) -+/* vrnda */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnda_f32 (float32x2_t __a) - { -- return __builtin_aarch64_uqshlv2si_uus ( __a, __b); -+ return __builtin_aarch64_roundv2sf (__a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vqshl_u64 (uint64x1_t __a, int64x1_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnda_f64 (float64x1_t __a) - { -- return (uint64x1_t) {__builtin_aarch64_uqshldi_uus (__a[0], __b[0])}; -+ return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqshlq_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vrndaq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_sqshlv16qi (__a, __b); -+ return __builtin_aarch64_roundv4sf (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqshlq_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndaq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_sqshlv8hi (__a, __b); -+ return __builtin_aarch64_roundv2df (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqshlq_s32 (int32x4_t __a, int32x4_t __b) -+/* vrndi */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndi_f32 (float32x2_t __a) - { -- return __builtin_aarch64_sqshlv4si (__a, __b); -+ return __builtin_aarch64_nearbyintv2sf (__a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqshlq_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndi_f64 (float64x1_t __a) - { -- return __builtin_aarch64_sqshlv2di (__a, __b); -+ return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqshlq_u8 (uint8x16_t __a, int8x16_t __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndiq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_uqshlv16qi_uus ( __a, __b); -+ return __builtin_aarch64_nearbyintv4sf (__a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vqshlq_u16 (uint16x8_t __a, int16x8_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndiq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_uqshlv8hi_uus ( __a, __b); -+ return __builtin_aarch64_nearbyintv2df (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vqshlq_u32 (uint32x4_t __a, int32x4_t __b) -+/* vrndm */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndm_f32 (float32x2_t __a) - { -- return __builtin_aarch64_uqshlv4si_uus ( __a, __b); -+ return __builtin_aarch64_floorv2sf (__a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vqshlq_u64 (uint64x2_t __a, int64x2_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndm_f64 (float64x1_t __a) - { -- return __builtin_aarch64_uqshlv2di_uus ( __a, __b); -+ return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqshlb_s8 (int8_t __a, int8_t __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndmq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_sqshlqi (__a, __b); -+ return __builtin_aarch64_floorv4sf (__a); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqshlh_s16 (int16_t __a, int16_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndmq_f64 (float64x2_t __a) - { -- return 
__builtin_aarch64_sqshlhi (__a, __b); -+ return __builtin_aarch64_floorv2df (__a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqshls_s32 (int32_t __a, int32_t __b) -+/* vrndn */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndn_f32 (float32x2_t __a) - { -- return __builtin_aarch64_sqshlsi (__a, __b); -+ return __builtin_aarch64_frintnv2sf (__a); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqshld_s64 (int64_t __a, int64_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndn_f64 (float64x1_t __a) - { -- return __builtin_aarch64_sqshldi (__a, __b); -+ return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])}; - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vqshlb_u8 (uint8_t __a, uint8_t __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndnq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_uqshlqi_uus (__a, __b); -+ return __builtin_aarch64_frintnv4sf (__a); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vqshlh_u16 (uint16_t __a, uint16_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndnq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_uqshlhi_uus (__a, __b); -+ return __builtin_aarch64_frintnv2df (__a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vqshls_u32 (uint32_t __a, uint32_t __b) -+/* vrndp */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndp_f32 (float32x2_t __a) - { -- return __builtin_aarch64_uqshlsi_uus (__a, __b); -+ return __builtin_aarch64_ceilv2sf (__a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vqshld_u64 (uint64_t __a, uint64_t __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndp_f64 (float64x1_t __a) - { -- return __builtin_aarch64_uqshldi_uus (__a, __b); -+ return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqshl_n_s8 (int8x8_t __a, const int __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndpq_f32 (float32x4_t __a) - { -- return (int8x8_t) __builtin_aarch64_sqshl_nv8qi (__a, __b); -+ return __builtin_aarch64_ceilv4sf (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqshl_n_s16 (int16x4_t __a, const int __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndpq_f64 (float64x2_t __a) - { -- return (int16x4_t) __builtin_aarch64_sqshl_nv4hi (__a, __b); -+ return __builtin_aarch64_ceilv2df (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqshl_n_s32 (int32x2_t __a, const int __b) -+/* vrndx */ -+ -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndx_f32 (float32x2_t __a) - { -- return (int32x2_t) __builtin_aarch64_sqshl_nv2si (__a, __b); -+ return __builtin_aarch64_rintv2sf (__a); - } - 
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vqshl_n_s64 (int64x1_t __a, const int __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndx_f64 (float64x1_t __a) - { -- return (int64x1_t) {__builtin_aarch64_sqshl_ndi (__a[0], __b)}; -+ return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqshl_n_u8 (uint8x8_t __a, const int __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndxq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b); -+ return __builtin_aarch64_rintv4sf (__a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqshl_n_u16 (uint16x4_t __a, const int __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndxq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b); -+ return __builtin_aarch64_rintv2df (__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqshl_n_u32 (uint32x2_t __a, const int __b) -+/* vrshl */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshl_s8 (int8x8_t __a, int8x8_t __b) - { -- return __builtin_aarch64_uqshl_nv2si_uus (__a, __b); -+ return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vqshl_n_u64 (uint64x1_t __a, const int __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshl_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint64x1_t) {__builtin_aarch64_uqshl_ndi_uus (__a[0], __b)}; -+ return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqshlq_n_s8 (int8x16_t __a, const int __b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshl_s32 (int32x2_t __a, int32x2_t __b) - { -- return (int8x16_t) __builtin_aarch64_sqshl_nv16qi (__a, __b); -+ return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vqshlq_n_s16 (int16x8_t __a, const int __b) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshl_s64 (int64x1_t __a, int64x1_t __b) - { -- return (int16x8_t) __builtin_aarch64_sqshl_nv8hi (__a, __b); -+ return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])}; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vqshlq_n_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (int32x4_t) __builtin_aarch64_sqshl_nv4si (__a, __b); -+ return __builtin_aarch64_urshlv8qi_uus (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vqshlq_n_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- 
return (int64x2_t) __builtin_aarch64_sqshl_nv2di (__a, __b); -+ return __builtin_aarch64_urshlv4hi_uus (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqshlq_n_u8 (uint8x16_t __a, const int __b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b); -+ return __builtin_aarch64_urshlv2si_uus (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vqshlq_n_u16 (uint16x8_t __a, const int __b) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b); -+ return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])}; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vqshlq_n_u32 (uint32x4_t __a, const int __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshlq_s8 (int8x16_t __a, int8x16_t __b) - { -- return __builtin_aarch64_uqshl_nv4si_uus (__a, __b); -+ return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vqshlq_n_u64 (uint64x2_t __a, const int __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshlq_s16 (int16x8_t __a, int16x8_t __b) - { -- return __builtin_aarch64_uqshl_nv2di_uus (__a, __b); -+ return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqshlb_n_s8 (int8_t __a, const int __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshlq_s32 (int32x4_t __a, int32x4_t __b) - { -- return (int8_t) __builtin_aarch64_sqshl_nqi (__a, __b); -+ return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqshlh_n_s16 (int16_t __a, const int __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshlq_s64 (int64x2_t __a, int64x2_t __b) - { -- return (int16_t) __builtin_aarch64_sqshl_nhi (__a, __b); -+ return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqshls_n_s32 (int32_t __a, const int __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return (int32_t) __builtin_aarch64_sqshl_nsi (__a, __b); -+ return __builtin_aarch64_urshlv16qi_uus (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqshld_n_s64 (int64_t __a, const int __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return __builtin_aarch64_sqshl_ndi (__a, __b); -+ return __builtin_aarch64_urshlv8hi_uus (__a, __b); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vqshlb_n_u8 (uint8_t __a, const int __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -+vrshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return __builtin_aarch64_uqshl_nqi_uus (__a, __b); -+ return __builtin_aarch64_urshlv4si_uus (__a, __b); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vqshlh_n_u16 (uint16_t __a, const int __b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return __builtin_aarch64_uqshl_nhi_uus (__a, __b); -+ return __builtin_aarch64_urshlv2di_uus (__a, __b); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vqshls_n_u32 (uint32_t __a, const int __b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshld_s64 (int64_t __a, int64_t __b) - { -- return __builtin_aarch64_uqshl_nsi_uus (__a, __b); -+ return __builtin_aarch64_srshldi (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vqshld_n_u64 (uint64_t __a, const int __b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshld_u64 (uint64_t __a, int64_t __b) - { -- return __builtin_aarch64_uqshl_ndi_uus (__a, __b); -+ return __builtin_aarch64_urshldi_uus (__a, __b); - } - --/* vqshlu */ -+/* vrshr */ - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqshlu_n_s8 (int8x8_t __a, const int __b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshr_n_s8 (int8x8_t __a, const int __b) - { -- return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b); -+ return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqshlu_n_s16 (int16x4_t __a, const int __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshr_n_s16 (int16x4_t __a, const int __b) - { -- return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b); -+ return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqshlu_n_s32 (int32x2_t __a, const int __b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshr_n_s32 (int32x2_t __a, const int __b) - { -- return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b); -+ return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vqshlu_n_s64 (int64x1_t __a, const int __b) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshr_n_s64 (int64x1_t __a, const int __b) - { -- return (uint64x1_t) {__builtin_aarch64_sqshlu_ndi_uss (__a[0], __b)}; -+ return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)}; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqshluq_n_s8 (int8x16_t __a, const int __b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshr_n_u8 (uint8x8_t __a, const int __b) - { -- return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b); -+ return __builtin_aarch64_urshr_nv8qi_uus (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) --vqshluq_n_s16 (int16x8_t __a, const int __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshr_n_u16 (uint16x4_t __a, const int __b) - { -- return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b); -+ return __builtin_aarch64_urshr_nv4hi_uus (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vqshluq_n_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshr_n_u32 (uint32x2_t __a, const int __b) - { -- return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b); -+ return __builtin_aarch64_urshr_nv2si_uus (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vqshluq_n_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshr_n_u64 (uint64x1_t __a, const int __b) - { -- return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b); -+ return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)}; - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqshlub_n_s8 (int8_t __a, const int __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrq_n_s8 (int8x16_t __a, const int __b) - { -- return (int8_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b); -+ return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqshluh_n_s16 (int16_t __a, const int __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrq_n_s16 (int16x8_t __a, const int __b) - { -- return (int16_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b); -+ return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqshlus_n_s32 (int32_t __a, const int __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrq_n_s32 (int32x4_t __a, const int __b) - { -- return (int32_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b); -+ return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vqshlud_n_s64 (int64_t __a, const int __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrq_n_s64 (int64x2_t __a, const int __b) - { -- return __builtin_aarch64_sqshlu_ndi_uss (__a, __b); -+ return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b); - } - --/* vqshrn */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqshrn_n_s16 (int16x8_t __a, const int __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrq_n_u8 (uint8x16_t __a, const int __b) - { -- return (int8x8_t) __builtin_aarch64_sqshrn_nv8hi (__a, __b); -+ return __builtin_aarch64_urshr_nv16qi_uus (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vqshrn_n_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrq_n_u16 (uint16x8_t __a, const int 
__b) - { -- return (int16x4_t) __builtin_aarch64_sqshrn_nv4si (__a, __b); -+ return __builtin_aarch64_urshr_nv8hi_uus (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vqshrn_n_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrq_n_u32 (uint32x4_t __a, const int __b) - { -- return (int32x2_t) __builtin_aarch64_sqshrn_nv2di (__a, __b); -+ return __builtin_aarch64_urshr_nv4si_uus (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqshrn_n_u16 (uint16x8_t __a, const int __b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrq_n_u64 (uint64x2_t __a, const int __b) - { -- return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b); -+ return __builtin_aarch64_urshr_nv2di_uus (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqshrn_n_u32 (uint32x4_t __a, const int __b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrd_n_s64 (int64_t __a, const int __b) - { -- return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b); -+ return __builtin_aarch64_srshr_ndi (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqshrn_n_u64 (uint64x2_t __a, const int __b) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrshrd_n_u64 (uint64_t __a, const int __b) - { -- return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b); -+ return __builtin_aarch64_urshr_ndi_uus (__a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqshrnh_n_s16 (int16_t __a, const int __b) -+/* vrsqrte. 
*/ -+ -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrtes_f32 (float32_t __a) - { -- return (int8_t) __builtin_aarch64_sqshrn_nhi (__a, __b); -+ return __builtin_aarch64_rsqrtesf (__a); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqshrns_n_s32 (int32_t __a, const int __b) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrted_f64 (float64_t __a) - { -- return (int16_t) __builtin_aarch64_sqshrn_nsi (__a, __b); -+ return __builtin_aarch64_rsqrtedf (__a); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqshrnd_n_s64 (int64_t __a, const int __b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrte_f32 (float32x2_t __a) - { -- return (int32_t) __builtin_aarch64_sqshrn_ndi (__a, __b); -+ return __builtin_aarch64_rsqrtev2sf (__a); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vqshrnh_n_u16 (uint16_t __a, const int __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrte_f64 (float64x1_t __a) - { -- return __builtin_aarch64_uqshrn_nhi_uus (__a, __b); -+ return (float64x1_t) {vrsqrted_f64 (vget_lane_f64 (__a, 0))}; - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vqshrns_n_u32 (uint32_t __a, const int __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrteq_f32 (float32x4_t __a) - { -- return __builtin_aarch64_uqshrn_nsi_uus (__a, __b); -+ return __builtin_aarch64_rsqrtev4sf (__a); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vqshrnd_n_u64 (uint64_t __a, const int __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrteq_f64 (float64x2_t __a) - { -- return __builtin_aarch64_uqshrn_ndi_uus (__a, __b); -+ return __builtin_aarch64_rsqrtev2df (__a); - } - --/* vqshrun */ -+/* vrsqrts. 
*/ - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqshrun_n_s16 (int16x8_t __a, const int __b) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrtss_f32 (float32_t __a, float32_t __b) - { -- return (uint8x8_t) __builtin_aarch64_sqshrun_nv8hi (__a, __b); -+ return __builtin_aarch64_rsqrtssf (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vqshrun_n_s32 (int32x4_t __a, const int __b) -+__extension__ extern __inline float64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrtsd_f64 (float64_t __a, float64_t __b) - { -- return (uint16x4_t) __builtin_aarch64_sqshrun_nv4si (__a, __b); -+ return __builtin_aarch64_rsqrtsdf (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vqshrun_n_s64 (int64x2_t __a, const int __b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrts_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint32x2_t) __builtin_aarch64_sqshrun_nv2di (__a, __b); -+ return __builtin_aarch64_rsqrtsv2sf (__a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqshrunh_n_s16 (int16_t __a, const int __b) -+__extension__ extern __inline float64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrts_f64 (float64x1_t __a, float64x1_t __b) - { -- return (int8_t) __builtin_aarch64_sqshrun_nhi (__a, __b); -+ return (float64x1_t) {vrsqrtsd_f64 (vget_lane_f64 (__a, 0), -+ vget_lane_f64 (__b, 0))}; - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqshruns_n_s32 (int32_t __a, const int __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b) - { -- return (int16_t) __builtin_aarch64_sqshrun_nsi (__a, __b); -+ return __builtin_aarch64_rsqrtsv4sf (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqshrund_n_s64 (int64_t __a, const int __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrtsq_f64 (float64x2_t __a, float64x2_t __b) - { -- return (int32_t) __builtin_aarch64_sqshrun_ndi (__a, __b); -+ return __builtin_aarch64_rsqrtsv2df (__a, __b); - } - --/* vqsub */ -+/* vrsra */ - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vqsubb_s8 (int8_t __a, int8_t __b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) - { -- return (int8_t) __builtin_aarch64_sqsubqi (__a, __b); -+ return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vqsubh_s16 (int16_t __a, int16_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { -- return (int16_t) __builtin_aarch64_sqsubhi (__a, __b); -+ return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vqsubs_s32 (int32_t __a, int32_t __b) -+__extension__ extern __inline int32x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) -+vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { -- return (int32_t) __builtin_aarch64_sqsubsi (__a, __b); -+ return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vqsubd_s64 (int64_t __a, int64_t __b) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) - { -- return __builtin_aarch64_sqsubdi (__a, __b); -+ return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)}; - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vqsubb_u8 (uint8_t __a, uint8_t __b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return (uint8_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); -+ return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vqsubh_u16 (uint16_t __a, uint16_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return (uint16_t) __builtin_aarch64_uqsubhi_uuu (__a, __b); -+ return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vqsubs_u32 (uint32_t __a, uint32_t __b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return (uint32_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); -+ return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vqsubd_u64 (uint64_t __a, uint64_t __b) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return __builtin_aarch64_uqsubdi_uuu (__a, __b); -+ return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)}; - } - --/* vqtbl2 */ -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) -+{ -+ return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c); -+} - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqtbl2_s8 (int8x16x2_t tab, uint8x8_t idx) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -- return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqtbl2_u8 (uint8x16x2_t tab, uint8x8_t idx) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) - { -- 
__builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vqtbl2_p8 (poly8x16x2_t tab, uint8x8_t idx) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)idx); -+ return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqtbl2q_s8 (int8x16x2_t tab, uint8x16_t idx) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); -+ return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqtbl2q_u8 (uint8x16x2_t tab, uint8x16_t idx) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); -+ return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vqtbl2q_p8 (poly8x16x2_t tab, uint8x16_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)idx); -+ return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c); - } - --/* vqtbl3 */ -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) -+{ -+ return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c); -+} - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqtbl3_s8 (int8x16x3_t tab, uint8x8_t idx) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return __builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); -+ return __builtin_aarch64_srsra_ndi (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqtbl3_u8 (uint8x16x3_t tab, uint8x8_t idx) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (uint8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); -+ return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vqtbl3_p8 (poly8x16x3_t tab, uint8x8_t idx) -+#pragma GCC push_options -+#pragma GCC target ("+nothing+crypto") -+ -+/* vsha1 */ -+ -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (poly8x8_t)__builtin_aarch64_qtbl3v8qi (__o, (int8x8_t)idx); -+ return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqtbl3q_s8 (int8x16x3_t tab, uint8x16_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return __builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); -+ return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqtbl3q_u8 (uint8x16x3_t tab, uint8x16_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (uint8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); -+ return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk); -+} -+ -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha1h_u32 (uint32_t hash_e) -+{ -+ return __builtin_aarch64_crypto_sha1hsi_uu (hash_e); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vqtbl3q_p8 (poly8x16x3_t tab, uint8x16_t idx) 
-+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (poly8x16_t)__builtin_aarch64_qtbl3v16qi (__o, (int8x16_t)idx); -+ return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11); - } - --/* vqtbl4 */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqtbl4_s8 (int8x16x4_t tab, uint8x8_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return __builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); -+ return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqtbl4_u8 (uint8x16x4_t tab, uint8x8_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (uint8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); -+ return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vqtbl4_p8 (poly8x16x4_t tab, uint8x8_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (poly8x8_t)__builtin_aarch64_qtbl4v8qi (__o, (int8x8_t)idx); -+ return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqtbl4q_s8 (int8x16x4_t tab, uint8x16_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, 
(int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return __builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); -+ return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqtbl4q_u8 (uint8x16x4_t tab, uint8x16_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (uint8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); -+ return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vqtbl4q_p8 (poly8x16x4_t tab, uint8x16_t idx) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_p64 (poly64_t a, poly64_t b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (poly8x16_t)__builtin_aarch64_qtbl4v16qi (__o, (int8x16_t)idx); -+ return -+ __builtin_aarch64_crypto_pmulldi_ppp (a, b); - } - -- --/* vqtbx2 */ --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqtbx2_s8 (int8x8_t r, int8x16x2_t tab, uint8x8_t idx) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmull_high_p64 (poly64x2_t a, poly64x2_t b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -- return __builtin_aarch64_tbx4v8qi (r, __o, (int8x8_t)idx); -+ return __builtin_aarch64_crypto_pmullv2di_ppp (a, b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqtbx2_u8 (uint8x8_t r, uint8x16x2_t tab, uint8x8_t idx) -+#pragma GCC pop_options -+ -+/* vshl */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_n_s8 (int8x8_t __a, const int __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vqtbx2_p8 (poly8x8_t r, poly8x16x2_t tab, uint8x8_t idx) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_n_s16 (int16x4_t __a, const int __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, 
(int8x16_t)tab.val[1], 1); -- return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqtbx2q_s8 (int8x16_t r, int8x16x2_t tab, uint8x16_t idx) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_n_s32 (int32x2_t __a, const int __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, tab.val[1], 1); -- return __builtin_aarch64_tbx4v16qi (r, __o, (int8x16_t)idx); -+ return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqtbx2q_u8 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_n_s64 (int64x1_t __a, const int __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)}; - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vqtbx2q_p8 (poly8x16_t r, poly8x16x2_t tab, uint8x16_t idx) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_n_u8 (uint8x8_t __a, const int __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)tab.val[1], 1); -- return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b); - } - --/* vqtbx3 */ --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqtbx3_s8 (int8x8_t r, int8x16x3_t tab, uint8x8_t idx) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_n_u16 (uint16x4_t __a, const int __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); -- return __builtin_aarch64_qtbx3v8qi (r, __o, (int8x8_t)idx); -+ return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vqtbx3_u8 (uint8x8_t r, uint8x16x3_t tab, uint8x8_t idx) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_n_u32 (uint32x2_t __a, const int __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (uint8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b); - } - --__extension__ static __inline poly8x8_t __attribute__ 
((__always_inline__)) --vqtbx3_p8 (poly8x8_t r, poly8x16x3_t tab, uint8x8_t idx) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_n_u64 (uint64x1_t __a, const int __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (poly8x8_t)__builtin_aarch64_qtbx3v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)}; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqtbx3q_s8 (int8x16_t r, int8x16x3_t tab, uint8x16_t idx) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_n_s8 (int8x16_t __a, const int __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, tab.val[2], 2); -- return __builtin_aarch64_qtbx3v16qi (r, __o, (int8x16_t)idx); -+ return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqtbx3q_u8 (uint8x16_t r, uint8x16x3_t tab, uint8x16_t idx) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_n_s16 (int16x8_t __a, const int __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (uint8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vqtbx3q_p8 (poly8x16_t r, poly8x16x3_t tab, uint8x16_t idx) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_n_s32 (int32x4_t __a, const int __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t)tab.val[2], 2); -- return (poly8x16_t)__builtin_aarch64_qtbx3v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b); - } - --/* vqtbx4 */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vqtbx4_s8 (int8x8_t r, int8x16x4_t tab, uint8x8_t idx) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_n_s64 (int64x2_t __a, const int __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); -- return __builtin_aarch64_qtbx4v8qi (r, __o, (int8x8_t)idx); -+ return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b); - } - --__extension__ static __inline 
uint8x8_t __attribute__ ((__always_inline__)) --vqtbx4_u8 (uint8x8_t r, uint8x16x4_t tab, uint8x8_t idx) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_n_u8 (uint8x16_t __a, const int __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (uint8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vqtbx4_p8 (poly8x8_t r, poly8x16x4_t tab, uint8x8_t idx) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_n_u16 (uint16x8_t __a, const int __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (poly8x8_t)__builtin_aarch64_qtbx4v8qi ((int8x8_t)r, __o, -- (int8x8_t)idx); -+ return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vqtbx4q_s8 (int8x16_t r, int8x16x4_t tab, uint8x16_t idx) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_n_u32 (uint32x4_t __a, const int __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, tab.val[3], 3); -- return __builtin_aarch64_qtbx4v16qi (r, __o, (int8x16_t)idx); -+ return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vqtbx4q_u8 (uint8x16_t r, uint8x16x4_t tab, uint8x16_t idx) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_n_u64 (uint64x2_t __a, const int __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (uint8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vqtbx4q_p8 (poly8x16_t r, poly8x16x4_t tab, uint8x16_t idx) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshld_n_s64 (int64_t __a, const int __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[0], 0); -- __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t)tab.val[3], 3); -- return (poly8x16_t)__builtin_aarch64_qtbx4v16qi ((int8x16_t)r, __o, -- (int8x16_t)idx); -+ return __builtin_aarch64_ashldi (__a, __b); - } - --/* vrbit */ -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrbit_p8 (poly8x8_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshld_n_u64 (uint64_t __a, const int __b) - { -- return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); -+ return (uint64_t) __builtin_aarch64_ashldi (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrbit_s8 (int8x8_t __a) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_s8 (int8x8_t __a, int8x8_t __b) - { -- return __builtin_aarch64_rbitv8qi (__a); -+ return __builtin_aarch64_sshlv8qi (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrbit_u8 (uint8x8_t __a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_s16 (int16x4_t __a, int16x4_t __b) - { -- return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); -+ return __builtin_aarch64_sshlv4hi (__a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrbitq_p8 (poly8x16_t __a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_s32 (int32x2_t __a, int32x2_t __b) - { -- return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a); -+ return __builtin_aarch64_sshlv2si (__a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrbitq_s8 (int8x16_t __a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_s64 (int64x1_t __a, int64x1_t __b) - { -- return __builtin_aarch64_rbitv16qi (__a); -+ return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])}; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrbitq_u8 (uint8x16_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_u8 (uint8x8_t __a, int8x8_t __b) - { -- return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a); -+ return __builtin_aarch64_ushlv8qi_uus (__a, __b); - } - --/* vrecpe */ -- --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vrecpe_u32 (uint32x2_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_u16 (uint16x4_t __a, int16x4_t __b) - { -- return (uint32x2_t) __builtin_aarch64_urecpev2si ((int32x2_t) __a); -+ return __builtin_aarch64_ushlv4hi_uus (__a, __b); - } -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vrecpeq_u32 (uint32x4_t __a) -+ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_u32 (uint32x2_t __a, int32x2_t __b) - { -- return (uint32x4_t) __builtin_aarch64_urecpev4si ((int32x4_t) __a); -+ return __builtin_aarch64_ushlv2si_uus (__a, __b); - } - --__extension__ static __inline float32_t 
__attribute__ ((__always_inline__)) --vrecpes_f32 (float32_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshl_u64 (uint64x1_t __a, int64x1_t __b) - { -- return __builtin_aarch64_frecpesf (__a); -+ return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])}; - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vrecped_f64 (float64_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_s8 (int8x16_t __a, int8x16_t __b) - { -- return __builtin_aarch64_frecpedf (__a); -+ return __builtin_aarch64_sshlv16qi (__a, __b); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrecpe_f32 (float32x2_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_s16 (int16x8_t __a, int16x8_t __b) - { -- return __builtin_aarch64_frecpev2sf (__a); -+ return __builtin_aarch64_sshlv8hi (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrecpeq_f32 (float32x4_t __a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_s32 (int32x4_t __a, int32x4_t __b) - { -- return __builtin_aarch64_frecpev4sf (__a); -+ return __builtin_aarch64_sshlv4si (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrecpeq_f64 (float64x2_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_s64 (int64x2_t __a, int64x2_t __b) - { -- return __builtin_aarch64_frecpev2df (__a); -+ return __builtin_aarch64_sshlv2di (__a, __b); - } - --/* vrecps */ -- --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vrecpss_f32 (float32_t __a, float32_t __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return __builtin_aarch64_frecpssf (__a, __b); -+ return __builtin_aarch64_ushlv16qi_uus (__a, __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vrecpsd_f64 (float64_t __a, float64_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return __builtin_aarch64_frecpsdf (__a, __b); -+ return __builtin_aarch64_ushlv8hi_uus (__a, __b); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrecps_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return __builtin_aarch64_frecpsv2sf (__a, __b); -+ return __builtin_aarch64_ushlv4si_uus (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrecpsq_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshlq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return __builtin_aarch64_frecpsv4sf (__a, __b); -+ return __builtin_aarch64_ushlv2di_uus (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrecpsq_f64 (float64x2_t __a, float64x2_t 
__b) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshld_s64 (int64_t __a, int64_t __b) - { -- return __builtin_aarch64_frecpsv2df (__a, __b); -+ return __builtin_aarch64_sshldi (__a, __b); - } - --/* vrecpx */ -- --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) --vrecpxs_f32 (float32_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshld_u64 (uint64_t __a, uint64_t __b) - { -- return __builtin_aarch64_frecpxsf (__a); -+ return __builtin_aarch64_ushldi_uus (__a, __b); - } - --__extension__ static __inline float64_t __attribute__ ((__always_inline__)) --vrecpxd_f64 (float64_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_high_n_s8 (int8x16_t __a, const int __b) - { -- return __builtin_aarch64_frecpxdf (__a); -+ return __builtin_aarch64_sshll2_nv16qi (__a, __b); - } - -- --/* vrev */ -- --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev16_p8 (poly8x8_t a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_high_n_s16 (int16x8_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return __builtin_aarch64_sshll2_nv8hi (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev16_s8 (int8x8_t a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_high_n_s32 (int32x4_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return __builtin_aarch64_sshll2_nv4si (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev16_u8 (uint8x8_t a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_high_n_u8 (uint8x16_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev16q_p8 (poly8x16_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_high_n_u16 (uint16x8_t __a, const int __b) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -+ return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev16q_s8 (int8x16_t a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_high_n_u32 (uint32x4_t __a, const int __b) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -+ return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrev16q_u8 (uint8x16_t a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_n_s8 (int8x8_t __a, const int __b) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 
10, 13, 12, 15, 14 }); -+ return __builtin_aarch64_sshll_nv8qi (__a, __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev32_p8 (poly8x8_t a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_n_s16 (int16x4_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_aarch64_sshll_nv4hi (__a, __b); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vrev32_p16 (poly16x4_t a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_n_s32 (int32x2_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+ return __builtin_aarch64_sshll_nv2si (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev32_s8 (int8x8_t a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_n_u8 (uint8x8_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_aarch64_ushll_nv8qi_uus (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vrev32_s16 (int16x4_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_n_u16 (uint16x4_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+ return __builtin_aarch64_ushll_nv4hi_uus (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev32_u8 (uint8x8_t a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshll_n_u32 (uint32x2_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_aarch64_ushll_nv2si_uus (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vrev32_u16 (uint16x4_t a) -+/* vshr */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshr_n_s8 (int8x8_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); -+ return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev32q_p8 (poly8x16_t a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshr_n_s16 (int16x4_t __a, const int __b) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+ return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vrev32q_p16 (poly16x8_t a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshr_n_s32 (int32x2_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev32q_s8 (int8x16_t a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshr_n_s64 
(int64x1_t __a, const int __b) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+ return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)}; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vrev32q_s16 (int16x8_t a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshr_n_u8 (uint8x8_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrev32q_u8 (uint8x16_t a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshr_n_u16 (uint16x4_t __a, const int __b) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); -+ return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vrev32q_u16 (uint16x8_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshr_n_u32 (uint32x2_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); -+ return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrev64_f32 (float32x2_t a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshr_n_u64 (uint64x1_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+ return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)}; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vrev64_p8 (poly8x8_t a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrq_n_s8 (int8x16_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+ return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vrev64_p16 (poly16x4_t a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrq_n_s16 (int16x8_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+ return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vrev64_s8 (int8x8_t a) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrq_n_s32 (int32x4_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+ return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vrev64_s16 (int16x4_t a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrq_n_s64 (int64x2_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+ return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b); - } - --__extension__ static __inline 
int32x2_t __attribute__ ((__always_inline__)) --vrev64_s32 (int32x2_t a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrq_n_u8 (uint8x16_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+ return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vrev64_u8 (uint8x8_t a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrq_n_u16 (uint16x8_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); -+ return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vrev64_u16 (uint16x4_t a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrq_n_u32 (uint32x4_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); -+ return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vrev64_u32 (uint32x2_t a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrq_n_u64 (uint64x2_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); -+ return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrev64q_f32 (float32x4_t a) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrd_n_s64 (int64_t __a, const int __b) - { -- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+ return __builtin_aarch64_ashr_simddi (__a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vrev64q_p8 (poly8x16_t a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vshrd_n_u64 (uint64_t __a, const int __b) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+ return __builtin_aarch64_lshr_simddi_uus (__a, __b); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vrev64q_p16 (poly16x8_t a) -+/* vsli */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) - { -- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vrev64q_s8 (int8x16_t a) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+ return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vrev64q_s16 (int16x8_t a) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
-+vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { -- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vrev64q_s32 (int32x4_t a) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) - { -- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+ return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)}; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vrev64q_u8 (uint8x16_t a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { -- return __builtin_shuffle (a, -- (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); -+ return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vrev64q_u16 (uint16x8_t a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { -- return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); -+ return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vrev64q_u32 (uint32x4_t a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { -- return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); -+ return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c); - } - --/* vrnd */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrnd_f32 (float32x2_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { -- return __builtin_aarch64_btruncv2sf (__a); -+ return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)}; - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vrnd_f64 (float64x1_t __a) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) - { -- return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); -+ return (poly64x1_t) {__builtin_aarch64_ssli_ndi_ppps (__a[0], __b[0], __c)}; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrndq_f32 (float32x4_t __a) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) - { -- return __builtin_aarch64_btruncv4sf (__a); -+ return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrndq_f64 (float64x2_t __a) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) - { -- return __builtin_aarch64_btruncv2df (__a); 
-+ return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c); - } - --/* vrnda */ -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) -+{ -+ return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c); -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrnda_f32 (float32x2_t __a) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) - { -- return __builtin_aarch64_roundv2sf (__a); -+ return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vrnda_f64 (float64x1_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { -- return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); -+ return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrndaq_f32 (float32x4_t __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { -- return __builtin_aarch64_roundv4sf (__a); -+ return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrndaq_f64 (float64x2_t __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { -- return __builtin_aarch64_roundv2df (__a); -+ return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c); - } - --/* vrndi */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrndi_f32 (float32x2_t __a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { -- return __builtin_aarch64_nearbyintv2sf (__a); -+ return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vrndi_f64 (float64x1_t __a) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) - { -- return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); -+ return __builtin_aarch64_ssli_nv2di_ppps (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrndiq_f32 (float32x4_t __a) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vslid_n_s64 (int64_t __a, int64_t __b, const int __c) - { -- return __builtin_aarch64_nearbyintv4sf (__a); -+ return __builtin_aarch64_ssli_ndi (__a, __b, __c); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrndiq_f64 (float64x2_t __a) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c) - { -- return __builtin_aarch64_nearbyintv2df 
(__a); -+ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); - } - --/* vrndm */ -+/* vsqadd */ - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrndm_f32 (float32x2_t __a) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqadd_u8 (uint8x8_t __a, int8x8_t __b) - { -- return __builtin_aarch64_floorv2sf (__a); -+ return __builtin_aarch64_usqaddv8qi_uus (__a, __b); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vrndm_f64 (float64x1_t __a) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqadd_u16 (uint16x4_t __a, int16x4_t __b) - { -- return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); -+ return __builtin_aarch64_usqaddv4hi_uus (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrndmq_f32 (float32x4_t __a) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqadd_u32 (uint32x2_t __a, int32x2_t __b) - { -- return __builtin_aarch64_floorv4sf (__a); -+ return __builtin_aarch64_usqaddv2si_uus (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrndmq_f64 (float64x2_t __a) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqadd_u64 (uint64x1_t __a, int64x1_t __b) - { -- return __builtin_aarch64_floorv2df (__a); -+ return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])}; - } - --/* vrndn */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrndn_f32 (float32x2_t __a) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) - { -- return __builtin_aarch64_frintnv2sf (__a); -+ return __builtin_aarch64_usqaddv16qi_uus (__a, __b); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vrndn_f64 (float64x1_t __a) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) - { -- return (float64x1_t) {__builtin_aarch64_frintndf (__a[0])}; -+ return __builtin_aarch64_usqaddv8hi_uus (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vrndnq_f32 (float32x4_t __a) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) - { -- return __builtin_aarch64_frintnv4sf (__a); -+ return __builtin_aarch64_usqaddv4si_uus (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vrndnq_f64 (float64x2_t __a) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) - { -- return __builtin_aarch64_frintnv2df (__a); -+ return __builtin_aarch64_usqaddv2di_uus (__a, __b); - } - --/* vrndp */ -- --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vrndp_f32 (float32x2_t __a) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqaddb_u8 (uint8_t __a, int8_t __b) - { -- return __builtin_aarch64_ceilv2sf (__a); -+ return 
__builtin_aarch64_usqaddqi_uus (__a, __b);
- }
- 
--__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
--vrndp_f64 (float64x1_t __a)
-+__extension__ extern __inline uint16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsqaddh_u16 (uint16_t __a, int16_t __b)
- {
-- return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0);
-+ return __builtin_aarch64_usqaddhi_uus (__a, __b);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vrndpq_f32 (float32x4_t __a)
-+__extension__ extern __inline uint32_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsqadds_u32 (uint32_t __a, int32_t __b)
- {
-- return __builtin_aarch64_ceilv4sf (__a);
-+ return __builtin_aarch64_usqaddsi_uus (__a, __b);
- }
- 
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vrndpq_f64 (float64x2_t __a)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsqaddd_u64 (uint64_t __a, int64_t __b)
- {
-- return __builtin_aarch64_ceilv2df (__a);
-+ return __builtin_aarch64_usqadddi_uus (__a, __b);
- }
- 
--/* vrndx */
--
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
--vrndx_f32 (float32x2_t __a)
-+/* vsqrt */
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsqrt_f32 (float32x2_t a)
- {
-- return __builtin_aarch64_rintv2sf (__a);
-+ return __builtin_aarch64_sqrtv2sf (a);
- }
- 
--__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
--vrndx_f64 (float64x1_t __a)
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsqrtq_f32 (float32x4_t a)
- {
-- return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0);
-+ return __builtin_aarch64_sqrtv4sf (a);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
--vrndxq_f32 (float32x4_t __a)
-+__extension__ extern __inline float64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsqrt_f64 (float64x1_t a)
- {
-- return __builtin_aarch64_rintv4sf (__a);
-+ return (float64x1_t) { __builtin_aarch64_sqrtdf (a[0]) };
- }
- 
--__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
--vrndxq_f64 (float64x2_t __a)
-+__extension__ extern __inline float64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsqrtq_f64 (float64x2_t a)
- {
-- return __builtin_aarch64_rintv2df (__a);
-+ return __builtin_aarch64_sqrtv2df (a);
- }
- 
--/* vrshl */
-+/* vsra */
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vrshl_s8 (int8x8_t __a, int8x8_t __b)
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
- {
-- return (int8x8_t) __builtin_aarch64_srshlv8qi (__a, __b);
-+ return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vrshl_s16 (int16x4_t __a, int16x4_t __b)
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
- {
-- return (int16x4_t) __builtin_aarch64_srshlv4hi (__a, __b);
-+ return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vrshl_s32 (int32x2_t __a, int32x2_t __b)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
- {
-- return (int32x2_t) __builtin_aarch64_srshlv2si (__a, __b);
-+ return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c);
- }
- 
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
--vrshl_s64 (int64x1_t __a, int64x1_t __b)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
- {
-- return (int64x1_t) {__builtin_aarch64_srshldi (__a[0], __b[0])};
-+ return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)};
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vrshl_u8 (uint8x8_t __a, int8x8_t __b)
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
- {
-- return __builtin_aarch64_urshlv8qi_uus (__a, __b);
-+ return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vrshl_u16 (uint16x4_t __a, int16x4_t __b)
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
- {
-- return __builtin_aarch64_urshlv4hi_uus (__a, __b);
-+ return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vrshl_u32 (uint32x2_t __a, int32x2_t __b)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
- {
-- return __builtin_aarch64_urshlv2si_uus (__a, __b);
-+ return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vrshl_u64 (uint64x1_t __a, int64x1_t __b)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (uint64x1_t) {__builtin_aarch64_urshldi_uus (__a[0], __b[0])};
-+ return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)};
- }
- 
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vrshlq_s8 (int8x16_t __a, int8x16_t __b)
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
- {
-- return (int8x16_t) __builtin_aarch64_srshlv16qi (__a, __b);
-+ return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vrshlq_s16 (int16x8_t __a, int16x8_t __b)
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
- {
-- return (int16x8_t) __builtin_aarch64_srshlv8hi (__a, __b);
-+ return (int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vrshlq_s32 (int32x4_t __a, int32x4_t __b)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
- {
-- return (int32x4_t) __builtin_aarch64_srshlv4si (__a, __b);
-+ return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vrshlq_s64 (int64x2_t __a, int64x2_t __b)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
- {
-- return (int64x2_t) __builtin_aarch64_srshlv2di (__a, __b);
-+ return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c);
- }
- 
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
- {
-- return __builtin_aarch64_urshlv16qi_uus (__a, __b);
-+ return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
- {
-- return __builtin_aarch64_urshlv8hi_uus (__a, __b);
-+ return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
- {
-- return __builtin_aarch64_urshlv4si_uus (__a, __b);
-+ return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
- {
-- return __builtin_aarch64_urshlv2di_uus (__a, __b);
-+ return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline int64_t __attribute__ ((__always_inline__))
--vrshld_s64 (int64_t __a, int64_t __b)
-+__extension__ extern __inline int64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
- {
-- return __builtin_aarch64_srshldi (__a, __b);
-+ return __builtin_aarch64_ssra_ndi (__a, __b, __c);
- }
- 
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vrshld_u64 (uint64_t __a, int64_t __b)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
- {
-- return __builtin_aarch64_urshldi_uus (__a, __b);
-+ return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c);
- }
- 
--/* vrshr */
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vrshr_n_s8 (int8x8_t __a, const int __b)
--{
-- return (int8x8_t) __builtin_aarch64_srshr_nv8qi (__a, __b);
--}
-+/* vsri */
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vrshr_n_s16 (int16x4_t __a, const int __b)
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
- {
-- return (int16x4_t) __builtin_aarch64_srshr_nv4hi (__a, __b);
-+ return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vrshr_n_s32 (int32x2_t __a, const int __b)
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
- {
-- return (int32x2_t) __builtin_aarch64_srshr_nv2si (__a, __b);
-+ return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
--vrshr_n_s64 (int64x1_t __a, const int __b)
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
- {
-- return (int64x1_t) {__builtin_aarch64_srshr_ndi (__a[0], __b)};
-+ return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vrshr_n_u8 (uint8x8_t __a, const int __b)
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
- {
-- return __builtin_aarch64_urshr_nv8qi_uus (__a, __b);
-+ return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)};
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vrshr_n_u16 (uint16x4_t __a, const int __b)
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
- {
-- return __builtin_aarch64_urshr_nv4hi_uus (__a, __b);
-+ return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vrshr_n_u32 (uint32x2_t __a, const int __b)
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
- {
-- return __builtin_aarch64_urshr_nv2si_uus (__a, __b);
-+ return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vrshr_n_u64 (uint64x1_t __a, const int __b)
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
- {
-- return (uint64x1_t) {__builtin_aarch64_urshr_ndi_uus (__a[0], __b)};
-+ return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vrshrq_n_s8 (int8x16_t __a, const int __b)
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
- {
-- return (int8x16_t) __builtin_aarch64_srshr_nv16qi (__a, __b);
-+ return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)};
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vrshrq_n_s16 (int16x8_t __a, const int __b)
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
- {
-- return (int16x8_t) __builtin_aarch64_srshr_nv8hi (__a, __b);
-+ return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vrshrq_n_s32 (int32x4_t __a, const int __b)
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
- {
-- return (int32x4_t) __builtin_aarch64_srshr_nv4si (__a, __b);
-+ return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vrshrq_n_s64 (int64x2_t __a, const int __b)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
- {
-- return (int64x2_t) __builtin_aarch64_srshr_nv2di (__a, __b);
-+ return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c);
- }
- 
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vrshrq_n_u8 (uint8x16_t __a, const int __b)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
- {
-- return __builtin_aarch64_urshr_nv16qi_uus (__a, __b);
-+ return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vrshrq_n_u16 (uint16x8_t __a, const int __b)
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
- {
-- return __builtin_aarch64_urshr_nv8hi_uus (__a, __b);
-+ return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vrshrq_n_u32 (uint32x4_t __a, const int __b)
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
- {
-- return __builtin_aarch64_urshr_nv4si_uus (__a, __b);
-+ return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vrshrq_n_u64 (uint64x2_t __a, const int __b)
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
- {
-- return __builtin_aarch64_urshr_nv2di_uus (__a, __b);
-+ return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline int64_t __attribute__ ((__always_inline__))
--vrshrd_n_s64 (int64_t __a, const int __b)
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
- {
-- return __builtin_aarch64_srshr_ndi (__a, __b);
-+ return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vrshrd_n_u64 (uint64_t __a, const int __b)
-+__extension__ extern __inline int64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsrid_n_s64 (int64_t __a, int64_t __b, const int __c)
- {
-- return __builtin_aarch64_urshr_ndi_uus (__a, __b);
-+ return __builtin_aarch64_ssri_ndi (__a, __b, __c);
- }
- 
--/* vrsra */
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
-+__extension__ extern __inline uint64_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c)
- {
-- return (int8x8_t) __builtin_aarch64_srsra_nv8qi (__a, __b, __c);
-+ return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
--{
-- return (int16x4_t) __builtin_aarch64_srsra_nv4hi (__a, __b, __c);
--}
-+/* vst1 */
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_f16 (float16_t *__a, float16x4_t __b)
- {
-- return (int32x2_t) __builtin_aarch64_srsra_nv2si (__a, __b, __c);
-+ __builtin_aarch64_st1v4hf (__a, __b);
- }
- 
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
--vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_f32 (float32_t *a, float32x2_t b)
- {
-- return (int64x1_t) {__builtin_aarch64_srsra_ndi (__a[0], __b[0], __c)};
-+ __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_f64 (float64_t *a, float64x1_t b)
- {
-- return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c);
-+ *a = b[0];
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_p8 (poly8_t *a, poly8x8_t b)
- {
-- return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c);
-+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
-+ (int8x8_t) b);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_p16 (poly16_t *a, poly16x4_t b)
- {
-- return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c);
-+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
-+ (int16x4_t) b);
- }
- 
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_p64 (poly64_t *a, poly64x1_t b)
- {
-- return (uint64x1_t) {__builtin_aarch64_ursra_ndi_uuus (__a[0], __b[0], __c)};
-+ *a = b[0];
- }
- 
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_s8 (int8_t *a, int8x8_t b)
- {
-- return (int8x16_t) __builtin_aarch64_srsra_nv16qi (__a, __b, __c);
-+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_s16 (int16_t *a, int16x4_t b)
- {
-- return (int16x8_t) __builtin_aarch64_srsra_nv8hi (__a, __b, __c);
-+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_s32 (int32_t *a, int32x2_t b)
- {
-- return (int32x4_t) __builtin_aarch64_srsra_nv4si (__a, __b, __c);
-+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_s64 (int64_t *a, int64x1_t b)
- {
-- return (int64x2_t) __builtin_aarch64_srsra_nv2di (__a, __b, __c);
-+ *a = b[0];
- }
- 
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_u8 (uint8_t *a, uint8x8_t b)
- {
-- return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c);
-+ __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a,
-+ (int8x8_t) b);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_u16 (uint16_t *a, uint16x4_t b)
- {
-- return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c);
-+ __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a,
-+ (int16x4_t) b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_u32 (uint32_t *a, uint32x2_t b)
- {
-- return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c);
-+ __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a,
-+ (int32x2_t) b);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_u64 (uint64_t *a, uint64x1_t b)
- {
-- return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c);
-+ *a = b[0];
- }
- 
--__extension__ static __inline int64_t __attribute__ ((__always_inline__))
--vrsrad_n_s64 (int64_t __a, int64_t __b, const int __c)
-+/* vst1q */
-+
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_f16 (float16_t *__a, float16x8_t __b)
- {
-- return __builtin_aarch64_srsra_ndi (__a, __b, __c);
-+ __builtin_aarch64_st1v8hf (__a, __b);
- }
- 
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vrsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_f32 (float32_t *a, float32x4_t b)
- {
-- return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c);
-+ __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b);
- }
- 
--#pragma GCC push_options
--#pragma GCC target ("+nothing+crypto")
--
--/* vsha1 */
--
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha1cq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_f64 (float64_t *a, float64x2_t b)
- {
-- return __builtin_aarch64_crypto_sha1cv4si_uuuu (hash_abcd, hash_e, wk);
-+ __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha1mq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_p8 (poly8_t *a, poly8x16_t b)
- {
-- return __builtin_aarch64_crypto_sha1mv4si_uuuu (hash_abcd, hash_e, wk);
-+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
-+ (int8x16_t) b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha1pq_u32 (uint32x4_t hash_abcd, uint32_t hash_e, uint32x4_t wk)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_p16 (poly16_t *a, poly16x8_t b)
- {
-- return __builtin_aarch64_crypto_sha1pv4si_uuuu (hash_abcd, hash_e, wk);
-+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
-+ (int16x8_t) b);
- }
- 
--__extension__ static __inline uint32_t __attribute__ ((__always_inline__))
--vsha1h_u32 (uint32_t hash_e)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_p64 (poly64_t *a, poly64x2_t b)
- {
-- return __builtin_aarch64_crypto_sha1hsi_uu (hash_e);
-+ __builtin_aarch64_st1v2di_sp ((__builtin_aarch64_simd_di *) a,
-+ (poly64x2_t) b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha1su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7, uint32x4_t w8_11)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_s8 (int8_t *a, int8x16_t b)
- {
-- return __builtin_aarch64_crypto_sha1su0v4si_uuuu (w0_3, w4_7, w8_11);
-+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha1su1q_u32 (uint32x4_t tw0_3, uint32x4_t w12_15)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_s16 (int16_t *a, int16x8_t b)
- {
-- return __builtin_aarch64_crypto_sha1su1v4si_uuu (tw0_3, w12_15);
-+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha256hq_u32 (uint32x4_t hash_abcd, uint32x4_t hash_efgh, uint32x4_t wk)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_s32 (int32_t *a, int32x4_t b)
- {
-- return __builtin_aarch64_crypto_sha256hv4si_uuuu (hash_abcd, hash_efgh, wk);
-+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha256h2q_u32 (uint32x4_t hash_efgh, uint32x4_t hash_abcd, uint32x4_t wk)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_s64 (int64_t *a, int64x2_t b)
- {
-- return __builtin_aarch64_crypto_sha256h2v4si_uuuu (hash_efgh, hash_abcd, wk);
-+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha256su0q_u32 (uint32x4_t w0_3, uint32x4_t w4_7)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_u8 (uint8_t *a, uint8x16_t b)
- {
-- return __builtin_aarch64_crypto_sha256su0v4si_uuu (w0_3, w4_7);
-+ __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a,
-+ (int8x16_t) b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vsha256su1q_u32 (uint32x4_t tw0_3, uint32x4_t w8_11, uint32x4_t w12_15)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_u16 (uint16_t *a, uint16x8_t b)
- {
-- return __builtin_aarch64_crypto_sha256su1v4si_uuuu (tw0_3, w8_11, w12_15);
-+ __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a,
-+ (int16x8_t) b);
- }
- 
--__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
--vmull_p64 (poly64_t a, poly64_t b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_u32 (uint32_t *a, uint32x4_t b)
- {
-- return
-- __builtin_aarch64_crypto_pmulldi_ppp (a, b);
-+ __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a,
-+ (int32x4_t) b);
- }
- 
--__extension__ static __inline poly128_t __attribute__ ((__always_inline__))
--vmull_high_p64 (poly64x2_t a, poly64x2_t b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_u64 (uint64_t *a, uint64x2_t b)
- {
-- return __builtin_aarch64_crypto_pmullv2di_ppp (a, b);
-+ __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a,
-+ (int64x2_t) b);
- }
- 
--#pragma GCC pop_options
-+/* vst1_lane */
- 
--/* vshl */
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane)
-+{
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
-+}
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vshl_n_s8 (int8x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane)
- {
-- return (int8x8_t) __builtin_aarch64_ashlv8qi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vshl_n_s16 (int16x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane)
- {
-- return (int16x4_t) __builtin_aarch64_ashlv4hi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vshl_n_s32 (int32x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane)
- {
-- return (int32x2_t) __builtin_aarch64_ashlv2si (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
--vshl_n_s64 (int64x1_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane)
- {
-- return (int64x1_t) {__builtin_aarch64_ashldi (__a[0], __b)};
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vshl_n_u8 (uint8x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_p64 (poly64_t *__a, poly64x1_t __b, const int __lane)
- {
-- return (uint8x8_t) __builtin_aarch64_ashlv8qi ((int8x8_t) __a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vshl_n_u16 (uint16x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane)
- {
-- return (uint16x4_t) __builtin_aarch64_ashlv4hi ((int16x4_t) __a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vshl_n_u32 (uint32x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane)
- {
-- return (uint32x2_t) __builtin_aarch64_ashlv2si ((int32x2_t) __a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vshl_n_u64 (uint64x1_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane)
- {
-- return (uint64x1_t) {__builtin_aarch64_ashldi ((int64_t) __a[0], __b)};
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vshlq_n_s8 (int8x16_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane)
- {
-- return (int8x16_t) __builtin_aarch64_ashlv16qi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vshlq_n_s16 (int16x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane)
- {
-- return (int16x8_t) __builtin_aarch64_ashlv8hi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vshlq_n_s32 (int32x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane)
- {
-- return (int32x4_t) __builtin_aarch64_ashlv4si (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vshlq_n_s64 (int64x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane)
- {
-- return (int64x2_t) __builtin_aarch64_ashlv2di (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vshlq_n_u8 (uint8x16_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane)
- {
-- return (uint8x16_t) __builtin_aarch64_ashlv16qi ((int8x16_t) __a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vshlq_n_u16 (uint16x8_t __a, const int __b)
-+/* vst1q_lane */
-+
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane)
- {
-- return (uint16x8_t) __builtin_aarch64_ashlv8hi ((int16x8_t) __a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vshlq_n_u32 (uint32x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane)
- {
-- return (uint32x4_t) __builtin_aarch64_ashlv4si ((int32x4_t) __a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vshlq_n_u64 (uint64x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane)
- {
-- return (uint64x2_t) __builtin_aarch64_ashlv2di ((int64x2_t) __a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int64_t __attribute__ ((__always_inline__))
--vshld_n_s64 (int64_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane)
- {
-- return __builtin_aarch64_ashldi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vshld_n_u64 (uint64_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane)
- {
-- return (uint64_t) __builtin_aarch64_ashldi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vshl_s8 (int8x8_t __a, int8x8_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_p64 (poly64_t *__a, poly64x2_t __b, const int __lane)
- {
-- return __builtin_aarch64_sshlv8qi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vshl_s16 (int16x4_t __a, int16x4_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane)
- {
-- return __builtin_aarch64_sshlv4hi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vshl_s32 (int32x2_t __a, int32x2_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane)
- {
-- return __builtin_aarch64_sshlv2si (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
--vshl_s64 (int64x1_t __a, int64x1_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane)
- {
-- return (int64x1_t) {__builtin_aarch64_sshldi (__a[0], __b[0])};
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vshl_u8 (uint8x8_t __a, int8x8_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int __lane)
- {
-- return __builtin_aarch64_ushlv8qi_uus (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vshl_u16 (uint16x4_t __a, int16x4_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane)
- {
-- return __builtin_aarch64_ushlv4hi_uus (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vshl_u32 (uint32x2_t __a, int32x2_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane)
- {
-- return __builtin_aarch64_ushlv2si_uus (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vshl_u64 (uint64x1_t __a, int64x1_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane)
- {
-- return (uint64x1_t) {__builtin_aarch64_ushldi_uus (__a[0], __b[0])};
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vshlq_s8 (int8x16_t __a, int8x16_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane)
- {
-- return __builtin_aarch64_sshlv16qi (__a, __b);
-+ *__a = __aarch64_vget_lane_any (__b, __lane);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vshlq_s16 (int16x8_t __a, int16x8_t __b)
-+/* vstn */
-+
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_s64 (int64_t * __a, int64x1x2_t val)
- {
-- return __builtin_aarch64_sshlv8hi (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ int64x2x2_t temp;
-+ temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
-+ temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
-+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vshlq_s32 (int32x4_t __a, int32x4_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_u64 (uint64_t * __a, uint64x1x2_t val)
- {
-- return __builtin_aarch64_sshlv4si (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ uint64x2x2_t temp;
-+ temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1);
-+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vshlq_s64 (int64x2_t __a, int64x2_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_f64 (float64_t * __a, float64x1x2_t val)
- {
-- return __builtin_aarch64_sshlv2di (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ float64x2x2_t temp;
-+ temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1);
-+ __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o);
- }
- 
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vshlq_u8 (uint8x16_t __a, int8x16_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_s8 (int8_t * __a, int8x8x2_t val)
- {
-- return __builtin_aarch64_ushlv16qi_uus (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ int8x16x2_t temp;
-+ temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
-+ temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vshlq_u16 (uint16x8_t __a, int16x8_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_p8 (poly8_t * __a, poly8x8x2_t val)
- {
-- return __builtin_aarch64_ushlv8hi_uus (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ poly8x16x2_t temp;
-+ temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vshlq_u32 (uint32x4_t __a, int32x4_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_s16 (int16_t * __a, int16x4x2_t val)
- {
-- return __builtin_aarch64_ushlv4si_uus (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ int16x8x2_t temp;
-+ temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
-+ temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vshlq_u64 (uint64x2_t __a, int64x2_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_p16 (poly16_t * __a, poly16x4x2_t val)
- {
-- return __builtin_aarch64_ushlv2di_uus (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ poly16x8x2_t temp;
-+ temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline int64_t __attribute__ ((__always_inline__))
--vshld_s64 (int64_t __a, int64_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_s32 (int32_t * __a, int32x2x2_t val)
- {
-- return __builtin_aarch64_sshldi (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ int32x4x2_t temp;
-+ temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
-+ temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
- }
- 
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vshld_u64 (uint64_t __a, uint64_t __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_u8 (uint8_t * __a, uint8x8x2_t val)
- {
-- return __builtin_aarch64_ushldi_uus (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ uint8x16x2_t temp;
-+ temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vshll_high_n_s8 (int8x16_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_u16 (uint16_t * __a, uint16x4x2_t val)
- {
-- return __builtin_aarch64_sshll2_nv16qi (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ uint16x8x2_t temp;
-+ temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vshll_high_n_s16 (int16x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_u32 (uint32_t * __a, uint32x2x2_t val)
- {
-- return __builtin_aarch64_sshll2_nv8hi (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ uint32x4x2_t temp;
-+ temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vshll_high_n_s32 (int32x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_f16 (float16_t * __a, float16x4x2_t val)
- {
-- return __builtin_aarch64_sshll2_nv4si (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ float16x8x2_t temp;
-+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1);
-+ __builtin_aarch64_st2v4hf (__a, __o);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vshll_high_n_u8 (uint8x16_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_f32 (float32_t * __a, float32x2x2_t val)
- {
-- return (uint16x8_t) __builtin_aarch64_ushll2_nv16qi ((int8x16_t) __a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ float32x4x2_t temp;
-+ temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1);
-+ __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vshll_high_n_u16 (uint16x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2_p64 (poly64_t * __a, poly64x1x2_t val)
- {
-- return (uint32x4_t) __builtin_aarch64_ushll2_nv8hi ((int16x8_t) __a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ poly64x2x2_t temp;
-+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-+ (poly64x2_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-+ (poly64x2_t) temp.val[1], 1);
-+ __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vshll_high_n_u32 (uint32x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_s8 (int8_t * __a, int8x16x2_t val)
- {
-- return (uint64x2_t) __builtin_aarch64_ushll2_nv4si ((int32x4_t) __a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
-+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vshll_n_s8 (int8x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_p8 (poly8_t * __a, poly8x16x2_t val)
- {
-- return __builtin_aarch64_sshll_nv8qi (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
-+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vshll_n_s16 (int16x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_s16 (int16_t * __a, int16x8x2_t val)
- {
-- return __builtin_aarch64_sshll_nv4hi (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
-+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vshll_n_s32 (int32x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_p16 (poly16_t * __a, poly16x8x2_t val)
- {
-- return __builtin_aarch64_sshll_nv2si (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
-+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vshll_n_u8 (uint8x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_s32 (int32_t * __a, int32x4x2_t val)
- {
-- return __builtin_aarch64_ushll_nv8qi_uus (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
-+ __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vshll_n_u16 (uint16x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_s64 (int64_t * __a, int64x2x2_t val)
- {
-- return __builtin_aarch64_ushll_nv4hi_uus (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
-+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vshll_n_u32 (uint32x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_u8 (uint8_t * __a, uint8x16x2_t val)
- {
-- return __builtin_aarch64_ushll_nv2si_uus (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1);
-+ __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--/* vshr */
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vshr_n_s8 (int8x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_u16 (uint16_t * __a, uint16x8x2_t val)
- {
-- return (int8x8_t) __builtin_aarch64_ashrv8qi (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1);
-+ __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vshr_n_s16 (int16x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_u32 (uint32_t * __a, uint32x4x2_t val)
- {
-- return (int16x4_t) __builtin_aarch64_ashrv4hi (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1);
-+ __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vshr_n_s32 (int32x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_u64 (uint64_t * __a, uint64x2x2_t val)
- {
-- return (int32x2_t) __builtin_aarch64_ashrv2si (__a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1);
-+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
- }
- 
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
--vshr_n_s64 (int64x1_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_f16 (float16_t * __a, float16x8x2_t val)
- {
-- return (int64x1_t) {__builtin_aarch64_ashr_simddi (__a[0], __b)};
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1);
-+ __builtin_aarch64_st2v8hf (__a, __o);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
--vshr_n_u8 (uint8x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_f32 (float32_t * __a, float32x4x2_t val)
- {
-- return (uint8x8_t) __builtin_aarch64_lshrv8qi ((int8x8_t) __a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1);
-+ __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
--vshr_n_u16 (uint16x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_f64 (float64_t * __a, float64x2x2_t val)
- {
-- return (uint16x4_t) __builtin_aarch64_lshrv4hi ((int16x4_t) __a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1);
-+ __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
--vshr_n_u32 (uint32x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst2q_p64 (poly64_t * __a, poly64x2x2_t val)
- {
-- return (uint32x2_t) __builtin_aarch64_lshrv2si ((int32x2_t) __a, __b);
-+ __builtin_aarch64_simd_oi __o;
-+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-+ (poly64x2_t) val.val[0], 0);
-+ __o = __builtin_aarch64_set_qregoiv2di_ssps (__o,
-+ (poly64x2_t) val.val[1], 1);
-+ __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o);
- }
- 
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
--vshr_n_u64 (uint64x1_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_s64 (int64_t * __a, int64x1x3_t val)
- {
-- return (uint64x1_t) {__builtin_aarch64_lshr_simddi_uus ( __a[0], __b)};
-+ __builtin_aarch64_simd_ci __o;
-+ int64x2x3_t temp;
-+ temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0)));
-+ temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0)));
-+ temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
-+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
- }
- 
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
--vshrq_n_s8 (int8x16_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_u64 (uint64_t * __a, uint64x1x3_t val)
- {
-- return (int8x16_t) __builtin_aarch64_ashrv16qi (__a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ uint64x2x3_t temp;
-+ temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2);
-+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
--vshrq_n_s16 (int16x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_f64 (float64_t * __a, float64x1x3_t val)
- {
-- return (int16x8_t) __builtin_aarch64_ashrv8hi (__a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ float64x2x3_t temp;
-+ temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2);
-+ __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
--vshrq_n_s32 (int32x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_s8 (int8_t * __a, int8x8x3_t val)
- {
-- return (int32x4_t) __builtin_aarch64_ashrv4si (__a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ int8x16x3_t temp;
-+ temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0)));
-+ temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0)));
-+ temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
--vshrq_n_s64 (int64x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_p8 (poly8_t * __a, poly8x8x3_t val)
- {
-- return (int64x2_t) __builtin_aarch64_ashrv2di (__a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ poly8x16x3_t temp;
-+ temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
--vshrq_n_u8 (uint8x16_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_s16 (int16_t * __a, int16x4x3_t val)
- {
-- return (uint8x16_t) __builtin_aarch64_lshrv16qi ((int8x16_t) __a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ int16x8x3_t temp;
-+ temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0)));
-+ temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0)));
-+ temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
--vshrq_n_u16 (uint16x8_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_p16 (poly16_t * __a, poly16x4x3_t val)
- {
-- return (uint16x8_t) __builtin_aarch64_lshrv8hi ((int16x8_t) __a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ poly16x8x3_t temp;
-+ temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
--vshrq_n_u32 (uint32x4_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_s32 (int32_t * __a, int32x2x3_t val)
- {
-- return (uint32x4_t) __builtin_aarch64_lshrv4si ((int32x4_t) __a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ int32x4x3_t temp;
-+ temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0)));
-+ temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0)));
-+ temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
--vshrq_n_u64 (uint64x2_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_u8 (uint8_t * __a, uint8x8x3_t val)
- {
-- return (uint64x2_t) __builtin_aarch64_lshrv2di ((int64x2_t) __a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ uint8x16x3_t temp;
-+ temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o);
- }
- 
--__extension__ static __inline int64_t __attribute__ ((__always_inline__))
--vshrd_n_s64 (int64_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_u16 (uint16_t * __a, uint16x4x3_t val)
- {
-- return __builtin_aarch64_ashr_simddi (__a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ uint16x8x3_t temp;
-+ temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o);
- }
- 
--__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
--vshrd_n_u64 (uint64_t __a, const int __b)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_u32 (uint32_t * __a, uint32x2x3_t val)
- {
-- return __builtin_aarch64_lshr_simddi_uus (__a, __b);
-+ __builtin_aarch64_simd_ci __o;
-+ uint32x4x3_t temp;
-+ temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o);
- }
- 
--/* vsli */
--
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
--vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_f16 (float16_t * __a, float16x4x3_t val)
- {
-- return (int8x8_t) __builtin_aarch64_ssli_nv8qi (__a, __b, __c);
-+ __builtin_aarch64_simd_ci __o;
-+ float16x8x3_t temp;
-+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
--vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_f32 (float32_t * __a, float32x2x3_t val)
- {
-- return (int16x4_t) __builtin_aarch64_ssli_nv4hi (__a, __b, __c);
-+ __builtin_aarch64_simd_ci __o;
-+ float32x4x3_t temp;
-+ temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0)));
-+ temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0)));
-+ temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0)));
-+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0);
-+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1);
-+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2);
-+ __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
--vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c)
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vst3_p64 (poly64_t * __a, poly64x1x3_t val)
- {
-- return (int32x2_t) __builtin_aarch64_ssli_nv2si (__a, __b, __c);
-+
__builtin_aarch64_simd_ci __o; -+ poly64x2x3_t temp; -+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -+ (poly64x2_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -+ (poly64x2_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -+ (poly64x2_t) temp.val[2], 2); -+ __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vsli_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_s8 (int8_t * __a, int8x16x3_t val) - { -- return (int64x1_t) {__builtin_aarch64_ssli_ndi (__a[0], __b[0], __c)}; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_p8 (poly8_t * __a, poly8x16x3_t val) - { -- return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_s16 (int16_t * __a, int16x8x3_t val) - { -- return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_p16 (poly16_t * __a, poly16x8x3_t val) - { -- return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, 
const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_s32 (int32_t * __a, int32x4x3_t val) - { -- return (uint64x1_t) {__builtin_aarch64_usli_ndi_uuus (__a[0], __b[0], __c)}; -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); -+ __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_s64 (int64_t * __a, int64x2x3_t val) - { -- return (int8x16_t) __builtin_aarch64_ssli_nv16qi (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); -+ __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_u8 (uint8_t * __a, uint8x16x3_t val) - { -- return (int16x8_t) __builtin_aarch64_ssli_nv8hi (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -+ __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_u16 (uint16_t * __a, uint16x8x3_t val) - { -- return (int32x4_t) __builtin_aarch64_ssli_nv4si (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -+ __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_u32 (uint32_t * __a, uint32x4x3_t val) - { -- return (int64x2_t) __builtin_aarch64_ssli_nv2di (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); -+ __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) 
-+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_u64 (uint64_t * __a, uint64x2x3_t val) - { -- return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); -+ __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_f16 (float16_t * __a, float16x8x3_t val) - { -- return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2); -+ __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_f32 (float32_t * __a, float32x4x3_t val) - { -- return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); -+ __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_f64 (float64_t * __a, float64x2x3_t val) - { -- return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); -+ __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vslid_n_s64 (int64_t __a, int64_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst3q_p64 (poly64_t * __a, poly64x2x3_t val) - { -- return __builtin_aarch64_ssli_ndi (__a, __b, __c); -+ __builtin_aarch64_simd_ci __o; -+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -+ (poly64x2_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -+ (poly64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregciv2di_ssps (__o, -+ (poly64x2_t) val.val[2], 2); -+ __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vslid_n_u64 (uint64_t __a, uint64_t __b, const int __c) -+__extension__ extern 
__inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_s64 (int64_t * __a, int64x1x4_t val) - { -- return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ int64x2x4_t temp; -+ temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -+ temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -+ temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -+ temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); -+ __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); - } - --/* vsqadd */ -- --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vsqadd_u8 (uint8x8_t __a, int8x8_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_u64 (uint64_t * __a, uint64x1x4_t val) - { -- return __builtin_aarch64_usqaddv8qi_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ uint64x2x4_t temp; -+ temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); -+ __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vsqadd_u16 (uint16x4_t __a, int16x4_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_f64 (float64_t * __a, float64x1x4_t val) - { -- return __builtin_aarch64_usqaddv4hi_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ float64x2x4_t temp; -+ temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3); -+ __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vsqadd_u32 (uint32x2_t __a, int32x2_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_s8 (int8_t * __a, int8x8x4_t val) - { -- return __builtin_aarch64_usqaddv2si_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ 
int8x16x4_t temp; -+ temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -+ temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -+ temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -+ temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -+ __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vsqadd_u64 (uint64x1_t __a, int64x1_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_p8 (poly8_t * __a, poly8x8x4_t val) - { -- return (uint64x1_t) {__builtin_aarch64_usqadddi_uus (__a[0], __b[0])}; -+ __builtin_aarch64_simd_xi __o; -+ poly8x16x4_t temp; -+ temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -+ __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_s16 (int16_t * __a, int16x4x4_t val) - { -- return __builtin_aarch64_usqaddv16qi_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ int16x8x4_t temp; -+ temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -+ temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -+ temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -+ temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -+ __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_p16 (poly16_t * __a, poly16x4x4_t val) - { -- return __builtin_aarch64_usqaddv8hi_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ poly16x8x4_t temp; -+ temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 
(__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -+ __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_s32 (int32_t * __a, int32x2x4_t val) - { -- return __builtin_aarch64_usqaddv4si_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ int32x4x4_t temp; -+ temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -+ temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -+ temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -+ temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); -+ __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_u8 (uint8_t * __a, uint8x8x4_t val) - { -- return __builtin_aarch64_usqaddv2di_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ uint8x16x4_t temp; -+ temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -+ __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) --vsqaddb_u8 (uint8_t __a, int8_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_u16 (uint16_t * __a, uint16x4x4_t val) - { -- return __builtin_aarch64_usqaddqi_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ uint16x8x4_t temp; -+ temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); 
-+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -+ __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) --vsqaddh_u16 (uint16_t __a, int16_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_u32 (uint32_t * __a, uint32x2x4_t val) - { -- return __builtin_aarch64_usqaddhi_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ uint32x4x4_t temp; -+ temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); -+ __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vsqadds_u32 (uint32_t __a, int32_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_f16 (float16_t * __a, float16x4x4_t val) - { -- return __builtin_aarch64_usqaddsi_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ float16x8x4_t temp; -+ temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3); -+ __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vsqaddd_u64 (uint64_t __a, int64_t __b) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_f32 (float32_t * __a, float32x2x4_t val) - { -- return __builtin_aarch64_usqadddi_uus (__a, __b); -+ __builtin_aarch64_simd_xi __o; -+ float32x4x4_t temp; -+ temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3); -+ __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - --/* 
vsqrt */ --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vsqrt_f32 (float32x2_t a) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4_p64 (poly64_t * __a, poly64x1x4_t val) - { -- return __builtin_aarch64_sqrtv2sf (a); -+ __builtin_aarch64_simd_xi __o; -+ poly64x2x4_t temp; -+ temp.val[0] = vcombine_p64 (val.val[0], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ temp.val[1] = vcombine_p64 (val.val[1], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ temp.val[2] = vcombine_p64 (val.val[2], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ temp.val[3] = vcombine_p64 (val.val[3], vcreate_p64 (__AARCH64_UINT64_C (0))); -+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -+ (poly64x2_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -+ (poly64x2_t) temp.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -+ (poly64x2_t) temp.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -+ (poly64x2_t) temp.val[3], 3); -+ __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vsqrtq_f32 (float32x4_t a) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_s8 (int8_t * __a, int8x16x4_t val) - { -- return __builtin_aarch64_sqrtv4sf (a); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); -+ __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) --vsqrt_f64 (float64x1_t a) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_p8 (poly8_t * __a, poly8x16x4_t val) - { -- return (float64x1_t) { __builtin_aarch64_sqrtdf (a[0]) }; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); -+ __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vsqrtq_f64 (float64x2_t a) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_s16 (int16_t * __a, int16x8x4_t val) - { -- return __builtin_aarch64_sqrtv2df (a); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); -+ __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --/* vsra */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vst4q_p16 (poly16_t * __a, poly16x8x4_t val) - { -- return (int8x8_t) __builtin_aarch64_ssra_nv8qi (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); -+ __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_s32 (int32_t * __a, int32x4x4_t val) - { -- return (int16x4_t) __builtin_aarch64_ssra_nv4hi (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); -+ __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_s64 (int64_t * __a, int64x2x4_t val) - { -- return (int32x2_t) __builtin_aarch64_ssra_nv2si (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); -+ __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_u8 (uint8_t * __a, uint8x16x4_t val) - { -- return (int64x1_t) {__builtin_aarch64_ssra_ndi (__a[0], __b[0], __c)}; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); -+ __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_u16 (uint16_t * __a, uint16x8x4_t val) - { -- return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hi (__o, 
(int16x8_t) val.val[3], 3); -+ __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_u32 (uint32_t * __a, uint32x4x4_t val) - { -- return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); -+ __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_u64 (uint64_t * __a, uint64x2x4_t val) - { -- return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); -+ __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_f16 (float16_t * __a, float16x8x4_t val) - { -- return (uint64x1_t) {__builtin_aarch64_usra_ndi_uuus (__a[0], __b[0], __c)}; -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3); -+ __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_f32 (float32_t * __a, float32x4x4_t val) - { -- return (int8x16_t) __builtin_aarch64_ssra_nv16qi (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3); -+ __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_f64 (float64_t * __a, float64x2x4_t val) - { -- return 
(int16x8_t) __builtin_aarch64_ssra_nv8hi (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3); -+ __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vst4q_p64 (poly64_t * __a, poly64x2x4_t val) - { -- return (int32x4_t) __builtin_aarch64_ssra_nv4si (__a, __b, __c); -+ __builtin_aarch64_simd_xi __o; -+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -+ (poly64x2_t) val.val[0], 0); -+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -+ (poly64x2_t) val.val[1], 1); -+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -+ (poly64x2_t) val.val[2], 2); -+ __o = __builtin_aarch64_set_qregxiv2di_ssps (__o, -+ (poly64x2_t) val.val[3], 3); -+ __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) --{ -- return (int64x2_t) __builtin_aarch64_ssra_nv2di (__a, __b, __c); --} -+/* vsub */ - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsubd_s64 (int64_t __a, int64_t __b) - { -- return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c); -+ return __a - __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsubd_u64 (uint64_t __a, uint64_t __b) - { -- return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c); -+ return __a - __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) --{ -- return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c); --} -+/* vtbx1 */ - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) - { -- return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c); -+ uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), -+ vmov_n_u8 (8)); -+ int8x8_t __tbl = vtbl1_s8 (__tab, __idx); -+ -+ return vbsl_s8 (__mask, __tbl, __r); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vsrad_n_s64 (int64_t __a, int64_t __b, const int __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) - { -- return __builtin_aarch64_ssra_ndi (__a, __b, __c); -+ uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); -+ uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); -+ -+ return vbsl_u8 
(__mask, __tbl, __r); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vsrad_n_u64 (uint64_t __a, uint64_t __b, const int __c) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx) - { -- return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); -+ uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); -+ poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); -+ -+ return vbsl_p8 (__mask, __tbl, __r); - } - --/* vsri */ -+/* vtbx3 */ - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) - { -- return (int8x8_t) __builtin_aarch64_ssri_nv8qi (__a, __b, __c); -+ uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), -+ vmov_n_u8 (24)); -+ int8x8_t __tbl = vtbl3_s8 (__tab, __idx); -+ -+ return vbsl_s8 (__mask, __tbl, __r); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) - { -- return (int16x4_t) __builtin_aarch64_ssri_nv4hi (__a, __b, __c); -+ uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); -+ uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); -+ -+ return vbsl_u8 (__mask, __tbl, __r); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) - { -- return (int32x2_t) __builtin_aarch64_ssri_nv2si (__a, __b, __c); -+ uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); -+ poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); -+ -+ return vbsl_p8 (__mask, __tbl, __r); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) -+/* vtbx4 */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) - { -- return (int64x1_t) {__builtin_aarch64_ssri_ndi (__a[0], __b[0], __c)}; -+ int8x8_t result; -+ int8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); -+ temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); -+ return result; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) - { -- return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c); -+ uint8x8_t result; -+ uint8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); -+ 
temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, -+ (int8x8_t)__idx); -+ return result; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) - { -- return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c); -+ poly8x8_t result; -+ poly8x16x2_t temp; -+ __builtin_aarch64_simd_oi __o; -+ temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); -+ temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[0], 0); -+ __o = __builtin_aarch64_set_qregoiv16qi (__o, -+ (int8x16_t) temp.val[1], 1); -+ result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, -+ (int8x8_t)__idx); -+ return result; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) -+/* vtrn */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_f32 (float32x2_t __a, float32x2_t __b) - { -- return (uint64x1_t) {__builtin_aarch64_usri_ndi_uuus (__a[0], __b[0], __c)}; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_p8 (poly8x8_t __a, poly8x8_t __b) - { -- return (int8x16_t) __builtin_aarch64_ssri_nv16qi (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_p16 (poly16x4_t __a, poly16x4_t __b) - { -- return (int16x8_t) __builtin_aarch64_ssri_nv8hi (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int 
__c) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_s8 (int8x8_t __a, int8x8_t __b) - { -- return (int32x4_t) __builtin_aarch64_ssri_nv4si (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_s16 (int16x4_t __a, int16x4_t __b) - { -- return (int64x2_t) __builtin_aarch64_ssri_nv2di (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_s32 (int32x2_t __a, int32x2_t __b) - { -- return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_u8 (uint8x8_t __a, uint8x8_t __b) - { -- return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_u16 (uint16x4_t __a, uint16x4_t __b) - { -- return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+#endif - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1_u32 (uint32x2_t __a, uint32x2_t __b) - { -- return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vsrid_n_s64 (int64_t __a, int64_t __b, const int __c) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_ssri_ndi (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return 
__builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vsrid_n_u64 (uint64_t __a, uint64_t __b, const int __c) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_f32 (float32x4_t __a, float32x4_t __b) - { -- return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif - } - --/* vst1 */ -- --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_f16 (float16_t *__a, float16x4_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_f64 (float64x2_t __a, float64x2_t __b) - { -- __builtin_aarch64_st1v4hf (__a, __b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_f32 (float32_t *a, float32x2_t b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b) - { -- __builtin_aarch64_st1v2sf ((__builtin_aarch64_simd_sf *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_f64 (float64_t *a, float64x1_t b) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b) - { -- *a = b[0]; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_p8 (poly8_t *a, poly8x8_t b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_s8 (int8x16_t __a, int8x16_t __b) - { -- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, -- (int8x8_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_p16 (poly16_t *a, poly16x4_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_s16 (int16x8_t __a, int16x8_t __b) - { -- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, -- (int16x4_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif - } - 
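(Editorial sketch, not part of the patch: the vtrn1/vtrn2 intrinsics introduced in the hunks above and below are pure lane permutations, so the new patch expresses them with GCC's generic __builtin_shuffle rather than one builtin per mode; the __AARCH64EB__ branches only compensate for the reversed lane numbering on big-endian targets. Assuming a GCC AArch64 target, the little-endian index pattern behaves like the following standalone sketch, where example_trn1/example_trn2 are hypothetical names used only for illustration:

    #include <arm_neon.h>

    /* vtrn1 keeps the even-numbered lane of each input pair:
       result = {a[0], b[0], a[2], b[2]}.  In a two-operand
       __builtin_shuffle, indices 0-3 select lanes of a and
       indices 4-7 select lanes of b.  */
    int16x4_t
    example_trn1 (int16x4_t a, int16x4_t b)
    {
      return __builtin_shuffle (a, b, (uint16x4_t) {0, 4, 2, 6});
    }

    /* vtrn2 keeps the odd-numbered lanes:
       result = {a[1], b[1], a[3], b[3]}.  */
    int16x4_t
    example_trn2 (int16x4_t a, int16x4_t b)
    {
      return __builtin_shuffle (a, b, (uint16x4_t) {1, 5, 3, 7});
    }

The same index pattern, widened per element count, appears in every vtrn variant in the patch.)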
--__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_s8 (int8_t *a, int8x8_t b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_s32 (int32x4_t __a, int32x4_t __b) - { -- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_s16 (int16_t *a, int16x4_t b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_s64 (int64x2_t __a, int64x2_t __b) - { -- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_s32 (int32_t *a, int32x2_t b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b) - { -- __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_s64 (int64_t *a, int64x1_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b) - { -- *a = b[0]; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_u8 (uint8_t *a, uint8x8_t b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) - { -- __builtin_aarch64_st1v8qi ((__builtin_aarch64_simd_qi *) a, -- (int8x8_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_u16 (uint16_t *a, uint16x4_t b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) - { -- __builtin_aarch64_st1v4hi ((__builtin_aarch64_simd_hi *) a, -- (int16x4_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_u32 (uint32_t *a, uint32x2_t b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_f16 (float16x4_t __a, float16x4_t __b) - { -- __builtin_aarch64_st1v2si ((__builtin_aarch64_simd_si *) a, -- (int32x2_t) b); 
-+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_u64 (uint64_t *a, uint64x1_t b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_f32 (float32x2_t __a, float32x2_t __b) - { -- *a = b[0]; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif - } - --/* vst1q */ -- --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_f16 (float16_t *__a, float16x8_t __b) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_p8 (poly8x8_t __a, poly8x8_t __b) - { -- __builtin_aarch64_st1v8hf (__a, __b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_f32 (float32_t *a, float32x4_t b) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_p16 (poly16x4_t __a, poly16x4_t __b) - { -- __builtin_aarch64_st1v4sf ((__builtin_aarch64_simd_sf *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_f64 (float64_t *a, float64x2_t b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_s8 (int8x8_t __a, int8x8_t __b) - { -- __builtin_aarch64_st1v2df ((__builtin_aarch64_simd_df *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_p8 (poly8_t *a, poly8x16_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_s16 (int16x4_t __a, int16x4_t __b) - { -- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, -- (int8x16_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_p16 (poly16_t *a, poly16x8_t b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_s32 (int32x2_t __a, int32x2_t __b) - { -- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, -- (int16x8_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_s8 (int8_t *a, int8x16_t b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_u8 (uint8x8_t __a, 
uint8x8_t __b) - { -- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_s16 (int16_t *a, int16x8_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_u16 (uint16x4_t __a, uint16x4_t __b) - { -- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_s32 (int32_t *a, int32x4_t b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2_u32 (uint32x2_t __a, uint32x2_t __b) - { -- __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_s64 (int64_t *a, int64x2_t b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_f16 (float16x8_t __a, float16x8_t __b) - { -- __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_u8 (uint8_t *a, uint8x16_t b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_f32 (float32x4_t __a, float32x4_t __b) - { -- __builtin_aarch64_st1v16qi ((__builtin_aarch64_simd_qi *) a, -- (int8x16_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_u16 (uint16_t *a, uint16x8_t b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_f64 (float64x2_t __a, float64x2_t __b) - { -- __builtin_aarch64_st1v8hi ((__builtin_aarch64_simd_hi *) a, -- (int16x8_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_u32 (uint32_t *a, uint32x4_t b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b) - { -- __builtin_aarch64_st1v4si ((__builtin_aarch64_simd_si *) a, -- (int32x4_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 
29, 15, 31}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_u64 (uint64_t *a, uint64x2_t b) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b) - { -- __builtin_aarch64_st1v2di ((__builtin_aarch64_simd_di *) a, -- (int64x2_t) b); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif - } - --/* vst1_lane */ -- --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_f16 (float16_t *__a, float16x4_t __b, const int __lane) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_s8 (int8x16_t __a, int8x16_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_f32 (float32_t *__a, float32x2_t __b, const int __lane) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_s16 (int16x8_t __a, int16x8_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_f64 (float64_t *__a, float64x1_t __b, const int __lane) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_s32 (int32x4_t __a, int32x4_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_p8 (poly8_t *__a, poly8x8_t __b, const int __lane) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_s64 (int64x2_t __a, int64x2_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_p16 (poly16_t *__a, poly16x4_t __b, const int __lane) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) 
--vst1_lane_s8 (int8_t *__a, int8x8_t __b, const int __lane) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_s16 (int16_t *__a, int16x4_t __b, const int __lane) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_s32 (int32_t *__a, int32x2_t __b, const int __lane) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_s64 (int64_t *__a, int64x1_t __b, const int __lane) -+__extension__ extern __inline float16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_f16 (float16x4_t __a, float16x4_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (float16x4x2_t) {vtrn1_f16 (__a, __b), vtrn2_f16 (__a, __b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_u8 (uint8_t *__a, uint8x8_t __b, const int __lane) -+__extension__ extern __inline float32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_f32 (float32x2_t a, float32x2_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_u16 (uint16_t *__a, uint16x4_t __b, const int __lane) -+__extension__ extern __inline poly8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_p8 (poly8x8_t a, poly8x8_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_u32 (uint32_t *__a, uint32x2_t __b, const int __lane) -+__extension__ extern __inline poly16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_p16 (poly16x4_t a, poly16x4_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1_lane_u64 (uint64_t *__a, uint64x1_t __b, const int __lane) -+__extension__ extern __inline int8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_s8 (int8x8_t a, int8x8_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return 
(int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; - } - --/* vst1q_lane */ -- --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_f16 (float16_t *__a, float16x8_t __b, const int __lane) -+__extension__ extern __inline int16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_s16 (int16x4_t a, int16x4_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_f32 (float32_t *__a, float32x4_t __b, const int __lane) -+__extension__ extern __inline int32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_s32 (int32x2_t a, int32x2_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (int32x2x2_t) {vtrn1_s32 (a, b), vtrn2_s32 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_f64 (float64_t *__a, float64x2_t __b, const int __lane) -+__extension__ extern __inline uint8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_u8 (uint8x8_t a, uint8x8_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_p8 (poly8_t *__a, poly8x16_t __b, const int __lane) -+__extension__ extern __inline uint16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_u16 (uint16x4_t a, uint16x4_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_p16 (poly16_t *__a, poly16x8_t __b, const int __lane) -+__extension__ extern __inline uint32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_u32 (uint32x2_t a, uint32x2_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_s8 (int8_t *__a, int8x16_t __b, const int __lane) -+__extension__ extern __inline float16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_f16 (float16x8_t __a, float16x8_t __b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (float16x8x2_t) {vtrn1q_f16 (__a, __b), vtrn2q_f16 (__a, __b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_s16 (int16_t *__a, int16x8_t __b, const int __lane) -+__extension__ extern __inline float32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_f32 (float32x4_t a, float32x4_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_s32 (int32_t *__a, int32x4_t __b, const int __lane) -+__extension__ extern __inline poly8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_p8 (poly8x16_t a, poly8x16_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_s64 (int64_t *__a, int64x2_t __b, const int 
__lane) -+__extension__ extern __inline poly16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_p16 (poly16x8_t a, poly16x8_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_u8 (uint8_t *__a, uint8x16_t __b, const int __lane) -+__extension__ extern __inline int8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_s8 (int8x16_t a, int8x16_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_u16 (uint16_t *__a, uint16x8_t __b, const int __lane) -+__extension__ extern __inline int16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_s16 (int16x8_t a, int16x8_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_u32 (uint32_t *__a, uint32x4_t __b, const int __lane) -+__extension__ extern __inline int32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_s32 (int32x4_t a, int32x4_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst1q_lane_u64 (uint64_t *__a, uint64x2_t __b, const int __lane) -+__extension__ extern __inline uint8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_u8 (uint8x16_t a, uint8x16_t b) - { -- *__a = __aarch64_vget_lane_any (__b, __lane); -+ return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; - } - --/* vstn */ -- --__extension__ static __inline void --vst2_s64 (int64_t * __a, int64x1x2_t val) -+__extension__ extern __inline uint16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_u16 (uint16x8_t a, uint16x8_t b) - { -- __builtin_aarch64_simd_oi __o; -- int64x2x2_t temp; -- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); -- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); -+ return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; - } - --__extension__ static __inline void --vst2_u64 (uint64_t * __a, uint64x1x2_t val) -+__extension__ extern __inline uint32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_u32 (uint32x4_t a, uint32x4_t b) - { -- __builtin_aarch64_simd_oi __o; -- uint64x2x2_t temp; -- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) temp.val[1], 1); -- __builtin_aarch64_st2di ((__builtin_aarch64_simd_di *) __a, __o); -+ return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; - } - --__extension__ static __inline void --vst2_f64 (float64_t * __a, float64x1x2_t 
val) --{ -- __builtin_aarch64_simd_oi __o; -- float64x2x2_t temp; -- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) temp.val[1], 1); -- __builtin_aarch64_st2df ((__builtin_aarch64_simd_df *) __a, __o); --} -+/* vtst */ - --__extension__ static __inline void --vst2_s8 (int8_t * __a, int8x8x2_t val) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_s8 (int8x8_t __a, int8x8_t __b) - { -- __builtin_aarch64_simd_oi __o; -- int8x16x2_t temp; -- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ return (uint8x8_t) ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2_p8 (poly8_t * __a, poly8x8x2_t val) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_s16 (int16x4_t __a, int16x4_t __b) - { -- __builtin_aarch64_simd_oi __o; -- poly8x16x2_t temp; -- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ return (uint16x4_t) ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2_s16 (int16_t * __a, int16x4x2_t val) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_s32 (int32x2_t __a, int32x2_t __b) - { -- __builtin_aarch64_simd_oi __o; -- int16x8x2_t temp; -- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ return (uint32x2_t) ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2_p16 (poly16_t * __a, poly16x4x2_t val) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_s64 (int64x1_t __a, int64x1_t __b) - { -- __builtin_aarch64_simd_oi __o; -- poly16x8x2_t temp; -- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); - } - --__extension__ static __inline 
void __attribute__ ((__always_inline__)) --vst2_s32 (int32_t * __a, int32x2x2_t val) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_u8 (uint8x8_t __a, uint8x8_t __b) - { -- __builtin_aarch64_simd_oi __o; -- int32x4x2_t temp; -- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); -- __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); -+ return ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2_u8 (uint8_t * __a, uint8x8x2_t val) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_u16 (uint16x4_t __a, uint16x4_t __b) - { -- __builtin_aarch64_simd_oi __o; -- uint8x16x2_t temp; -- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __builtin_aarch64_st2v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ return ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2_u16 (uint16_t * __a, uint16x4x2_t val) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_u32 (uint32x2_t __a, uint32x2_t __b) - { -- __builtin_aarch64_simd_oi __o; -- uint16x8x2_t temp; -- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __builtin_aarch64_st2v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ return ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2_u32 (uint32_t * __a, uint32x2x2_t val) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtst_u64 (uint64x1_t __a, uint64x1_t __b) - { -- __builtin_aarch64_simd_oi __o; -- uint32x4x2_t temp; -- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) temp.val[1], 1); -- __builtin_aarch64_st2v2si ((__builtin_aarch64_simd_si *) __a, __o); -+ return ((__a & __b) != __AARCH64_UINT64_C (0)); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2_f16 (float16_t * __a, float16x4x2_t val) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_s8 (int8x16_t __a, int8x16_t __b) - { -- __builtin_aarch64_simd_oi __o; -- float16x8x2_t temp; -- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -- __o = 
__builtin_aarch64_set_qregoiv8hf (__o, temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hf (__o, temp.val[1], 1); -- __builtin_aarch64_st2v4hf (__a, __o); -+ return (uint8x16_t) ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2_f32 (float32_t * __a, float32x2x2_t val) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_s16 (int16x8_t __a, int16x8_t __b) - { -- __builtin_aarch64_simd_oi __o; -- float32x4x2_t temp; -- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) temp.val[1], 1); -- __builtin_aarch64_st2v2sf ((__builtin_aarch64_simd_sf *) __a, __o); -+ return (uint16x8_t) ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_s8 (int8_t * __a, int8x16x2_t val) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_s32 (int32x4_t __a, int32x4_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ return (uint32x4_t) ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_p8 (poly8_t * __a, poly8x16x2_t val) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_s64 (int64x2_t __a, int64x2_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0)); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_s16 (int16_t * __a, int16x8x2_t val) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_u8 (uint8x16_t __a, uint8x16_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ return ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_p16 (poly16_t * __a, poly16x8x2_t val) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_u16 (uint16x8_t __a, uint16x8_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ return ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_s32 (int32_t * __a, int32x4x2_t val) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) -+vtstq_u32 (uint32x4_t __a, uint32x4_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); -- __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); -+ return ((__a & __b) != 0); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_s64 (int64_t * __a, int64x2x2_t val) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstq_u64 (uint64x2_t __a, uint64x2_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); -- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); -+ return ((__a & __b) != __AARCH64_UINT64_C (0)); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_u8 (uint8_t * __a, uint8x16x2_t val) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstd_s64 (int64_t __a, int64_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t) val.val[1], 1); -- __builtin_aarch64_st2v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ return (__a & __b) ? -1ll : 0ll; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_u16 (uint16_t * __a, uint16x8x2_t val) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtstd_u64 (uint64_t __a, uint64_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hi (__o, (int16x8_t) val.val[1], 1); -- __builtin_aarch64_st2v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ return (__a & __b) ? 
-1ll : 0ll; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_u32 (uint32_t * __a, uint32x4x2_t val) -+/* vuqadd */ -+ -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqadd_s8 (int8x8_t __a, uint8x8_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4si (__o, (int32x4_t) val.val[1], 1); -- __builtin_aarch64_st2v4si ((__builtin_aarch64_simd_si *) __a, __o); -+ return __builtin_aarch64_suqaddv8qi_ssu (__a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_u64 (uint64_t * __a, uint64x2x2_t val) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqadd_s16 (int16x4_t __a, uint16x4_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2di (__o, (int64x2_t) val.val[1], 1); -- __builtin_aarch64_st2v2di ((__builtin_aarch64_simd_di *) __a, __o); -+ return __builtin_aarch64_suqaddv4hi_ssu (__a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_f16 (float16_t * __a, float16x8x2_t val) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqadd_s32 (int32x2_t __a, uint32x2_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv8hf (__o, val.val[1], 1); -- __builtin_aarch64_st2v8hf (__a, __o); -+ return __builtin_aarch64_suqaddv2si_ssu (__a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_f32 (float32_t * __a, float32x4x2_t val) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqadd_s64 (int64x1_t __a, uint64x1_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv4sf (__o, (float32x4_t) val.val[1], 1); -- __builtin_aarch64_st2v4sf ((__builtin_aarch64_simd_sf *) __a, __o); -+ return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])}; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst2q_f64 (float64_t * __a, float64x2x2_t val) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) - { -- __builtin_aarch64_simd_oi __o; -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv2df (__o, (float64x2_t) val.val[1], 1); -- __builtin_aarch64_st2v2df ((__builtin_aarch64_simd_df *) __a, __o); -+ return __builtin_aarch64_suqaddv16qi_ssu (__a, __b); - } - --__extension__ static __inline void --vst3_s64 (int64_t * __a, int64x1x3_t val) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) - { -- __builtin_aarch64_simd_ci __o; -- int64x2x3_t temp; -- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -- __o 
= __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); -- __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); -+ return __builtin_aarch64_suqaddv8hi_ssu (__a, __b); - } - --__extension__ static __inline void --vst3_u64 (uint64_t * __a, uint64x1x3_t val) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) - { -- __builtin_aarch64_simd_ci __o; -- uint64x2x3_t temp; -- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) temp.val[2], 2); -- __builtin_aarch64_st3di ((__builtin_aarch64_simd_di *) __a, __o); -+ return __builtin_aarch64_suqaddv4si_ssu (__a, __b); - } - --__extension__ static __inline void --vst3_f64 (float64_t * __a, float64x1x3_t val) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) - { -- __builtin_aarch64_simd_ci __o; -- float64x2x3_t temp; -- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) temp.val[2], 2); -- __builtin_aarch64_st3df ((__builtin_aarch64_simd_df *) __a, __o); -+ return __builtin_aarch64_suqaddv2di_ssu (__a, __b); - } - --__extension__ static __inline void --vst3_s8 (int8_t * __a, int8x8x3_t val) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqaddb_s8 (int8_t __a, uint8_t __b) - { -- __builtin_aarch64_simd_ci __o; -- int8x16x3_t temp; -- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -- __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ return __builtin_aarch64_suqaddqi_ssu (__a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_p8 (poly8_t * __a, poly8x8x3_t val) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqaddh_s16 (int16_t __a, uint16_t __b) - { -- __builtin_aarch64_simd_ci __o; -- poly8x16x3_t temp; -- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); 
-- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -- __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+ return __builtin_aarch64_suqaddhi_ssu (__a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_s16 (int16_t * __a, int16x4x3_t val) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqadds_s32 (int32_t __a, uint32_t __b) - { -- __builtin_aarch64_simd_ci __o; -- int16x8x3_t temp; -- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -- __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ return __builtin_aarch64_suqaddsi_ssu (__a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_p16 (poly16_t * __a, poly16x4x3_t val) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuqaddd_s64 (int64_t __a, uint64_t __b) - { -- __builtin_aarch64_simd_ci __o; -- poly16x8x3_t temp; -- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -- __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+ return __builtin_aarch64_suqadddi_ssu (__a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_s32 (int32_t * __a, int32x2x3_t val) -+#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ -+ __extension__ extern __inline rettype \ -+ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) \ -+ v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ -+ { \ -+ return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ -+ v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ -+ } -+ -+#define __INTERLEAVE_LIST(op) \ -+ __DEFINTERLEAVE (op, float16x4x2_t, float16x4_t, f16,) \ -+ __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ -+ __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ -+ __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ -+ __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ -+ __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ -+ __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \ -+ __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ -+ __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ -+ __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ -+ __DEFINTERLEAVE (op, float16x8x2_t, float16x8_t, f16, q) \ -+ __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ -+ __DEFINTERLEAVE (op, 
poly8x16x2_t, poly8x16_t, p8, q) \ -+ __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ -+ __DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ -+ __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ -+ __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ -+ __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ -+ __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ -+ __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) -+ -+/* vuzp */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_f16 (float16x4_t __a, float16x4_t __b) - { -- __builtin_aarch64_simd_ci __o; -- int32x4x3_t temp; -- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); -- __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_u8 (uint8_t * __a, uint8x8x3_t val) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_f32 (float32x2_t __a, float32x2_t __b) - { -- __builtin_aarch64_simd_ci __o; -- uint8x16x3_t temp; -- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) temp.val[2], 2); -- __builtin_aarch64_st3v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_u16 (uint16_t * __a, uint16x4x3_t val) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) - { -- __builtin_aarch64_simd_ci __o; -- uint16x8x3_t temp; -- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) temp.val[2], 2); -- __builtin_aarch64_st3v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif - } - 
--__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_u32 (uint32_t * __a, uint32x2x3_t val) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) - { -- __builtin_aarch64_simd_ci __o; -- uint32x4x3_t temp; -- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) temp.val[2], 2); -- __builtin_aarch64_st3v2si ((__builtin_aarch64_simd_si *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_f16 (float16_t * __a, float16x4x3_t val) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_s8 (int8x8_t __a, int8x8_t __b) - { -- __builtin_aarch64_simd_ci __o; -- float16x8x3_t temp; -- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) temp.val[2], 2); -- __builtin_aarch64_st3v4hf ((__builtin_aarch64_simd_hf *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3_f32 (float32_t * __a, float32x2x3_t val) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_s16 (int16x4_t __a, int16x4_t __b) - { -- __builtin_aarch64_simd_ci __o; -- float32x4x3_t temp; -- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) temp.val[2], 2); -- __builtin_aarch64_st3v2sf ((__builtin_aarch64_simd_sf *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_s8 (int8_t * __a, int8x16x3_t val) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_s32 (int32x2_t __a, int32x2_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = 
__builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -- __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_p8 (poly8_t * __a, poly8x16x3_t val) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -- __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_s16 (int16_t * __a, int16x8x3_t val) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -- __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_p16 (poly16_t * __a, poly16x8x3_t val) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -- __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_s32 (int32_t * __a, int32x4x3_t val) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_f16 (float16x8_t __a, float16x8_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); -- __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 
11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_s64 (int64_t * __a, int64x2x3_t val) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_f32 (float32x4_t __a, float32x4_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); -- __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_u8 (uint8_t * __a, uint8x16x3_t val) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_f64 (float64x2_t __a, float64x2_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv16qi (__o, (int8x16_t) val.val[2], 2); -- __builtin_aarch64_st3v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_u16 (uint16_t * __a, uint16x8x3_t val) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hi (__o, (int16x8_t) val.val[2], 2); -- __builtin_aarch64_st3v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_u32 (uint32_t * __a, uint32x4x3_t val) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4si (__o, (int32x4_t) val.val[2], 2); -- __builtin_aarch64_st3v4si ((__builtin_aarch64_simd_si *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_u64 (uint64_t * __a, uint64x2x3_t 
val) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_s8 (int8x16_t __a, int8x16_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2di (__o, (int64x2_t) val.val[2], 2); -- __builtin_aarch64_st3v2di ((__builtin_aarch64_simd_di *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_f16 (float16_t * __a, float16x8x3_t val) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_s16 (int16x8_t __a, int16x8_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv8hf (__o, (float16x8_t) val.val[2], 2); -- __builtin_aarch64_st3v8hf ((__builtin_aarch64_simd_hf *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_f32 (float32_t * __a, float32x4x3_t val) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_s32 (int32x4_t __a, int32x4_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv4sf (__o, (float32x4_t) val.val[2], 2); -- __builtin_aarch64_st3v4sf ((__builtin_aarch64_simd_sf *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst3q_f64 (float64_t * __a, float64x2x3_t val) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_s64 (int64x2_t __a, int64x2_t __b) - { -- __builtin_aarch64_simd_ci __o; -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregciv2df (__o, (float64x2_t) val.val[2], 2); -- __builtin_aarch64_st3v2df ((__builtin_aarch64_simd_df *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline void --vst4_s64 (int64_t * __a, int64x1x4_t val) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b) - { -- __builtin_aarch64_simd_xi __o; -- int64x2x4_t temp; -- temp.val[0] = vcombine_s64 (val.val[0], vcreate_s64 
(__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s64 (val.val[1], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s64 (val.val[2], vcreate_s64 (__AARCH64_INT64_C (0))); -- temp.val[3] = vcombine_s64 (val.val[3], vcreate_s64 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); -- __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); -+#endif - } - --__extension__ static __inline void --vst4_u64 (uint64_t * __a, uint64x1x4_t val) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) - { -- __builtin_aarch64_simd_xi __o; -- uint64x2x4_t temp; -- temp.val[0] = vcombine_u64 (val.val[0], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u64 (val.val[1], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u64 (val.val[2], vcreate_u64 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_u64 (val.val[3], vcreate_u64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) temp.val[3], 3); -- __builtin_aarch64_st4di ((__builtin_aarch64_simd_di *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); -+#endif - } - --__extension__ static __inline void --vst4_f64 (float64_t * __a, float64x1x4_t val) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) - { -- __builtin_aarch64_simd_xi __o; -- float64x2x4_t temp; -- temp.val[0] = vcombine_f64 (val.val[0], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f64 (val.val[1], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f64 (val.val[2], vcreate_f64 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_f64 (val.val[3], vcreate_f64 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) temp.val[3], 3); -- __builtin_aarch64_st4df ((__builtin_aarch64_simd_df *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); -+#endif - } - --__extension__ static __inline void --vst4_s8 (int8_t * __a, int8x8x4_t val) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp1q_u64 
(uint64x2_t __a, uint64x2_t __b) - { -- __builtin_aarch64_simd_xi __o; -- int8x16x4_t temp; -- temp.val[0] = vcombine_s8 (val.val[0], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s8 (val.val[1], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s8 (val.val[2], vcreate_s8 (__AARCH64_INT64_C (0))); -- temp.val[3] = vcombine_s8 (val.val[3], vcreate_s8 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -- __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_f16 (float16x4_t __a, float16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_p8 (poly8_t * __a, poly8x8x4_t val) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_f32 (float32x2_t __a, float32x2_t __b) - { -- __builtin_aarch64_simd_xi __o; -- poly8x16x4_t temp; -- temp.val[0] = vcombine_p8 (val.val[0], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p8 (val.val[1], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p8 (val.val[2], vcreate_p8 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_p8 (val.val[3], vcreate_p8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -- __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_s16 (int16_t * __a, int16x4x4_t val) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) - { -- __builtin_aarch64_simd_xi __o; -- int16x8x4_t temp; -- temp.val[0] = vcombine_s16 (val.val[0], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s16 (val.val[1], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s16 (val.val[2], vcreate_s16 (__AARCH64_INT64_C (0))); -- temp.val[3] = vcombine_s16 (val.val[3], vcreate_s16 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -- __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef 
__AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_p16 (poly16_t * __a, poly16x4x4_t val) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) - { -- __builtin_aarch64_simd_xi __o; -- poly16x8x4_t temp; -- temp.val[0] = vcombine_p16 (val.val[0], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_p16 (val.val[1], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_p16 (val.val[2], vcreate_p16 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_p16 (val.val[3], vcreate_p16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -- __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_s32 (int32_t * __a, int32x2x4_t val) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_s8 (int8x8_t __a, int8x8_t __b) - { -- __builtin_aarch64_simd_xi __o; -- int32x4x4_t temp; -- temp.val[0] = vcombine_s32 (val.val[0], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[1] = vcombine_s32 (val.val[1], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[2] = vcombine_s32 (val.val[2], vcreate_s32 (__AARCH64_INT64_C (0))); -- temp.val[3] = vcombine_s32 (val.val[3], vcreate_s32 (__AARCH64_INT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); -- __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_u8 (uint8_t * __a, uint8x8x4_t val) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_s16 (int16x4_t __a, int16x4_t __b) - { -- __builtin_aarch64_simd_xi __o; -- uint8x16x4_t temp; -- temp.val[0] = vcombine_u8 (val.val[0], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u8 (val.val[1], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u8 (val.val[2], vcreate_u8 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_u8 (val.val[3], vcreate_u8 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[2], 2); -- __o = 
__builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) temp.val[3], 3); -- __builtin_aarch64_st4v8qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_u16 (uint16_t * __a, uint16x4x4_t val) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_s32 (int32x2_t __a, int32x2_t __b) - { -- __builtin_aarch64_simd_xi __o; -- uint16x8x4_t temp; -- temp.val[0] = vcombine_u16 (val.val[0], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u16 (val.val[1], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u16 (val.val[2], vcreate_u16 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_u16 (val.val[3], vcreate_u16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) temp.val[3], 3); -- __builtin_aarch64_st4v4hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_u32 (uint32_t * __a, uint32x2x4_t val) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) - { -- __builtin_aarch64_simd_xi __o; -- uint32x4x4_t temp; -- temp.val[0] = vcombine_u32 (val.val[0], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_u32 (val.val[1], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_u32 (val.val[2], vcreate_u32 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_u32 (val.val[3], vcreate_u32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) temp.val[3], 3); -- __builtin_aarch64_st4v2si ((__builtin_aarch64_simd_si *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_f16 (float16_t * __a, float16x4x4_t val) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) - { -- __builtin_aarch64_simd_xi __o; -- float16x8x4_t temp; -- temp.val[0] = vcombine_f16 (val.val[0], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f16 (val.val[1], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f16 (val.val[2], vcreate_f16 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_f16 (val.val[3], vcreate_f16 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[0], 0); -- __o = 
__builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) temp.val[3], 3); -- __builtin_aarch64_st4v4hf ((__builtin_aarch64_simd_hf *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4_f32 (float32_t * __a, float32x2x4_t val) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) - { -- __builtin_aarch64_simd_xi __o; -- float32x4x4_t temp; -- temp.val[0] = vcombine_f32 (val.val[0], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[1] = vcombine_f32 (val.val[1], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[2] = vcombine_f32 (val.val[2], vcreate_f32 (__AARCH64_UINT64_C (0))); -- temp.val[3] = vcombine_f32 (val.val[3], vcreate_f32 (__AARCH64_UINT64_C (0))); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) temp.val[3], 3); -- __builtin_aarch64_st4v2sf ((__builtin_aarch64_simd_sf *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4q_s8 (int8_t * __a, int8x16x4_t val) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_f16 (float16x8_t __a, float16x8_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); -- __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4q_p8 (poly8_t * __a, poly8x16x4_t val) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_f32 (float32x4_t __a, float32x4_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3); -- __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ 
((__always_inline__)) --vst4q_s16 (int16_t * __a, int16x8x4_t val) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_f64 (float64x2_t __a, float64x2_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); -- __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4q_p16 (poly16_t * __a, poly16x8x4_t val) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3); -- __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4q_s32 (int32_t * __a, int32x4x4_t val) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3); -- __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4q_s64 (int64_t * __a, int64x2x4_t val) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_s8 (int8x16_t __a, int8x16_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3); -- __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); -+#else -+ return __builtin_shuffle (__a, __b, 
-+      (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31});
-+#endif
- }
- 
---__extension__ static __inline void __attribute__ ((__always_inline__))
--vst4q_u8 (uint8_t * __a, uint8x16x4_t val)
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vuzp2q_s16 (int16x8_t __a, int16x8_t __b)
- {
--  __builtin_aarch64_simd_xi __o;
--  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[0], 0);
--  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[1], 1);
--  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[2], 2);
--  __o = __builtin_aarch64_set_qregxiv16qi (__o, (int8x16_t) val.val[3], 3);
--  __builtin_aarch64_st4v16qi ((__builtin_aarch64_simd_qi *) __a, __o);
-+#ifdef __AARCH64EB__
-+  return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6});
-+#else
-+  return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15});
-+#endif
- }
- 
---__extension__ static __inline void __attribute__ ((__always_inline__))
--vst4q_u16 (uint16_t * __a, uint16x8x4_t val)
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vuzp2q_s32 (int32x4_t __a, int32x4_t __b)
- {
--  __builtin_aarch64_simd_xi __o;
--  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[0], 0);
--  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[1], 1);
--  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[2], 2);
--  __o = __builtin_aarch64_set_qregxiv8hi (__o, (int16x8_t) val.val[3], 3);
--  __builtin_aarch64_st4v8hi ((__builtin_aarch64_simd_hi *) __a, __o);
-+#ifdef __AARCH64EB__
-+  return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2});
-+#else
-+  return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7});
-+#endif
- }
- 
---__extension__ static __inline void __attribute__ ((__always_inline__))
--vst4q_u32 (uint32_t * __a, uint32x4x4_t val)
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vuzp2q_s64 (int64x2_t __a, int64x2_t __b)
- {
--  __builtin_aarch64_simd_xi __o;
--  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[0], 0);
--  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[1], 1);
--  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[2], 2);
--  __o = __builtin_aarch64_set_qregxiv4si (__o, (int32x4_t) val.val[3], 3);
--  __builtin_aarch64_st4v4si ((__builtin_aarch64_simd_si *) __a, __o);
-+#ifdef __AARCH64EB__
-+  return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0});
-+#else
-+  return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3});
-+#endif
- }
- 
---__extension__ static __inline void __attribute__ ((__always_inline__))
--vst4q_u64 (uint64_t * __a, uint64x2x4_t val)
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b)
- {
--  __builtin_aarch64_simd_xi __o;
--  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[0], 0);
--  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[1], 1);
--  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[2], 2);
--  __o = __builtin_aarch64_set_qregxiv2di (__o, (int64x2_t) val.val[3], 3);
--  __builtin_aarch64_st4v2di ((__builtin_aarch64_simd_di *) __a, __o);
-+#ifdef __AARCH64EB__
-+  return __builtin_shuffle (__a, __b, (uint8x16_t)
-+      {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14});
-+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4q_f16 (float16_t * __a, float16x8x4_t val) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv8hf (__o, (float16x8_t) val.val[3], 3); -- __builtin_aarch64_st4v8hf ((__builtin_aarch64_simd_hf *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4q_f32 (float32_t * __a, float32x4x4_t val) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv4sf (__o, (float32x4_t) val.val[3], 3); -- __builtin_aarch64_st4v4sf ((__builtin_aarch64_simd_sf *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); -+#endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) --vst4q_f64 (float64_t * __a, float64x2x4_t val) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) - { -- __builtin_aarch64_simd_xi __o; -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[0], 0); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[1], 1); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[2], 2); -- __o = __builtin_aarch64_set_qregxiv2df (__o, (float64x2_t) val.val[3], 3); -- __builtin_aarch64_st4v2df ((__builtin_aarch64_simd_df *) __a, __o); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); -+#endif - } - --/* vsub */ -+__INTERLEAVE_LIST (uzp) - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vsubd_s64 (int64_t __a, int64_t __b) -+/* vzip */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_f16 (float16x4_t __a, float16x4_t __b) - { -- return __a - __b; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vsubd_u64 (uint64_t __a, uint64_t __b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vzip1_f32 (float32x2_t __a, float32x2_t __b) - { -- return __a - __b; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif - } - --/* vtbx1 */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtbx1_s8 (int8x8_t __r, int8x8_t __tab, int8x8_t __idx) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_p8 (poly8x8_t __a, poly8x8_t __b) - { -- uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), -- vmov_n_u8 (8)); -- int8x8_t __tbl = vtbl1_s8 (__tab, __idx); -- -- return vbsl_s8 (__mask, __tbl, __r); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtbx1_u8 (uint8x8_t __r, uint8x8_t __tab, uint8x8_t __idx) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_p16 (poly16x4_t __a, poly16x4_t __b) - { -- uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); -- uint8x8_t __tbl = vtbl1_u8 (__tab, __idx); -- -- return vbsl_u8 (__mask, __tbl, __r); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtbx1_p8 (poly8x8_t __r, poly8x8_t __tab, uint8x8_t __idx) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_s8 (int8x8_t __a, int8x8_t __b) - { -- uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (8)); -- poly8x8_t __tbl = vtbl1_p8 (__tab, __idx); -- -- return vbsl_p8 (__mask, __tbl, __r); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif - } - --/* vtbx3 */ -- --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtbx3_s8 (int8x8_t __r, int8x8x3_t __tab, int8x8_t __idx) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_s16 (int16x4_t __a, int16x4_t __b) - { -- uint8x8_t __mask = vclt_u8 (vreinterpret_u8_s8 (__idx), -- vmov_n_u8 (24)); -- int8x8_t __tbl = vtbl3_s8 (__tab, __idx); -- -- return vbsl_s8 (__mask, __tbl, __r); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtbx3_u8 (uint8x8_t __r, uint8x8x3_t __tab, uint8x8_t __idx) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_s32 (int32x2_t __a, int32x2_t __b) - { -- uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); -- uint8x8_t __tbl = vtbl3_u8 (__tab, __idx); -- -- return vbsl_u8 (__mask, __tbl, __r); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif - } - --__extension__ static __inline poly8x8_t __attribute__ 
((__always_inline__)) --vtbx3_p8 (poly8x8_t __r, poly8x8x3_t __tab, uint8x8_t __idx) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_u8 (uint8x8_t __a, uint8x8_t __b) - { -- uint8x8_t __mask = vclt_u8 (__idx, vmov_n_u8 (24)); -- poly8x8_t __tbl = vtbl3_p8 (__tab, __idx); -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif -+} - -- return vbsl_p8 (__mask, __tbl, __r); -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_u16 (uint16x4_t __a, uint16x4_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); -+#endif - } - --/* vtbx4 */ -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1_u32 (uint32x2_t __a, uint32x2_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+#endif -+} - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_f16 (float16x8_t __a, float16x8_t __b) - { -- int8x8_t result; -- int8x16x2_t temp; -- __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]); -- temp.val[1] = vcombine_s8 (__tab.val[2], __tab.val[3]); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx); -- return result; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); -+#else -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); -+#endif - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_f32 (float32x4_t __a, float32x4_t __b) - { -- uint8x8_t result; -- uint8x16x2_t temp; -- __builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]); -- temp.val[1] = vcombine_u8 (__tab.val[2], __tab.val[3]); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, -- (int8x8_t)__idx); -- return result; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); -+#else -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); -+#endif - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_f64 (float64x2_t __a, float64x2_t __b) - { -- poly8x8_t result; -- poly8x16x2_t temp; -- 
__builtin_aarch64_simd_oi __o; -- temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]); -- temp.val[1] = vcombine_p8 (__tab.val[2], __tab.val[3]); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[0], 0); -- __o = __builtin_aarch64_set_qregoiv16qi (__o, -- (int8x16_t) temp.val[1], 1); -- result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o, -- (int8x8_t)__idx); -- return result; -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+#else -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+#endif - } - --/* vtrn */ -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) -+{ -+#ifdef __AARCH64EB__ -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); -+#else -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); -+#endif -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vtrn1_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); - #else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); - #endif - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtrn1_p8 (poly8x8_t __a, poly8x8_t __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_s8 (int8x16_t __a, int8x16_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); - #else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); - #endif - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vtrn1_p16 (poly16x4_t __a, poly16x4_t __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_s16 (int16x8_t __a, int16x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); - #else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); - #endif - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtrn1_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_s32 (int32x4_t __a, int32x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); - #else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 
14}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); - #endif - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vtrn1_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_s64 (int64x2_t __a, int64x2_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); - #else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); - #endif - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vtrn1_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); - #else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); - #endif - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtrn1_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {12, 4, 13, 5, 14, 6, 15, 7}); - #else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); - #endif - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtrn1_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); - #else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); - #endif - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vtrn1_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); - #else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); - #endif - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vtrn1q_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_f16 (float16x4_t __a, float16x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, 
(uint32x4_t) {5, 1, 7, 3}); -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); - #else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); - #endif - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vtrn1q_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_f32 (float32x2_t __a, float32x2_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); - #else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); - #endif - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_p8 (poly8x8_t __a, poly8x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); - #else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); - #endif - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_p16 (poly16x4_t __a, poly16x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); - #else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); - #endif - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vtrn1q_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_s8 (int8x8_t __a, int8x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); - #else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); - #endif - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vtrn1q_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_s16 (int16x4_t __a, int16x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); - #else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); - #endif - } - --__extension__ 
static __inline int32x4_t __attribute__ ((__always_inline__)) --vtrn1q_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_s32 (int32x2_t __a, int32x2_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); - #else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); - #endif - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vtrn1q_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_u8 (uint8x8_t __a, uint8x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); - #else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+ return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); - #endif - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_u16 (uint16x4_t __a, uint16x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); - #else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); -+ return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); - #endif - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2_u32 (uint32x2_t __a, uint32x2_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); - #else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); -+ return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); - #endif - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_f16 (float16x8_t __a, float16x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); - #else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); -+ return __builtin_shuffle (__a, __b, -+ (uint16x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); - #endif - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_f32 (float32x4_t __a, float32x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); -+ return __builtin_shuffle (__a, __b, 
(uint32x4_t) {4, 0, 5, 1}); - #else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); - #endif - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vtrn2_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline float64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_f64 (float64x2_t __a, float64x2_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); - #else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); - #endif - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vtrn2_p8 (poly8x8_t __a, poly8x8_t __b) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); - #else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); - #endif - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vtrn2_p16 (poly16x4_t __a, poly16x4_t __b) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); - #else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); - #endif - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vtrn2_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_s8 (int8x16_t __a, int8x16_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); - #else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); - #endif - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vtrn2_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_s16 (int16x8_t __a, int16x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); - #else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); - #endif - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) 
--vtrn2_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_s32 (int32x4_t __a, int32x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); - #else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); - #endif - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtrn2_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_s64 (int64x2_t __a, int64x2_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); - #else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); -+ return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); - #endif - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtrn2_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); - #else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); -+ return __builtin_shuffle (__a, __b, (uint8x16_t) -+ {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); - #endif - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vtrn2_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); - #else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); -+ return __builtin_shuffle (__a, __b, (uint16x8_t) -+ {4, 12, 5, 13, 6, 14, 7, 15}); - #endif - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vtrn2q_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) - { - #ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); - #else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); -+ return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); - #endif - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vtrn2q_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) - { - #ifdef __AARCH64EB__ - return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); -@@ -24455,1319 +30368,1184 @@ vtrn2q_f64 (float64x2_t __a, float64x2_t __b) - #endif - } - --__extension__ static __inline poly8x16_t 
__attribute__ ((__always_inline__)) --vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b) -+__INTERLEAVE_LIST (zip) -+ -+#undef __INTERLEAVE_LIST -+#undef __DEFINTERLEAVE -+ -+/* End of optimal implementations in approved order. */ -+ -+#pragma GCC pop_options -+ -+/* ARMv8.2-A FP16 intrinsics. */ -+ -+#include "arm_fp16.h" -+ -+#pragma GCC push_options -+#pragma GCC target ("arch=armv8.2-a+fp16") -+ -+/* ARMv8.2-A FP16 one operand vector intrinsics. */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabs_f16 (float16x4_t __a) -+{ -+ return __builtin_aarch64_absv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabsq_f16 (float16x8_t __a) -+{ -+ return __builtin_aarch64_absv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_f16 (float16x4_t __a) -+{ -+ return __builtin_aarch64_cmeqv4hf_uss (__a, vdup_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_f16 (float16x8_t __a) -+{ -+ return __builtin_aarch64_cmeqv8hf_uss (__a, vdupq_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgez_f16 (float16x4_t __a) -+{ -+ return __builtin_aarch64_cmgev4hf_uss (__a, vdup_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezq_f16 (float16x8_t __a) -+{ -+ return __builtin_aarch64_cmgev8hf_uss (__a, vdupq_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtz_f16 (float16x4_t __a) -+{ -+ return __builtin_aarch64_cmgtv4hf_uss (__a, vdup_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzq_f16 (float16x8_t __a) -+{ -+ return __builtin_aarch64_cmgtv8hf_uss (__a, vdupq_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclez_f16 (float16x4_t __a) -+{ -+ return __builtin_aarch64_cmlev4hf_uss (__a, vdup_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezq_f16 (float16x8_t __a) -+{ -+ return __builtin_aarch64_cmlev8hf_uss (__a, vdupq_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltz_f16 (float16x4_t __a) -+{ -+ return __builtin_aarch64_cmltv4hf_uss (__a, vdup_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzq_f16 (float16x8_t __a) -+{ -+ return __builtin_aarch64_cmltv8hf_uss (__a, vdupq_n_f16 (0.0f)); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_f16_s16 (int16x4_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); --#endif -+ return __builtin_aarch64_floatv4hiv4hf (__a); - } - 
--__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_f16_s16 (int16x8_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); --#endif -+ return __builtin_aarch64_floatv8hiv8hf (__a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vtrn2q_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_f16_u16 (uint16x4_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); --#endif -+ return __builtin_aarch64_floatunsv4hiv4hf ((int16x4_t) __a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vtrn2q_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_f16_u16 (uint16x8_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); --#endif -+ return __builtin_aarch64_floatunsv8hiv8hf ((int16x8_t) __a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vtrn2q_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_s16_f16 (float16x4_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); --#endif -+ return __builtin_aarch64_lbtruncv4hfv4hi (__a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vtrn2q_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_s16_f16 (float16x8_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_lbtruncv8hfv8hi (__a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_u16_f16 (float16x4_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); --#endif -+ return __builtin_aarch64_lbtruncuv4hfv4hi_us (__a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_u16_f16 
(float16x8_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); --#endif -+ return __builtin_aarch64_lbtruncuv8hfv8hi_us (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvta_s16_f16 (float16x4_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); --#endif -+ return __builtin_aarch64_lroundv4hfv4hi (__a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtaq_s16_f16 (float16x8_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_lroundv8hfv8hi (__a); - } - --__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) --vtrn_f32 (float32x2_t a, float32x2_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvta_u16_f16 (float16x4_t __a) - { -- return (float32x2x2_t) {vtrn1_f32 (a, b), vtrn2_f32 (a, b)}; -+ return __builtin_aarch64_lrounduv4hfv4hi_us (__a); - } - --__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) --vtrn_p8 (poly8x8_t a, poly8x8_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtaq_u16_f16 (float16x8_t __a) - { -- return (poly8x8x2_t) {vtrn1_p8 (a, b), vtrn2_p8 (a, b)}; -+ return __builtin_aarch64_lrounduv8hfv8hi_us (__a); - } - --__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) --vtrn_p16 (poly16x4_t a, poly16x4_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtm_s16_f16 (float16x4_t __a) - { -- return (poly16x4x2_t) {vtrn1_p16 (a, b), vtrn2_p16 (a, b)}; -+ return __builtin_aarch64_lfloorv4hfv4hi (__a); - } - --__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) --vtrn_s8 (int8x8_t a, int8x8_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtmq_s16_f16 (float16x8_t __a) - { -- return (int8x8x2_t) {vtrn1_s8 (a, b), vtrn2_s8 (a, b)}; -+ return __builtin_aarch64_lfloorv8hfv8hi (__a); - } - --__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) --vtrn_s16 (int16x4_t a, int16x4_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtm_u16_f16 (float16x4_t __a) - { -- return (int16x4x2_t) {vtrn1_s16 (a, b), vtrn2_s16 (a, b)}; -+ return __builtin_aarch64_lflooruv4hfv4hi_us (__a); - } - --__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) --vtrn_s32 (int32x2_t a, int32x2_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtmq_u16_f16 (float16x8_t __a) - { -- return (int32x2x2_t) {vtrn1_s32 (a, b), 
vtrn2_s32 (a, b)}; -+ return __builtin_aarch64_lflooruv8hfv8hi_us (__a); - } - --__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) --vtrn_u8 (uint8x8_t a, uint8x8_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtn_s16_f16 (float16x4_t __a) - { -- return (uint8x8x2_t) {vtrn1_u8 (a, b), vtrn2_u8 (a, b)}; -+ return __builtin_aarch64_lfrintnv4hfv4hi (__a); - } - --__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) --vtrn_u16 (uint16x4_t a, uint16x4_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtnq_s16_f16 (float16x8_t __a) - { -- return (uint16x4x2_t) {vtrn1_u16 (a, b), vtrn2_u16 (a, b)}; -+ return __builtin_aarch64_lfrintnv8hfv8hi (__a); - } - --__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) --vtrn_u32 (uint32x2_t a, uint32x2_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtn_u16_f16 (float16x4_t __a) - { -- return (uint32x2x2_t) {vtrn1_u32 (a, b), vtrn2_u32 (a, b)}; -+ return __builtin_aarch64_lfrintnuv4hfv4hi_us (__a); - } - --__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) --vtrnq_f32 (float32x4_t a, float32x4_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtnq_u16_f16 (float16x8_t __a) - { -- return (float32x4x2_t) {vtrn1q_f32 (a, b), vtrn2q_f32 (a, b)}; -+ return __builtin_aarch64_lfrintnuv8hfv8hi_us (__a); - } - --__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) --vtrnq_p8 (poly8x16_t a, poly8x16_t b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtp_s16_f16 (float16x4_t __a) - { -- return (poly8x16x2_t) {vtrn1q_p8 (a, b), vtrn2q_p8 (a, b)}; -+ return __builtin_aarch64_lceilv4hfv4hi (__a); - } - --__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) --vtrnq_p16 (poly16x8_t a, poly16x8_t b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtpq_s16_f16 (float16x8_t __a) - { -- return (poly16x8x2_t) {vtrn1q_p16 (a, b), vtrn2q_p16 (a, b)}; -+ return __builtin_aarch64_lceilv8hfv8hi (__a); - } - --__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) --vtrnq_s8 (int8x16_t a, int8x16_t b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtp_u16_f16 (float16x4_t __a) - { -- return (int8x16x2_t) {vtrn1q_s8 (a, b), vtrn2q_s8 (a, b)}; -+ return __builtin_aarch64_lceiluv4hfv4hi_us (__a); - } - --__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) --vtrnq_s16 (int16x8_t a, int16x8_t b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtpq_u16_f16 (float16x8_t __a) - { -- return (int16x8x2_t) {vtrn1q_s16 (a, b), vtrn2q_s16 (a, b)}; -+ return __builtin_aarch64_lceiluv8hfv8hi_us (__a); - } - --__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) --vtrnq_s32 (int32x4_t a, int32x4_t b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vneg_f16 (float16x4_t __a) - { -- return (int32x4x2_t) {vtrn1q_s32 (a, b), vtrn2q_s32 (a, b)}; -+ 
return -__a; - } - --__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) --vtrnq_u8 (uint8x16_t a, uint8x16_t b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vnegq_f16 (float16x8_t __a) - { -- return (uint8x16x2_t) {vtrn1q_u8 (a, b), vtrn2q_u8 (a, b)}; -+ return -__a; - } - --__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) --vtrnq_u16 (uint16x8_t a, uint16x8_t b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpe_f16 (float16x4_t __a) - { -- return (uint16x8x2_t) {vtrn1q_u16 (a, b), vtrn2q_u16 (a, b)}; -+ return __builtin_aarch64_frecpev4hf (__a); - } - --__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) --vtrnq_u32 (uint32x4_t a, uint32x4_t b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpeq_f16 (float16x8_t __a) - { -- return (uint32x4x2_t) {vtrn1q_u32 (a, b), vtrn2q_u32 (a, b)}; -+ return __builtin_aarch64_frecpev8hf (__a); - } - --/* vtst */ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd_f16 (float16x4_t __a) -+{ -+ return __builtin_aarch64_btruncv4hf (__a); -+} - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtst_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndq_f16 (float16x8_t __a) - { -- return (uint8x8_t) ((__a & __b) != 0); -+ return __builtin_aarch64_btruncv8hf (__a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtst_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnda_f16 (float16x4_t __a) - { -- return (uint16x4_t) ((__a & __b) != 0); -+ return __builtin_aarch64_roundv4hf (__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vtst_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndaq_f16 (float16x8_t __a) - { -- return (uint32x2_t) ((__a & __b) != 0); -+ return __builtin_aarch64_roundv8hf (__a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vtst_s64 (int64x1_t __a, int64x1_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndi_f16 (float16x4_t __a) - { -- return (uint64x1_t) ((__a & __b) != __AARCH64_INT64_C (0)); -+ return __builtin_aarch64_nearbyintv4hf (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vtst_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndiq_f16 (float16x8_t __a) - { -- return ((__a & __b) != 0); -+ return __builtin_aarch64_nearbyintv8hf (__a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vtst_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndm_f16 (float16x4_t __a) - { -- return ((__a & __b) != 0); -+ return __builtin_aarch64_floorv4hf (__a); - } - 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vtst_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndmq_f16 (float16x8_t __a) - { -- return ((__a & __b) != 0); -+ return __builtin_aarch64_floorv8hf (__a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vtst_u64 (uint64x1_t __a, uint64x1_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndn_f16 (float16x4_t __a) - { -- return ((__a & __b) != __AARCH64_UINT64_C (0)); -+ return __builtin_aarch64_frintnv4hf (__a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtstq_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndnq_f16 (float16x8_t __a) - { -- return (uint8x16_t) ((__a & __b) != 0); -+ return __builtin_aarch64_frintnv8hf (__a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtstq_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndp_f16 (float16x4_t __a) - { -- return (uint16x8_t) ((__a & __b) != 0); -+ return __builtin_aarch64_ceilv4hf (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vtstq_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndpq_f16 (float16x8_t __a) - { -- return (uint32x4_t) ((__a & __b) != 0); -+ return __builtin_aarch64_ceilv8hf (__a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vtstq_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndx_f16 (float16x4_t __a) - { -- return (uint64x2_t) ((__a & __b) != __AARCH64_INT64_C (0)); -+ return __builtin_aarch64_rintv4hf (__a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vtstq_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndxq_f16 (float16x8_t __a) - { -- return ((__a & __b) != 0); -+ return __builtin_aarch64_rintv8hf (__a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vtstq_u16 (uint16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrte_f16 (float16x4_t a) - { -- return ((__a & __b) != 0); -+ return __builtin_aarch64_rsqrtev4hf (a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vtstq_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrteq_f16 (float16x8_t a) - { -- return ((__a & __b) != 0); -+ return __builtin_aarch64_rsqrtev8hf (a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vtstq_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqrt_f16 (float16x4_t a) - { -- return ((__a & __b) != 
__AARCH64_UINT64_C (0)); -+ return __builtin_aarch64_sqrtv4hf (a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vtstd_s64 (int64_t __a, int64_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsqrtq_f16 (float16x8_t a) - { -- return (__a & __b) ? -1ll : 0ll; -+ return __builtin_aarch64_sqrtv8hf (a); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) --vtstd_u64 (uint64_t __a, uint64_t __b) -+/* ARMv8.2-A FP16 two operands vector intrinsics. */ -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vadd_f16 (float16x4_t __a, float16x4_t __b) - { -- return (__a & __b) ? -1ll : 0ll; -+ return __a + __b; - } - --/* vuqadd */ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __a + __b; -+} - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vuqadd_s8 (int8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabd_f16 (float16x4_t a, float16x4_t b) - { -- return __builtin_aarch64_suqaddv8qi_ssu (__a, __b); -+ return __builtin_aarch64_fabdv4hf (a, b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vuqadd_s16 (int16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabdq_f16 (float16x8_t a, float16x8_t b) - { -- return __builtin_aarch64_suqaddv4hi_ssu (__a, __b); -+ return __builtin_aarch64_fabdv8hf (a, b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vuqadd_s32 (int32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcage_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_suqaddv2si_ssu (__a, __b); -+ return __builtin_aarch64_facgev4hf_uss (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) --vuqadd_s64 (int64x1_t __a, uint64x1_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcageq_f16 (float16x8_t __a, float16x8_t __b) - { -- return (int64x1_t) {__builtin_aarch64_suqadddi_ssu (__a[0], __b[0])}; -+ return __builtin_aarch64_facgev8hf_uss (__a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagt_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_suqaddv16qi_ssu (__a, __b); -+ return __builtin_aarch64_facgtv4hf_uss (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagtq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_suqaddv8hi_ssu (__a, __b); -+ return __builtin_aarch64_facgtv8hf_uss (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vuqaddq_s32 (int32x4_t __a, uint32x4_t 
__b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcale_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_suqaddv4si_ssu (__a, __b); -+ return __builtin_aarch64_faclev4hf_uss (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaleq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_suqaddv2di_ssu (__a, __b); -+ return __builtin_aarch64_faclev8hf_uss (__a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) --vuqaddb_s8 (int8_t __a, uint8_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcalt_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_suqaddqi_ssu (__a, __b); -+ return __builtin_aarch64_facltv4hf_uss (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) --vuqaddh_s16 (int16_t __a, uint16_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaltq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_suqaddhi_ssu (__a, __b); -+ return __builtin_aarch64_facltv8hf_uss (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) --vuqadds_s32 (int32_t __a, uint32_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_f16 (float16x4_t __a, float16x4_t __b) - { -- return __builtin_aarch64_suqaddsi_ssu (__a, __b); -+ return __builtin_aarch64_cmeqv4hf_uss (__a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) --vuqaddd_s64 (int64_t __a, uint64_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_f16 (float16x8_t __a, float16x8_t __b) - { -- return __builtin_aarch64_suqadddi_ssu (__a, __b); -+ return __builtin_aarch64_cmeqv8hf_uss (__a, __b); - } - --#define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ -- __extension__ static __inline rettype \ -- __attribute__ ((__always_inline__)) \ -- v ## op ## Q ## _ ## funcsuffix (intype a, intype b) \ -- { \ -- return (rettype) {v ## op ## 1 ## Q ## _ ## funcsuffix (a, b), \ -- v ## op ## 2 ## Q ## _ ## funcsuffix (a, b)}; \ -- } -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_aarch64_cmgev4hf_uss (__a, __b); -+} - --#define __INTERLEAVE_LIST(op) \ -- __DEFINTERLEAVE (op, float32x2x2_t, float32x2_t, f32,) \ -- __DEFINTERLEAVE (op, poly8x8x2_t, poly8x8_t, p8,) \ -- __DEFINTERLEAVE (op, poly16x4x2_t, poly16x4_t, p16,) \ -- __DEFINTERLEAVE (op, int8x8x2_t, int8x8_t, s8,) \ -- __DEFINTERLEAVE (op, int16x4x2_t, int16x4_t, s16,) \ -- __DEFINTERLEAVE (op, int32x2x2_t, int32x2_t, s32,) \ -- __DEFINTERLEAVE (op, uint8x8x2_t, uint8x8_t, u8,) \ -- __DEFINTERLEAVE (op, uint16x4x2_t, uint16x4_t, u16,) \ -- __DEFINTERLEAVE (op, uint32x2x2_t, uint32x2_t, u32,) \ -- __DEFINTERLEAVE (op, float32x4x2_t, float32x4_t, f32, q) \ -- __DEFINTERLEAVE (op, poly8x16x2_t, poly8x16_t, p8, q) \ -- __DEFINTERLEAVE (op, poly16x8x2_t, poly16x8_t, p16, q) \ -- 
__DEFINTERLEAVE (op, int8x16x2_t, int8x16_t, s8, q) \ -- __DEFINTERLEAVE (op, int16x8x2_t, int16x8_t, s16, q) \ -- __DEFINTERLEAVE (op, int32x4x2_t, int32x4_t, s32, q) \ -- __DEFINTERLEAVE (op, uint8x16x2_t, uint8x16_t, u8, q) \ -- __DEFINTERLEAVE (op, uint16x8x2_t, uint16x8_t, u16, q) \ -- __DEFINTERLEAVE (op, uint32x4x2_t, uint32x4_t, u32, q) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_aarch64_cmgev8hf_uss (__a, __b); -+} - --/* vuzp */ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_aarch64_cmgtv4hf_uss (__a, __b); -+} - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vuzp1_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); --#endif -+ return __builtin_aarch64_cmgtv8hf_uss (__a, __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); --#endif -+ return __builtin_aarch64_cmlev4hf_uss (__a, __b); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); --#endif -+ return __builtin_aarch64_cmlev8hf_uss (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vuzp1_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); --#endif -+ return __builtin_aarch64_cmltv4hf_uss (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vuzp1_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); --#endif -+ return __builtin_aarch64_cmltv8hf_uss (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vuzp1_s32 (int32x2_t __a, 
int32x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_n_f16_s16 (int16x4_t __a, const int __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); --#endif -+ return __builtin_aarch64_scvtfv4hi (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_n_f16_s16 (int16x8_t __a, const int __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); --#endif -+ return __builtin_aarch64_scvtfv8hi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_n_f16_u16 (uint16x4_t __a, const int __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); --#endif -+ return __builtin_aarch64_ucvtfv4hi_sus (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_n_f16_u16 (uint16x8_t __a, const int __b) -+{ -+ return __builtin_aarch64_ucvtfv8hi_sus (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_n_s16_f16 (float16x4_t __a, const int __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); --#endif -+ return __builtin_aarch64_fcvtzsv4hf (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vuzp1q_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_n_s16_f16 (float16x8_t __a, const int __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); --#endif -+ return __builtin_aarch64_fcvtzsv8hf (__a, __b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vuzp1q_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_n_u16_f16 (float16x4_t __a, const int __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); --#endif -+ return __builtin_aarch64_fcvtzuv4hf_uss (__a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_n_u16_f16 (float16x8_t __a, const int __b) - { --#ifdef 
__AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); --#endif -+ return __builtin_aarch64_fcvtzuv8hf_uss (__a, __b); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdiv_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); --#endif -+ return __a / __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vuzp1q_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdivq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); --#endif -+ return __a / __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vuzp1q_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); --#endif -+ return __builtin_aarch64_smax_nanv4hf (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vuzp1q_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); --#endif -+ return __builtin_aarch64_smax_nanv8hf (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vuzp1q_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxnm_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); --#endif -+ return __builtin_aarch64_fmaxv4hf (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxnmq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 
28, 30}); --#endif -+ return __builtin_aarch64_fmaxv8hf (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmin_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); --#endif -+ return __builtin_aarch64_smin_nanv4hf (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); --#endif -+ return __builtin_aarch64_smin_nanv8hf (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vuzp1q_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnm_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); --#endif -+ return __builtin_aarch64_fminv4hf (__a, __b); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vuzp2_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_fminv8hf (__a, __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); --#endif -+ return __a * __b; - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); --#endif -+ return __a * __b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vuzp2_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulx_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 
6}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); --#endif -+ return __builtin_aarch64_fmulxv4hf (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vuzp2_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulxq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); --#endif -+ return __builtin_aarch64_fmulxv8hf (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vuzp2_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_f16 (float16x4_t a, float16x4_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_faddpv4hf (a, b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpaddq_f16 (float16x8_t a, float16x8_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); --#endif -+ return __builtin_aarch64_faddpv8hf (a, b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpmax_f16 (float16x4_t a, float16x4_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); --#endif -+ return __builtin_aarch64_smax_nanpv4hf (a, b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpmaxq_f16 (float16x8_t a, float16x8_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_smax_nanpv8hf (a, b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vuzp2q_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpmaxnm_f16 (float16x4_t a, float16x4_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); --#endif -+ return __builtin_aarch64_smaxpv4hf (a, b); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vuzp2q_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpmaxnmq_f16 (float16x8_t a, float16x8_t b) - { --#ifdef 
__AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_smaxpv8hf (a, b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpmin_f16 (float16x4_t a, float16x4_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); --#endif -+ return __builtin_aarch64_smin_nanpv4hf (a, b); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpminq_f16 (float16x8_t a, float16x8_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); --#endif -+ return __builtin_aarch64_smin_nanpv8hf (a, b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vuzp2q_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpminnm_f16 (float16x4_t a, float16x4_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); --#else -- return __builtin_shuffle (__a, __b, -- (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); --#endif -+ return __builtin_aarch64_sminpv4hf (a, b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vuzp2q_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpminnmq_f16 (float16x8_t a, float16x8_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); --#endif -+ return __builtin_aarch64_sminpv8hf (a, b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vuzp2q_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecps_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); --#endif -+ return __builtin_aarch64_frecpsv4hf (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vuzp2q_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpsq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_frecpsv8hf (__a, __b); - } - 
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrts_f16 (float16x4_t a, float16x4_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); --#endif -+ return __builtin_aarch64_rsqrtsv4hf (a, b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrtsq_f16 (float16x8_t a, float16x8_t b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); --#endif -+ return __builtin_aarch64_rsqrtsv8hf (a, b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsub_f16 (float16x4_t __a, float16x4_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); --#endif -+ return __a - __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsubq_f16 (float16x8_t __a, float16x8_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); --#endif -+ return __a - __b; - } - --__INTERLEAVE_LIST (uzp) -- --/* vzip */ -+/* ARMv8.2-A FP16 three operands vector intrinsics. 
*/ - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vzip1_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); --#endif -+ return __builtin_aarch64_fmav4hf (__b, __c, __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vzip1_p8 (poly8x8_t __a, poly8x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); --#endif -+ return __builtin_aarch64_fmav8hf (__b, __c, __a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vzip1_p16 (poly16x4_t __a, poly16x4_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); --#endif -+ return __builtin_aarch64_fnmav4hf (__b, __c, __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vzip1_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); --#endif -+ return __builtin_aarch64_fnmav8hf (__b, __c, __a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vzip1_s16 (int16x4_t __a, int16x4_t __b) --{ --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); --#endif -+/* ARMv8.2-A FP16 lane vector intrinsics. 
*/ -+ -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmah_lane_f16 (float16_t __a, float16_t __b, -+ float16x4_t __c, const int __lane) -+{ -+ return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vzip1_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmah_laneq_f16 (float16_t __a, float16_t __b, -+ float16x8_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); --#endif -+ return vfmah_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vzip1_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_lane_f16 (float16x4_t __a, float16x4_t __b, -+ float16x4_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); --#endif -+ return vfma_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane)); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vzip1_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_lane_f16 (float16x8_t __a, float16x8_t __b, -+ float16x4_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); --#endif -+ return vfmaq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane)); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vzip1_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_laneq_f16 (float16x4_t __a, float16x4_t __b, -+ float16x8_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); --#endif -+ return vfma_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane)); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vzip1q_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_laneq_f16 (float16x8_t __a, float16x8_t __b, -+ float16x8_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); --#endif -+ return vfmaq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane)); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vzip1q_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c) - { --#ifdef __AARCH64EB__ -- return 
__builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); --#endif -+ return vfma_f16 (__a, __b, vdup_n_f16 (__c)); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); --#endif -+ return vfmaq_f16 (__a, __b, vdupq_n_f16 (__c)); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsh_lane_f16 (float16_t __a, float16_t __b, -+ float16x4_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) -- {12, 4, 13, 5, 14, 6, 15, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); --#endif -+ return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vzip1q_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsh_laneq_f16 (float16_t __a, float16_t __b, -+ float16x8_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); --#endif -+ return vfmsh_f16 (__a, __b, __aarch64_vget_lane_any (__c, __lane)); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vzip1q_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_lane_f16 (float16x4_t __a, float16x4_t __b, -+ float16x4_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) -- {12, 4, 13, 5, 14, 6, 15, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); --#endif -+ return vfms_f16 (__a, __b, __aarch64_vdup_lane_f16 (__c, __lane)); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vzip1q_s32 (int32x4_t __a, int32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_lane_f16 (float16x8_t __a, float16x8_t __b, -+ float16x4_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); --#endif -+ return vfmsq_f16 (__a, __b, __aarch64_vdupq_lane_f16 (__c, __lane)); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vzip1q_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_laneq_f16 
(float16x4_t __a, float16x4_t __b, -+ float16x8_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); --#endif -+ return vfms_f16 (__a, __b, __aarch64_vdup_laneq_f16 (__c, __lane)); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_laneq_f16 (float16x8_t __a, float16x8_t __b, -+ float16x8_t __c, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); --#endif -+ return vfmsq_f16 (__a, __b, __aarch64_vdupq_laneq_f16 (__c, __lane)); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_n_f16 (float16x4_t __a, float16x4_t __b, float16_t __c) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) -- {12, 4, 13, 5, 14, 6, 15, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); --#endif -+ return vfms_f16 (__a, __b, vdup_n_f16 (__c)); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_n_f16 (float16x8_t __a, float16x8_t __b, float16_t __c) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); --#endif -+ return vfmsq_f16 (__a, __b, vdupq_n_f16 (__c)); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); --#endif -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) --vzip2_f32 (float32x2_t __a, float32x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); --#endif -+ return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane))); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) --vzip2_p8 (poly8x8_t __a, poly8x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return 
__builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); --#endif -+ return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) --vzip2_p16 (poly16x4_t __a, poly16x4_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); --#endif -+ return __a * __aarch64_vget_lane_any (__b, __lane); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) --vzip2_s8 (int8x8_t __a, int8x8_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); --#endif -+ return vmul_f16 (__a, vdup_n_f16 (__aarch64_vget_lane_any (__b, __lane))); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) --vzip2_s16 (int16x4_t __a, int16x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); --#endif -+ return vmulq_f16 (__a, vdupq_n_f16 (__aarch64_vget_lane_any (__b, __lane))); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) --vzip2_s32 (int32x2_t __a, int32x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_n_f16 (float16x4_t __a, float16_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); --#endif -+ return vmul_lane_f16 (__a, vdup_n_f16 (__b), 0); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) --vzip2_u8 (uint8x8_t __a, uint8x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_n_f16 (float16x8_t __a, float16_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); --#endif -+ return vmulq_laneq_f16 (__a, vdupq_n_f16 (__b), 0); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) --vzip2_u16 (uint16x4_t __a, uint16x4_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulxh_lane_f16 (float16_t __a, float16x4_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); --#endif -+ return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, 
__lane)); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) --vzip2_u32 (uint32x2_t __a, uint32x2_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulx_lane_f16 (float16x4_t __a, float16x4_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); --#endif -+ return vmulx_f16 (__a, __aarch64_vdup_lane_f16 (__b, __lane)); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) --vzip2q_f32 (float32x4_t __a, float32x4_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulxq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); --#endif -+ return vmulxq_f16 (__a, __aarch64_vdupq_lane_f16 (__b, __lane)); - } - --__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) --vzip2q_f64 (float64x2_t __a, float64x2_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulxh_laneq_f16 (float16_t __a, float16x8_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); --#endif -+ return vmulxh_f16 (__a, __aarch64_vget_lane_any (__b, __lane)); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) --vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulx_laneq_f16 (float16x4_t __a, float16x8_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); --#endif -+ return vmulx_f16 (__a, __aarch64_vdup_laneq_f16 (__b, __lane)); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) --vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulxq_laneq_f16 (float16x8_t __a, float16x8_t __b, const int __lane) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) -- {4, 12, 5, 13, 6, 14, 7, 15}); --#endif -+ return vmulxq_f16 (__a, __aarch64_vdupq_laneq_f16 (__b, __lane)); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) --vzip2q_s8 (int8x16_t __a, int8x16_t __b) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulx_n_f16 (float16x4_t __a, float16_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); --#endif -+ return vmulx_f16 (__a, vdup_n_f16 (__b)); - } - 
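/* Editorial aside, not part of the patch: a minimal usage sketch of the
   ARMv8.2-A FP16 lane and broadcast intrinsics added above.  Assumes a
   compiler carrying this patch, invoked with -march=armv8.2-a+fp16; the
   variable names and values are illustrative only.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  float16x4_t acc = vdup_n_f16 (1.0);	/* {1, 1, 1, 1} */
  float16x4_t mul = vdup_n_f16 (2.0);	/* {2, 2, 2, 2} */
  float16x4_t vec = { 3.0, 4.0, 5.0, 6.0 };

  /* vfma_lane_f16 computes acc[i] + mul[i] * vec[1] in each lane
     (1 + 2 * 4 = 9), via the vdup-and-vfma expansion shown in the
     hunk above.  */
  float16x4_t d = vfma_lane_f16 (acc, mul, vec, 1);

  /* vmul_n_f16 multiplies each lane by a broadcast scalar (2 * 3 = 6).  */
  float16x4_t e = vmul_n_f16 (mul, 3.0);

  printf ("%g %g\n", (double) vget_lane_f16 (d, 0),
	  (double) vget_lane_f16 (e, 0));	/* prints: 9 6 */
  return 0;
}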
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) --vzip2q_s16 (int16x8_t __a, int16x8_t __b) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulxq_n_f16 (float16x8_t __a, float16_t __b) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) -- {4, 12, 5, 13, 6, 14, 7, 15}); --#endif -+ return vmulxq_f16 (__a, vdupq_n_f16 (__b)); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) --vzip2q_s32 (int32x4_t __a, int32x4_t __b) -+/* ARMv8.2-A FP16 reduction vector intrinsics. */ -+ -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxv_f16 (float16x4_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); --#endif -+ return __builtin_aarch64_reduc_smax_nan_scal_v4hf (__a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) --vzip2q_s64 (int64x2_t __a, int64x2_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxvq_f16 (float16x8_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_reduc_smax_nan_scal_v8hf (__a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminv_f16 (float16x4_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); --#else -- return __builtin_shuffle (__a, __b, (uint8x16_t) -- {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); --#endif -+ return __builtin_aarch64_reduc_smin_nan_scal_v4hf (__a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) --vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminvq_f16 (float16x8_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); --#else -- return __builtin_shuffle (__a, __b, (uint16x8_t) -- {4, 12, 5, 13, 6, 14, 7, 15}); --#endif -+ return __builtin_aarch64_reduc_smin_nan_scal_v8hf (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxnmv_f16 (float16x4_t __a) - { --#ifdef __AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); --#else -- return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); --#endif -+ return __builtin_aarch64_reduc_smax_scal_v4hf (__a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) --vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxnmvq_f16 (float16x8_t __a) - { --#ifdef 
__AARCH64EB__ -- return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); --#else -- return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); --#endif -+ return __builtin_aarch64_reduc_smax_scal_v8hf (__a); - } - --__INTERLEAVE_LIST (zip) -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmv_f16 (float16x4_t __a) -+{ -+ return __builtin_aarch64_reduc_smin_scal_v4hf (__a); -+} - --#undef __INTERLEAVE_LIST --#undef __DEFINTERLEAVE -+__extension__ extern __inline float16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmvq_f16 (float16x8_t __a) -+{ -+ return __builtin_aarch64_reduc_smin_scal_v8hf (__a); -+} - --/* End of optimal implementations in approved order. */ -+#pragma GCC pop_options - - #undef __aarch64_vget_lane_any - - #undef __aarch64_vdup_lane_any -+#undef __aarch64_vdup_lane_f16 - #undef __aarch64_vdup_lane_f32 - #undef __aarch64_vdup_lane_f64 - #undef __aarch64_vdup_lane_p8 -@@ -25780,6 +31558,7 @@ __INTERLEAVE_LIST (zip) - #undef __aarch64_vdup_lane_u16 - #undef __aarch64_vdup_lane_u32 - #undef __aarch64_vdup_lane_u64 -+#undef __aarch64_vdup_laneq_f16 - #undef __aarch64_vdup_laneq_f32 - #undef __aarch64_vdup_laneq_f64 - #undef __aarch64_vdup_laneq_p8 -@@ -25792,6 +31571,7 @@ __INTERLEAVE_LIST (zip) - #undef __aarch64_vdup_laneq_u16 - #undef __aarch64_vdup_laneq_u32 - #undef __aarch64_vdup_laneq_u64 -+#undef __aarch64_vdupq_lane_f16 - #undef __aarch64_vdupq_lane_f32 - #undef __aarch64_vdupq_lane_f64 - #undef __aarch64_vdupq_lane_p8 -@@ -25804,6 +31584,7 @@ __INTERLEAVE_LIST (zip) - #undef __aarch64_vdupq_lane_u16 - #undef __aarch64_vdupq_lane_u32 - #undef __aarch64_vdupq_lane_u64 -+#undef __aarch64_vdupq_laneq_f16 - #undef __aarch64_vdupq_laneq_f32 - #undef __aarch64_vdupq_laneq_f64 - #undef __aarch64_vdupq_laneq_p8 -@@ -25817,6 +31598,4 @@ __INTERLEAVE_LIST (zip) - #undef __aarch64_vdupq_laneq_u32 - #undef __aarch64_vdupq_laneq_u64 - --#pragma GCC pop_options -- - #endif ---- a/src/gcc/config/aarch64/atomics.md -+++ b/src/gcc/config/aarch64/atomics.md -@@ -583,7 +583,7 @@ - } - ) - --;; ARMv8.1 LSE instructions. -+;; ARMv8.1-A LSE instructions. - - ;; Atomic swap with memory. - (define_insn "aarch64_atomic_swp<mode>" ---- a/src/gcc/config/aarch64/cortex-a57-fma-steering.c -+++ b/src/gcc/config/aarch64/cortex-a57-fma-steering.c -@@ -35,7 +35,6 @@ - #include "context.h" - #include "tree-pass.h" - #include "regrename.h" --#include "cortex-a57-fma-steering.h" - #include "aarch64-protos.h" - - /* For better performance, the destination of FMADD/FMSUB instructions should -@@ -923,10 +922,10 @@ func_fma_steering::analyze () - FOR_BB_INSNS (bb, insn) - { - operand_rr_info *dest_op_info; -- struct du_chain *chain; -+ struct du_chain *chain = NULL; - unsigned dest_regno; -- fma_forest *forest; -- du_head_p head; -+ fma_forest *forest = NULL; -+ du_head_p head = NULL; - int i; - - if (!is_fmul_fmac_insn (insn, true)) -@@ -1068,21 +1067,8 @@ public: - - /* Create a new fma steering pass instance. */ - --static rtl_opt_pass * -+rtl_opt_pass * - make_pass_fma_steering (gcc::context *ctxt) - { - return new pass_fma_steering (ctxt); - } -- --/* Register the FMA steering pass to the pass manager. 
*/ -- --void --aarch64_register_fma_steering () --{ -- opt_pass *pass_fma_steering = make_pass_fma_steering (g); -- -- struct register_pass_info fma_steering_info -- = { pass_fma_steering, "rnreg", 1, PASS_POS_INSERT_AFTER }; -- -- register_pass (&fma_steering_info); --} ---- a/src/gcc/config/aarch64/cortex-a57-fma-steering.h -+++ b/src//dev/null -@@ -1,22 +0,0 @@ --/* This file contains declarations for the FMA steering optimization -- pass for Cortex-A57. -- Copyright (C) 2015-2016 Free Software Foundation, Inc. -- Contributed by ARM Ltd. -- -- This file is part of GCC. -- -- GCC is free software; you can redistribute it and/or modify it -- under the terms of the GNU General Public License as published by -- the Free Software Foundation; either version 3, or (at your option) -- any later version. -- -- GCC is distributed in the hope that it will be useful, but -- WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with GCC; see the file COPYING3. If not see -- <http://www.gnu.org/licenses/>. */ -- --void aarch64_register_fma_steering (void); ---- a/src/gcc/config/aarch64/geniterators.sh -+++ b/src/gcc/config/aarch64/geniterators.sh -@@ -23,10 +23,7 @@ - # BUILTIN_<ITERATOR> macros, which expand to VAR<N> Macros covering the - # same set of modes as the iterator in iterators.md - # --# Find the <ITERATOR> definitions (may span several lines), skip the ones --# which does not have a simple format because it contains characters we --# don't want to or can't handle (e.g P, PTR iterators change depending on --# Pmode and ptr_mode). -+# Find the <ITERATOR> definitions (may span several lines). - LC_ALL=C awk ' - BEGIN { - print "/* -*- buffer-read-only: t -*- */" -@@ -49,12 +46,24 @@ iterdef { - sub(/.*\(define_mode_iterator/, "", s) - } - --iterdef && s ~ /\)/ { -+iterdef { -+ # Count the parentheses, the iterator definition ends -+ # if there are more closing ones than opening ones. -+ nopen = gsub(/\(/, "(", s) -+ nclose = gsub(/\)/, ")", s) -+ if (nopen >= nclose) -+ next -+ - iterdef = 0 - - gsub(/[ \t]+/, " ", s) -- sub(/ *\).*/, "", s) -+ sub(/ *\)[^)]*$/, "", s) - sub(/^ /, "", s) -+ -+ # Drop the conditions. -+ gsub(/ *"[^"]*" *\)/, "", s) -+ gsub(/\( */, "", s) -+ - if (s !~ /^[A-Za-z0-9_]+ \[[A-Z0-9 ]*\]$/) - next - sub(/\[ */, "", s) ---- a/src/gcc/config/aarch64/iterators.md -+++ b/src/gcc/config/aarch64/iterators.md -@@ -26,6 +26,9 @@ - ;; Iterator for General Purpose Integer registers (32- and 64-bit modes) - (define_mode_iterator GPI [SI DI]) - -+;; Iterator for HI, SI, DI, some instructions can only work on these modes. -+(define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI]) -+ - ;; Iterator for QI and HI modes - (define_mode_iterator SHORT [QI HI]) - -@@ -38,6 +41,9 @@ - ;; Iterator for General Purpose Floating-point registers (32- and 64-bit modes) - (define_mode_iterator GPF [SF DF]) - -+;; Iterator for all scalar floating point modes (HF, SF, DF) -+(define_mode_iterator GPF_F16 [(HF "AARCH64_ISA_F16") SF DF]) -+ - ;; Iterator for all scalar floating point modes (HF, SF, DF and TF) - (define_mode_iterator GPF_TF_F16 [HF SF DF TF]) - -@@ -88,11 +94,22 @@ - ;; Vector Float modes suitable for moving, loading and storing. - (define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF]) - --;; Vector Float modes, barring HF modes. -+;; Vector Float modes. 
- (define_mode_iterator VDQF [V2SF V4SF V2DF]) -+(define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST") -+ (V8HF "TARGET_SIMD_F16INST") -+ V2SF V4SF V2DF]) - - ;; Vector Float modes, and DF. - (define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF]) -+(define_mode_iterator VHSDF_DF [(V4HF "TARGET_SIMD_F16INST") -+ (V8HF "TARGET_SIMD_F16INST") -+ V2SF V4SF V2DF DF]) -+(define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST") -+ (V8HF "TARGET_SIMD_F16INST") -+ V2SF V4SF V2DF -+ (HF "TARGET_SIMD_F16INST") -+ SF DF]) - - ;; Vector single Float modes. - (define_mode_iterator VDQSF [V2SF V4SF]) -@@ -150,10 +167,30 @@ - - ;; Vector modes except double int. - (define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) -+(define_mode_iterator VDQIF_F16 [V8QI V16QI V4HI V8HI V2SI V4SI -+ V4HF V8HF V2SF V4SF V2DF]) - - ;; Vector modes for S type. - (define_mode_iterator VDQ_SI [V2SI V4SI]) - -+;; Vector modes for S and D -+(define_mode_iterator VDQ_SDI [V2SI V4SI V2DI]) -+ -+;; Vector modes for H, S and D -+(define_mode_iterator VDQ_HSDI [(V4HI "TARGET_SIMD_F16INST") -+ (V8HI "TARGET_SIMD_F16INST") -+ V2SI V4SI V2DI]) -+ -+;; Scalar and Vector modes for S and D -+(define_mode_iterator VSDQ_SDI [V2SI V4SI V2DI SI DI]) -+ -+;; Scalar and Vector modes for S and D, Vector modes for H. -+(define_mode_iterator VSDQ_HSDI [(V4HI "TARGET_SIMD_F16INST") -+ (V8HI "TARGET_SIMD_F16INST") -+ V2SI V4SI V2DI -+ (HI "TARGET_SIMD_F16INST") -+ SI DI]) -+ - ;; Vector modes for Q and H types. - (define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI]) - -@@ -193,7 +230,10 @@ - (define_mode_iterator DX [DI DF]) - - ;; Modes available for <f>mul lane operations. --(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI V2SF V4SF V2DF]) -+(define_mode_iterator VMUL [V4HI V8HI V2SI V4SI -+ (V4HF "TARGET_SIMD_F16INST") -+ (V8HF "TARGET_SIMD_F16INST") -+ V2SF V4SF V2DF]) - - ;; Modes available for <f>mul lane operations changing lane count. - (define_mode_iterator VMUL_CHANGE_NLANES [V4HI V8HI V2SI V4SI V2SF V4SF]) -@@ -342,8 +382,8 @@ - (define_mode_attr w [(QI "w") (HI "w") (SI "w") (DI "x") (SF "s") (DF "d")]) - - ;; For inequal width int to float conversion --(define_mode_attr w1 [(SF "w") (DF "x")]) --(define_mode_attr w2 [(SF "x") (DF "w")]) -+(define_mode_attr w1 [(HF "w") (SF "w") (DF "x")]) -+(define_mode_attr w2 [(HF "x") (SF "x") (DF "w")]) - - (define_mode_attr short_mask [(HI "65535") (QI "255")]) - -@@ -355,12 +395,13 @@ - - ;; For scalar usage of vector/FP registers - (define_mode_attr v [(QI "b") (HI "h") (SI "s") (DI "d") -- (SF "s") (DF "d") -+ (HF "h") (SF "s") (DF "d") - (V8QI "") (V16QI "") - (V4HI "") (V8HI "") - (V2SI "") (V4SI "") - (V2DI "") (V2SF "") -- (V4SF "") (V2DF "")]) -+ (V4SF "") (V4HF "") -+ (V8HF "") (V2DF "")]) - - ;; For scalar usage of vector/FP registers, narrowing - (define_mode_attr vn2 [(QI "") (HI "b") (SI "h") (DI "s") -@@ -385,7 +426,7 @@ - (define_mode_attr vas [(DI "") (SI ".2s")]) - - ;; Map a floating point mode to the appropriate register name prefix --(define_mode_attr s [(SF "s") (DF "d")]) -+(define_mode_attr s [(HF "h") (SF "s") (DF "d")]) - - ;; Give the length suffix letter for a sign- or zero-extension. - (define_mode_attr size [(QI "b") (HI "h") (SI "w")]) -@@ -421,8 +462,8 @@ - (V4SF ".4s") (V2DF ".2d") - (DI "") (SI "") - (HI "") (QI "") -- (TI "") (SF "") -- (DF "")]) -+ (TI "") (HF "") -+ (SF "") (DF "")]) - - ;; Register suffix narrowed modes for VQN. 
- (define_mode_attr Vmntype [(V8HI ".8b") (V4SI ".4h") -@@ -437,10 +478,21 @@ - (V2DI "d") (V4HF "h") - (V8HF "h") (V2SF "s") - (V4SF "s") (V2DF "d") -+ (HF "h") - (SF "s") (DF "d") - (QI "b") (HI "h") - (SI "s") (DI "d")]) - -+;; Vetype is used everywhere in scheduling type and assembly output, -+;; sometimes they are not the same, for example HF modes on some -+;; instructions. stype is defined to represent scheduling type -+;; more accurately. -+(define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s") -+ (V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s") -+ (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d") -+ (HF "s") (SF "s") (DF "d") (QI "b") (HI "s") -+ (SI "s") (DI "d")]) -+ - ;; Mode-to-bitwise operation type mapping. - (define_mode_attr Vbtype [(V8QI "8b") (V16QI "16b") - (V4HI "8b") (V8HI "16b") -@@ -598,7 +650,7 @@ - (V4HF "V4HI") (V8HF "V8HI") - (V2SF "V2SI") (V4SF "V4SI") - (V2DF "V2DI") (DF "DI") -- (SF "SI")]) -+ (SF "SI") (HF "HI")]) - - ;; Lower case mode of results of comparison operations. - (define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi") -@@ -648,12 +700,21 @@ - (define_mode_attr atomic_sfx - [(QI "b") (HI "h") (SI "") (DI "")]) - --(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") (SF "si") (DF "di")]) --(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") (SF "SI") (DF "DI")]) -+(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si") -+ (V2DI "v2df") (V4SI "v4sf") (V2SI "v2sf") -+ (SF "si") (DF "di") (SI "sf") (DI "df") -+ (V4HF "v4hi") (V8HF "v8hi") (V4HI "v4hf") -+ (V8HI "v8hf") (HF "hi") (HI "hf")]) -+(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI") -+ (V2DI "V2DF") (V4SI "V4SF") (V2SI "V2SF") -+ (SF "SI") (DF "DI") (SI "SF") (DI "DF") -+ (V4HF "V4HI") (V8HF "V8HI") (V4HI "V4HF") -+ (V8HI "V8HF") (HF "HI") (HI "HF")]) -+ - - ;; for the inequal width integer to fp conversions --(define_mode_attr fcvt_iesize [(SF "di") (DF "si")]) --(define_mode_attr FCVT_IESIZE [(SF "DI") (DF "SI")]) -+(define_mode_attr fcvt_iesize [(HF "di") (SF "di") (DF "si")]) -+(define_mode_attr FCVT_IESIZE [(HF "DI") (SF "DI") (DF "SI")]) - - (define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI") - (V4HI "V8HI") (V8HI "V4HI") -@@ -676,6 +737,7 @@ - ;; the 'x' constraint. All other modes may use the 'w' constraint. - (define_mode_attr h_con [(V2SI "w") (V4SI "w") - (V4HI "x") (V8HI "x") -+ (V4HF "w") (V8HF "w") - (V2SF "w") (V4SF "w") - (V2DF "w") (DF "w")]) - -@@ -684,6 +746,7 @@ - (V4HI "") (V8HI "") - (V2SI "") (V4SI "") - (DI "") (V2DI "") -+ (V4HF "f") (V8HF "f") - (V2SF "f") (V4SF "f") - (V2DF "f") (DF "f")]) - -@@ -692,6 +755,7 @@ - (V4HI "") (V8HI "") - (V2SI "") (V4SI "") - (DI "") (V2DI "") -+ (V4HF "_fp") (V8HF "_fp") - (V2SF "_fp") (V4SF "_fp") - (V2DF "_fp") (DF "_fp") - (SF "_fp")]) -@@ -704,17 +768,19 @@ - (V4HF "") (V8HF "_q") - (V2SF "") (V4SF "_q") - (V2DF "_q") -- (QI "") (HI "") (SI "") (DI "") (SF "") (DF "")]) -+ (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")]) - - (define_mode_attr vp [(V8QI "v") (V16QI "v") - (V4HI "v") (V8HI "v") - (V2SI "p") (V4SI "v") -- (V2DI "p") (V2DF "p") -- (V2SF "p") (V4SF "v")]) -+ (V2DI "p") (V2DF "p") -+ (V2SF "p") (V4SF "v") -+ (V4HF "v") (V8HF "v")]) - - (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) - (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) - -+;; Sum of lengths of instructions needed to move vector registers of a mode. 
- (define_mode_attr insn_count [(OI "8") (CI "12") (XI "16")]) - - ;; -fpic small model GOT reloc modifers: gotpage_lo15/lo14 for ILP64/32. -@@ -876,9 +942,6 @@ - ;; Similar, but when not(op) - (define_code_attr nlogical [(and "bic") (ior "orn") (xor "eon")]) - --;; Sign- or zero-extending load --(define_code_attr ldrxt [(sign_extend "ldrs") (zero_extend "ldr")]) -- - ;; Sign- or zero-extending data-op - (define_code_attr su [(sign_extend "s") (zero_extend "u") - (sign_extract "s") (zero_extract "u") -@@ -953,9 +1016,8 @@ - (define_int_iterator ADDSUBHN2 [UNSPEC_ADDHN2 UNSPEC_RADDHN2 - UNSPEC_SUBHN2 UNSPEC_RSUBHN2]) - --(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN]) -- --(define_int_iterator FMAXMIN [UNSPEC_FMAXNM UNSPEC_FMINNM]) -+(define_int_iterator FMAXMIN_UNS [UNSPEC_FMAX UNSPEC_FMIN -+ UNSPEC_FMAXNM UNSPEC_FMINNM]) - - (define_int_iterator VQDMULH [UNSPEC_SQDMULH UNSPEC_SQRDMULH]) - -@@ -1001,6 +1063,9 @@ - (define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM - UNSPEC_FRINTA UNSPEC_FRINTN]) - -+(define_int_iterator FCVT_F2FIXED [UNSPEC_FCVTZS UNSPEC_FCVTZU]) -+(define_int_iterator FCVT_FIXED2F [UNSPEC_SCVTF UNSPEC_UCVTF]) -+ - (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) - - (define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W -@@ -1036,7 +1101,9 @@ - (UNSPEC_FMAXV "smax_nan") - (UNSPEC_FMIN "smin_nan") - (UNSPEC_FMINNMV "smin") -- (UNSPEC_FMINV "smin_nan")]) -+ (UNSPEC_FMINV "smin_nan") -+ (UNSPEC_FMAXNM "fmax") -+ (UNSPEC_FMINNM "fmin")]) - - (define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax") - (UNSPEC_UMINV "umin") -@@ -1047,13 +1114,9 @@ - (UNSPEC_FMAXV "fmax") - (UNSPEC_FMIN "fmin") - (UNSPEC_FMINNMV "fminnm") -- (UNSPEC_FMINV "fmin")]) -- --(define_int_attr fmaxmin [(UNSPEC_FMAXNM "fmax") -- (UNSPEC_FMINNM "fmin")]) -- --(define_int_attr fmaxmin_op [(UNSPEC_FMAXNM "fmaxnm") -- (UNSPEC_FMINNM "fminnm")]) -+ (UNSPEC_FMINV "fmin") -+ (UNSPEC_FMAXNM "fmaxnm") -+ (UNSPEC_FMINNM "fminnm")]) - - (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") - (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") -@@ -1137,6 +1200,11 @@ - (UNSPEC_FRINTP "ceil") (UNSPEC_FRINTM "floor") - (UNSPEC_FRINTN "frintn")]) - -+(define_int_attr fcvt_fixed_insn [(UNSPEC_SCVTF "scvtf") -+ (UNSPEC_UCVTF "ucvtf") -+ (UNSPEC_FCVTZS "fcvtzs") -+ (UNSPEC_FCVTZU "fcvtzu")]) -+ - (define_int_attr perm_insn [(UNSPEC_ZIP1 "zip") (UNSPEC_ZIP2 "zip") - (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") - (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) ---- a/src/gcc/config/aarch64/predicates.md -+++ b/src/gcc/config/aarch64/predicates.md -@@ -54,9 +54,9 @@ - (match_test "op == const0_rtx")))) - - (define_predicate "aarch64_reg_or_fp_zero" -- (and (match_code "reg,subreg,const_double") -- (ior (match_operand 0 "register_operand") -- (match_test "aarch64_float_const_zero_rtx_p (op)")))) -+ (ior (match_operand 0 "register_operand") -+ (and (match_code "const_double") -+ (match_test "aarch64_float_const_zero_rtx_p (op)")))) - - (define_predicate "aarch64_reg_zero_or_m1_or_1" - (and (match_code "reg,subreg,const_int") ---- a/src/gcc/config/aarch64/t-aarch64 -+++ b/src/gcc/config/aarch64/t-aarch64 -@@ -52,16 +52,17 @@ aarch-common.o: $(srcdir)/config/arm/aarch-common.c $(CONFIG_H) $(SYSTEM_H) \ - $(srcdir)/config/arm/aarch-common.c - - aarch64-c.o: $(srcdir)/config/aarch64/aarch64-c.c $(CONFIG_H) $(SYSTEM_H) \ -- coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) -+ coretypes.h $(TM_H) $(TREE_H) output.h $(C_COMMON_H) $(TARGET_H) - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) 
$(INCLUDES) \ - $(srcdir)/config/aarch64/aarch64-c.c - -+PASSES_EXTRA += $(srcdir)/config/aarch64/aarch64-passes.def -+ - cortex-a57-fma-steering.o: $(srcdir)/config/aarch64/cortex-a57-fma-steering.c \ - $(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \ - dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \ - output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \ - $(CONTEXT_H) $(TREE_PASS_H) regrename.h \ -- $(srcdir)/config/aarch64/cortex-a57-fma-steering.h \ - $(srcdir)/config/aarch64/aarch64-protos.h - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/aarch64/cortex-a57-fma-steering.c ---- a/src/gcc/config/aarch64/thunderx.md -+++ b/src/gcc/config/aarch64/thunderx.md -@@ -39,7 +39,7 @@ - - (define_insn_reservation "thunderx_shift" 1 - (and (eq_attr "tune" "thunderx") -- (eq_attr "type" "bfm,extend,rotate_imm,shift_imm,shift_reg,rbit,rev")) -+ (eq_attr "type" "bfm,bfx,extend,rotate_imm,shift_imm,shift_reg,rbit,rev")) - "thunderx_pipe0 | thunderx_pipe1") - - ---- a/src/gcc/config/alpha/alpha.c -+++ b/src/gcc/config/alpha/alpha.c -@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see - #include "target.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "gimple.h" - #include "df.h" - #include "tm_p.h" ---- a/src/gcc/config/arm/aarch-cost-tables.h -+++ b/src/gcc/config/arm/aarch-cost-tables.h -@@ -191,35 +191,35 @@ const struct cpu_cost_table cortexa53_extra_costs = - { - /* FP SFmode */ - { -- COSTS_N_INSNS (15), /* div. */ -- COSTS_N_INSNS (3), /* mult. */ -- COSTS_N_INSNS (7), /* mult_addsub. */ -- COSTS_N_INSNS (7), /* fma. */ -- COSTS_N_INSNS (3), /* addsub. */ -- COSTS_N_INSNS (1), /* fpconst. */ -- COSTS_N_INSNS (2), /* neg. */ -- COSTS_N_INSNS (1), /* compare. */ -- COSTS_N_INSNS (3), /* widen. */ -- COSTS_N_INSNS (3), /* narrow. */ -- COSTS_N_INSNS (3), /* toint. */ -- COSTS_N_INSNS (3), /* fromint. */ -- COSTS_N_INSNS (3) /* roundint. */ -+ COSTS_N_INSNS (5), /* div. */ -+ COSTS_N_INSNS (1), /* mult. */ -+ COSTS_N_INSNS (2), /* mult_addsub. */ -+ COSTS_N_INSNS (2), /* fma. */ -+ COSTS_N_INSNS (1), /* addsub. */ -+ 0, /* fpconst. */ -+ COSTS_N_INSNS (1), /* neg. */ -+ 0, /* compare. */ -+ COSTS_N_INSNS (1), /* widen. */ -+ COSTS_N_INSNS (1), /* narrow. */ -+ COSTS_N_INSNS (1), /* toint. */ -+ COSTS_N_INSNS (1), /* fromint. */ -+ COSTS_N_INSNS (1) /* roundint. */ - }, - /* FP DFmode */ - { -- COSTS_N_INSNS (30), /* div. */ -- COSTS_N_INSNS (3), /* mult. */ -- COSTS_N_INSNS (7), /* mult_addsub. */ -- COSTS_N_INSNS (7), /* fma. */ -- COSTS_N_INSNS (3), /* addsub. */ -- COSTS_N_INSNS (1), /* fpconst. */ -- COSTS_N_INSNS (2), /* neg. */ -- COSTS_N_INSNS (1), /* compare. */ -- COSTS_N_INSNS (3), /* widen. */ -- COSTS_N_INSNS (3), /* narrow. */ -- COSTS_N_INSNS (3), /* toint. */ -- COSTS_N_INSNS (3), /* fromint. */ -- COSTS_N_INSNS (3) /* roundint. */ -+ COSTS_N_INSNS (10), /* div. */ -+ COSTS_N_INSNS (1), /* mult. */ -+ COSTS_N_INSNS (2), /* mult_addsub. */ -+ COSTS_N_INSNS (2), /* fma. */ -+ COSTS_N_INSNS (1), /* addsub. */ -+ 0, /* fpconst. */ -+ COSTS_N_INSNS (1), /* neg. */ -+ 0, /* compare. */ -+ COSTS_N_INSNS (1), /* widen. */ -+ COSTS_N_INSNS (1), /* narrow. */ -+ COSTS_N_INSNS (1), /* toint. */ -+ COSTS_N_INSNS (1), /* fromint. */ -+ COSTS_N_INSNS (1) /* roundint. */ - } - }, - /* Vector */ -@@ -294,35 +294,35 @@ const struct cpu_cost_table cortexa57_extra_costs = - { - /* FP SFmode */ - { -- COSTS_N_INSNS (17), /* div. */ -- COSTS_N_INSNS (5), /* mult. 
*/ -- COSTS_N_INSNS (9), /* mult_addsub. */ -- COSTS_N_INSNS (9), /* fma. */ -- COSTS_N_INSNS (4), /* addsub. */ -- COSTS_N_INSNS (2), /* fpconst. */ -- COSTS_N_INSNS (2), /* neg. */ -- COSTS_N_INSNS (2), /* compare. */ -- COSTS_N_INSNS (4), /* widen. */ -- COSTS_N_INSNS (4), /* narrow. */ -- COSTS_N_INSNS (4), /* toint. */ -- COSTS_N_INSNS (4), /* fromint. */ -- COSTS_N_INSNS (4) /* roundint. */ -+ COSTS_N_INSNS (6), /* div. */ -+ COSTS_N_INSNS (1), /* mult. */ -+ COSTS_N_INSNS (2), /* mult_addsub. */ -+ COSTS_N_INSNS (2), /* fma. */ -+ COSTS_N_INSNS (1), /* addsub. */ -+ 0, /* fpconst. */ -+ 0, /* neg. */ -+ 0, /* compare. */ -+ COSTS_N_INSNS (1), /* widen. */ -+ COSTS_N_INSNS (1), /* narrow. */ -+ COSTS_N_INSNS (1), /* toint. */ -+ COSTS_N_INSNS (1), /* fromint. */ -+ COSTS_N_INSNS (1) /* roundint. */ - }, - /* FP DFmode */ - { -- COSTS_N_INSNS (31), /* div. */ -- COSTS_N_INSNS (5), /* mult. */ -- COSTS_N_INSNS (9), /* mult_addsub. */ -- COSTS_N_INSNS (9), /* fma. */ -- COSTS_N_INSNS (4), /* addsub. */ -- COSTS_N_INSNS (2), /* fpconst. */ -- COSTS_N_INSNS (2), /* neg. */ -- COSTS_N_INSNS (2), /* compare. */ -- COSTS_N_INSNS (4), /* widen. */ -- COSTS_N_INSNS (4), /* narrow. */ -- COSTS_N_INSNS (4), /* toint. */ -- COSTS_N_INSNS (4), /* fromint. */ -- COSTS_N_INSNS (4) /* roundint. */ -+ COSTS_N_INSNS (11), /* div. */ -+ COSTS_N_INSNS (1), /* mult. */ -+ COSTS_N_INSNS (2), /* mult_addsub. */ -+ COSTS_N_INSNS (2), /* fma. */ -+ COSTS_N_INSNS (1), /* addsub. */ -+ 0, /* fpconst. */ -+ 0, /* neg. */ -+ 0, /* compare. */ -+ COSTS_N_INSNS (1), /* widen. */ -+ COSTS_N_INSNS (1), /* narrow. */ -+ COSTS_N_INSNS (1), /* toint. */ -+ COSTS_N_INSNS (1), /* fromint. */ -+ COSTS_N_INSNS (1) /* roundint. */ - } - }, - /* Vector */ -@@ -537,4 +537,107 @@ const struct cpu_cost_table xgene1_extra_costs = - } - }; - -+const struct cpu_cost_table qdf24xx_extra_costs = -+{ -+ /* ALU */ -+ { -+ 0, /* arith. */ -+ 0, /* logical. */ -+ 0, /* shift. */ -+ 0, /* shift_reg. */ -+ COSTS_N_INSNS (1), /* arith_shift. */ -+ COSTS_N_INSNS (1), /* arith_shift_reg. */ -+ 0, /* log_shift. */ -+ 0, /* log_shift_reg. */ -+ 0, /* extend. */ -+ 0, /* extend_arith. */ -+ 0, /* bfi. */ -+ 0, /* bfx. */ -+ 0, /* clz. */ -+ 0, /* rev. */ -+ 0, /* non_exec. */ -+ true /* non_exec_costs_exec. */ -+ }, -+ { -+ /* MULT SImode */ -+ { -+ COSTS_N_INSNS (2), /* simple. */ -+ COSTS_N_INSNS (2), /* flag_setting. */ -+ COSTS_N_INSNS (2), /* extend. */ -+ COSTS_N_INSNS (2), /* add. */ -+ COSTS_N_INSNS (2), /* extend_add. */ -+ COSTS_N_INSNS (4) /* idiv. */ -+ }, -+ /* MULT DImode */ -+ { -+ COSTS_N_INSNS (3), /* simple. */ -+ 0, /* flag_setting (N/A). */ -+ COSTS_N_INSNS (3), /* extend. */ -+ COSTS_N_INSNS (3), /* add. */ -+ COSTS_N_INSNS (3), /* extend_add. */ -+ COSTS_N_INSNS (9) /* idiv. */ -+ } -+ }, -+ /* LD/ST */ -+ { -+ COSTS_N_INSNS (2), /* load. */ -+ COSTS_N_INSNS (2), /* load_sign_extend. */ -+ COSTS_N_INSNS (2), /* ldrd. */ -+ COSTS_N_INSNS (2), /* ldm_1st. */ -+ 1, /* ldm_regs_per_insn_1st. */ -+ 2, /* ldm_regs_per_insn_subsequent. */ -+ COSTS_N_INSNS (2), /* loadf. */ -+ COSTS_N_INSNS (2), /* loadd. */ -+ COSTS_N_INSNS (3), /* load_unaligned. */ -+ 0, /* store. */ -+ 0, /* strd. */ -+ 0, /* stm_1st. */ -+ 1, /* stm_regs_per_insn_1st. */ -+ 2, /* stm_regs_per_insn_subsequent. */ -+ 0, /* storef. */ -+ 0, /* stored. */ -+ COSTS_N_INSNS (1), /* store_unaligned. */ -+ COSTS_N_INSNS (1), /* loadv. */ -+ COSTS_N_INSNS (1) /* storev. */ -+ }, -+ { -+ /* FP SFmode */ -+ { -+ COSTS_N_INSNS (6), /* div. 
*/ -+ COSTS_N_INSNS (5), /* mult. */ -+ COSTS_N_INSNS (5), /* mult_addsub. */ -+ COSTS_N_INSNS (5), /* fma. */ -+ COSTS_N_INSNS (3), /* addsub. */ -+ COSTS_N_INSNS (1), /* fpconst. */ -+ COSTS_N_INSNS (1), /* neg. */ -+ COSTS_N_INSNS (2), /* compare. */ -+ COSTS_N_INSNS (4), /* widen. */ -+ COSTS_N_INSNS (4), /* narrow. */ -+ COSTS_N_INSNS (4), /* toint. */ -+ COSTS_N_INSNS (4), /* fromint. */ -+ COSTS_N_INSNS (2) /* roundint. */ -+ }, -+ /* FP DFmode */ -+ { -+ COSTS_N_INSNS (11), /* div. */ -+ COSTS_N_INSNS (6), /* mult. */ -+ COSTS_N_INSNS (6), /* mult_addsub. */ -+ COSTS_N_INSNS (6), /* fma. */ -+ COSTS_N_INSNS (3), /* addsub. */ -+ COSTS_N_INSNS (1), /* fpconst. */ -+ COSTS_N_INSNS (1), /* neg. */ -+ COSTS_N_INSNS (2), /* compare. */ -+ COSTS_N_INSNS (4), /* widen. */ -+ COSTS_N_INSNS (4), /* narrow. */ -+ COSTS_N_INSNS (4), /* toint. */ -+ COSTS_N_INSNS (4), /* fromint. */ -+ COSTS_N_INSNS (2) /* roundint. */ -+ } -+ }, -+ /* Vector */ -+ { -+ COSTS_N_INSNS (1) /* alu. */ -+ } -+}; -+ - #endif /* GCC_AARCH_COST_TABLES_H */ ---- a/src/gcc/config/arm/arm-arches.def -+++ b/src/gcc/config/arm/arm-arches.def -@@ -58,10 +58,22 @@ ARM_ARCH("armv7e-m", cortexm4, 7EM, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_F - ARM_ARCH("armv8-a", cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_FOR_ARCH8A)) - ARM_ARCH("armv8-a+crc",cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A)) - ARM_ARCH("armv8.1-a", cortexa53, 8A, -- ARM_FSET_MAKE (FL_CO_PROC | FL_FOR_ARCH8A, FL2_FOR_ARCH8_1A)) -+ ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, -+ FL2_FOR_ARCH8_1A)) - ARM_ARCH("armv8.1-a+crc",cortexa53, 8A, - ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, - FL2_FOR_ARCH8_1A)) -+ARM_ARCH ("armv8.2-a", cortexa53, 8A, -+ ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, -+ FL2_FOR_ARCH8_2A)) -+ARM_ARCH ("armv8.2-a+fp16", cortexa53, 8A, -+ ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A, -+ FL2_FOR_ARCH8_2A | FL2_FP16INST)) -+ARM_ARCH("armv8-m.base", cortexm23, 8M_BASE, -+ ARM_FSET_MAKE (FL_FOR_ARCH8M_BASE, FL2_CMSE)) -+ARM_ARCH("armv8-m.main", cortexm7, 8M_MAIN, -+ ARM_FSET_MAKE (FL_CO_PROC | FL_FOR_ARCH8M_MAIN, FL2_CMSE)) -+ARM_ARCH("armv8-m.main+dsp", cortexm33, 8M_MAIN, -+ ARM_FSET_MAKE (FL_CO_PROC | FL_ARCH7EM | FL_FOR_ARCH8M_MAIN, FL2_CMSE)) - ARM_ARCH("iwmmxt", iwmmxt, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT)) - ARM_ARCH("iwmmxt2", iwmmxt2, 5TE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2)) -- ---- a/src/gcc/config/arm/arm-builtins.c -+++ b/src/gcc/config/arm/arm-builtins.c -@@ -190,6 +190,8 @@ arm_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] - #define ti_UP TImode - #define ei_UP EImode - #define oi_UP OImode -+#define hf_UP HFmode -+#define si_UP SImode - - #define UP(X) X##_UP - -@@ -239,12 +241,22 @@ typedef struct { - VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \ - VAR1 (T, N, L) - --/* The NEON builtin data can be found in arm_neon_builtins.def. -- The mode entries in the following table correspond to the "key" type of the -- instruction variant, i.e. equivalent to that which would be specified after -- the assembler mnemonic, which usually refers to the last vector operand. -- The modes listed per instruction should be the same as those defined for -- that instruction's pattern in neon.md. */ -+/* The NEON builtin data can be found in arm_neon_builtins.def and -+ arm_vfp_builtins.def. The entries in arm_neon_builtins.def require -+ TARGET_NEON to be true. 
The feature tests are checked when the -+ builtins are expanded. -+ -+ The mode entries in the following table correspond to the "key" -+ type of the instruction variant, i.e. equivalent to that which -+ would be specified after the assembler mnemonic, which usually -+ refers to the last vector operand. The modes listed per -+ instruction should be the same as those defined for that -+ instruction's pattern in neon.md. */ -+ -+static neon_builtin_datum vfp_builtin_data[] = -+{ -+#include "arm_vfp_builtins.def" -+}; - - static neon_builtin_datum neon_builtin_data[] = - { -@@ -515,6 +527,8 @@ enum arm_builtins - ARM_BUILTIN_GET_FPSCR, - ARM_BUILTIN_SET_FPSCR, - -+ ARM_BUILTIN_CMSE_NONSECURE_CALLER, -+ - #undef CRYPTO1 - #undef CRYPTO2 - #undef CRYPTO3 -@@ -534,6 +548,10 @@ enum arm_builtins - #undef CRYPTO2 - #undef CRYPTO3 - -+ ARM_BUILTIN_VFP_BASE, -+ -+#include "arm_vfp_builtins.def" -+ - ARM_BUILTIN_NEON_BASE, - ARM_BUILTIN_NEON_LANE_CHECK = ARM_BUILTIN_NEON_BASE, - -@@ -542,8 +560,11 @@ enum arm_builtins - ARM_BUILTIN_MAX - }; - -+#define ARM_BUILTIN_VFP_PATTERN_START \ -+ (ARM_BUILTIN_VFP_BASE + 1) -+ - #define ARM_BUILTIN_NEON_PATTERN_START \ -- (ARM_BUILTIN_MAX - ARRAY_SIZE (neon_builtin_data)) -+ (ARM_BUILTIN_NEON_BASE + 1) - - #undef CF - #undef VAR1 -@@ -895,6 +916,110 @@ arm_init_simd_builtin_scalar_types (void) - "__builtin_neon_uti"); - } - -+/* Set up a NEON builtin. */ -+ -+static void -+arm_init_neon_builtin (unsigned int fcode, -+ neon_builtin_datum *d) -+{ -+ bool print_type_signature_p = false; -+ char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; -+ char namebuf[60]; -+ tree ftype = NULL; -+ tree fndecl = NULL; -+ -+ d->fcode = fcode; -+ -+ /* We must track two variables here. op_num is -+ the operand number as in the RTL pattern. This is -+ required to access the mode (e.g. V4SF mode) of the -+ argument, from which the base type can be derived. -+ arg_num is an index in to the qualifiers data, which -+ gives qualifiers to the type (e.g. const unsigned). -+ The reason these two variables may differ by one is the -+ void return type. While all return types take the 0th entry -+ in the qualifiers array, there is no operand for them in the -+ RTL pattern. */ -+ int op_num = insn_data[d->code].n_operands - 1; -+ int arg_num = d->qualifiers[0] & qualifier_void -+ ? op_num + 1 -+ : op_num; -+ tree return_type = void_type_node, args = void_list_node; -+ tree eltype; -+ -+ /* Build a function type directly from the insn_data for this -+ builtin. The build_function_type () function takes care of -+ removing duplicates for us. */ -+ for (; op_num >= 0; arg_num--, op_num--) -+ { -+ machine_mode op_mode = insn_data[d->code].operand[op_num].mode; -+ enum arm_type_qualifiers qualifiers = d->qualifiers[arg_num]; -+ -+ if (qualifiers & qualifier_unsigned) -+ { -+ type_signature[arg_num] = 'u'; -+ print_type_signature_p = true; -+ } -+ else if (qualifiers & qualifier_poly) -+ { -+ type_signature[arg_num] = 'p'; -+ print_type_signature_p = true; -+ } -+ else -+ type_signature[arg_num] = 's'; -+ -+ /* Skip an internal operand for vget_{low, high}. */ -+ if (qualifiers & qualifier_internal) -+ continue; -+ -+ /* Some builtins have different user-facing types -+ for certain arguments, encoded in d->mode. */ -+ if (qualifiers & qualifier_map_mode) -+ op_mode = d->mode; -+ -+ /* For pointers, we want a pointer to the basic type -+ of the vector. 
*/ -+ if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) -+ op_mode = GET_MODE_INNER (op_mode); -+ -+ eltype = arm_simd_builtin_type -+ (op_mode, -+ (qualifiers & qualifier_unsigned) != 0, -+ (qualifiers & qualifier_poly) != 0); -+ gcc_assert (eltype != NULL); -+ -+ /* Add qualifiers. */ -+ if (qualifiers & qualifier_const) -+ eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); -+ -+ if (qualifiers & qualifier_pointer) -+ eltype = build_pointer_type (eltype); -+ -+ /* If we have reached arg_num == 0, we are at a non-void -+ return type. Otherwise, we are still processing -+ arguments. */ -+ if (arg_num == 0) -+ return_type = eltype; -+ else -+ args = tree_cons (NULL_TREE, eltype, args); -+ } -+ -+ ftype = build_function_type (return_type, args); -+ -+ gcc_assert (ftype != NULL); -+ -+ if (print_type_signature_p) -+ snprintf (namebuf, sizeof (namebuf), "__builtin_neon_%s_%s", -+ d->name, type_signature); -+ else -+ snprintf (namebuf, sizeof (namebuf), "__builtin_neon_%s", -+ d->name); -+ -+ fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, -+ NULL, NULL_TREE); -+ arm_builtin_decls[fcode] = fndecl; -+} -+ - /* Set up all the NEON builtins, even builtins for instructions that are not - in the current target ISA to allow the user to compile particular modules - with different target specific options that differ from the command line -@@ -924,103 +1049,22 @@ arm_init_neon_builtins (void) - - for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++, fcode++) - { -- bool print_type_signature_p = false; -- char type_signature[SIMD_MAX_BUILTIN_ARGS] = { 0 }; - neon_builtin_datum *d = &neon_builtin_data[i]; -- char namebuf[60]; -- tree ftype = NULL; -- tree fndecl = NULL; -- -- d->fcode = fcode; -- -- /* We must track two variables here. op_num is -- the operand number as in the RTL pattern. This is -- required to access the mode (e.g. V4SF mode) of the -- argument, from which the base type can be derived. -- arg_num is an index in to the qualifiers data, which -- gives qualifiers to the type (e.g. const unsigned). -- The reason these two variables may differ by one is the -- void return type. While all return types take the 0th entry -- in the qualifiers array, there is no operand for them in the -- RTL pattern. */ -- int op_num = insn_data[d->code].n_operands - 1; -- int arg_num = d->qualifiers[0] & qualifier_void -- ? op_num + 1 -- : op_num; -- tree return_type = void_type_node, args = void_list_node; -- tree eltype; -- -- /* Build a function type directly from the insn_data for this -- builtin. The build_function_type () function takes care of -- removing duplicates for us. */ -- for (; op_num >= 0; arg_num--, op_num--) -- { -- machine_mode op_mode = insn_data[d->code].operand[op_num].mode; -- enum arm_type_qualifiers qualifiers = d->qualifiers[arg_num]; -- -- if (qualifiers & qualifier_unsigned) -- { -- type_signature[arg_num] = 'u'; -- print_type_signature_p = true; -- } -- else if (qualifiers & qualifier_poly) -- { -- type_signature[arg_num] = 'p'; -- print_type_signature_p = true; -- } -- else -- type_signature[arg_num] = 's'; -- -- /* Skip an internal operand for vget_{low, high}. */ -- if (qualifiers & qualifier_internal) -- continue; -- -- /* Some builtins have different user-facing types -- for certain arguments, encoded in d->mode. */ -- if (qualifiers & qualifier_map_mode) -- op_mode = d->mode; -- -- /* For pointers, we want a pointer to the basic type -- of the vector. 
*/ -- if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) -- op_mode = GET_MODE_INNER (op_mode); -- -- eltype = arm_simd_builtin_type -- (op_mode, -- (qualifiers & qualifier_unsigned) != 0, -- (qualifiers & qualifier_poly) != 0); -- gcc_assert (eltype != NULL); -- -- /* Add qualifiers. */ -- if (qualifiers & qualifier_const) -- eltype = build_qualified_type (eltype, TYPE_QUAL_CONST); -- -- if (qualifiers & qualifier_pointer) -- eltype = build_pointer_type (eltype); -- -- /* If we have reached arg_num == 0, we are at a non-void -- return type. Otherwise, we are still processing -- arguments. */ -- if (arg_num == 0) -- return_type = eltype; -- else -- args = tree_cons (NULL_TREE, eltype, args); -- } -- -- ftype = build_function_type (return_type, args); -+ arm_init_neon_builtin (fcode, d); -+ } -+} - -- gcc_assert (ftype != NULL); -+/* Set up all the scalar floating point builtins. */ - -- if (print_type_signature_p) -- snprintf (namebuf, sizeof (namebuf), "__builtin_neon_%s_%s", -- d->name, type_signature); -- else -- snprintf (namebuf, sizeof (namebuf), "__builtin_neon_%s", -- d->name); -+static void -+arm_init_vfp_builtins (void) -+{ -+ unsigned int i, fcode = ARM_BUILTIN_VFP_PATTERN_START; - -- fndecl = add_builtin_function (namebuf, ftype, fcode, BUILT_IN_MD, -- NULL, NULL_TREE); -- arm_builtin_decls[fcode] = fndecl; -+ for (i = 0; i < ARRAY_SIZE (vfp_builtin_data); i++, fcode++) -+ { -+ neon_builtin_datum *d = &vfp_builtin_data[i]; -+ arm_init_neon_builtin (fcode, d); - } - } - -@@ -1768,14 +1812,14 @@ arm_init_builtins (void) - if (TARGET_HARD_FLOAT) - { - arm_init_neon_builtins (); -- -+ arm_init_vfp_builtins (); - arm_init_crypto_builtins (); - } - - if (TARGET_CRC32) - arm_init_crc32_builtins (); - -- if (TARGET_VFP && TARGET_HARD_FLOAT) -+ if (TARGET_HARD_FLOAT) - { - tree ftype_set_fpscr - = build_function_type_list (void_type_node, unsigned_type_node, NULL); -@@ -1789,6 +1833,17 @@ arm_init_builtins (void) - = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr, - ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); - } -+ -+ if (use_cmse) -+ { -+ tree ftype_cmse_nonsecure_caller -+ = build_function_type_list (unsigned_type_node, NULL); -+ arm_builtin_decls[ARM_BUILTIN_CMSE_NONSECURE_CALLER] -+ = add_builtin_function ("__builtin_arm_cmse_nonsecure_caller", -+ ftype_cmse_nonsecure_caller, -+ ARM_BUILTIN_CMSE_NONSECURE_CALLER, BUILT_IN_MD, -+ NULL, NULL_TREE); -+ } - } - - /* Return the ARM builtin for CODE. */ -@@ -2211,40 +2266,16 @@ constant_arg: - return target; - } - --/* Expand a Neon builtin, i.e. those registered only if TARGET_NEON holds. -- Most of these are "special" because they don't have symbolic -- constants defined per-instruction or per instruction-variant. Instead, the -- required info is looked up in the table neon_builtin_data. */ -+/* Expand a neon builtin. This is also used for vfp builtins, which behave in -+ the same way. These builtins are "special" because they don't have symbolic -+ constants defined per-instruction or per instruction-variant. Instead, the -+ required info is looked up in the NEON_BUILTIN_DATA record that is passed -+ into the function. */ -+ - static rtx --arm_expand_neon_builtin (int fcode, tree exp, rtx target) -+arm_expand_neon_builtin_1 (int fcode, tree exp, rtx target, -+ neon_builtin_datum *d) - { -- /* Check in the context of the function making the call whether the -- builtin is supported. */ -- if (! TARGET_NEON) -- { -- fatal_error (input_location, -- "You must enable NEON instructions (e.g. 
-mfloat-abi=softfp -mfpu=neon) to use these intrinsics."); -- return const0_rtx; -- } -- -- if (fcode == ARM_BUILTIN_NEON_LANE_CHECK) -- { -- /* Builtin is only to check bounds of the lane passed to some intrinsics -- that are implemented with gcc vector extensions in arm_neon.h. */ -- -- tree nlanes = CALL_EXPR_ARG (exp, 0); -- gcc_assert (TREE_CODE (nlanes) == INTEGER_CST); -- rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1)); -- if (CONST_INT_P (lane_idx)) -- neon_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp); -- else -- error ("%Klane index must be a constant immediate", exp); -- /* Don't generate any RTL. */ -- return const0_rtx; -- } -- -- neon_builtin_datum *d = -- &neon_builtin_data[fcode - ARM_BUILTIN_NEON_PATTERN_START]; - enum insn_code icode = d->code; - builtin_arg args[SIMD_MAX_BUILTIN_ARGS + 1]; - int num_args = insn_data[d->code].n_operands; -@@ -2260,8 +2291,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) - /* We have four arrays of data, each indexed in a different fashion. - qualifiers - element 0 always describes the function return type. - operands - element 0 is either the operand for return value (if -- the function has a non-void return type) or the operand for the -- first argument. -+ the function has a non-void return type) or the operand for the -+ first argument. - expr_args - element 0 always holds the first argument. - args - element 0 is always used for the return type. */ - int qualifiers_k = k; -@@ -2283,7 +2314,7 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) - bool op_const_int_p = - (CONST_INT_P (arg) - && (*insn_data[icode].operand[operands_k].predicate) -- (arg, insn_data[icode].operand[operands_k].mode)); -+ (arg, insn_data[icode].operand[operands_k].mode)); - args[k] = op_const_int_p ? NEON_ARG_CONSTANT : NEON_ARG_COPY_TO_REG; - } - else if (d->qualifiers[qualifiers_k] & qualifier_pointer) -@@ -2296,8 +2327,68 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) - /* The interface to arm_expand_neon_args expects a 0 if - the function is void, and a 1 if it is not. */ - return arm_expand_neon_args -- (target, d->mode, fcode, icode, !is_void, exp, -- &args[1]); -+ (target, d->mode, fcode, icode, !is_void, exp, -+ &args[1]); -+} -+ -+/* Expand a Neon builtin, i.e. those registered only if TARGET_NEON holds. -+ Most of these are "special" because they don't have symbolic -+ constants defined per-instruction or per instruction-variant. Instead, the -+ required info is looked up in the table neon_builtin_data. */ -+ -+static rtx -+arm_expand_neon_builtin (int fcode, tree exp, rtx target) -+{ -+ if (fcode >= ARM_BUILTIN_NEON_BASE && ! TARGET_NEON) -+ { -+ fatal_error (input_location, -+ "You must enable NEON instructions" -+ " (e.g. -mfloat-abi=softfp -mfpu=neon)" -+ " to use these intrinsics."); -+ return const0_rtx; -+ } -+ -+ if (fcode == ARM_BUILTIN_NEON_LANE_CHECK) -+ { -+ /* Builtin is only to check bounds of the lane passed to some intrinsics -+ that are implemented with gcc vector extensions in arm_neon.h. */ -+ -+ tree nlanes = CALL_EXPR_ARG (exp, 0); -+ gcc_assert (TREE_CODE (nlanes) == INTEGER_CST); -+ rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 1)); -+ if (CONST_INT_P (lane_idx)) -+ neon_lane_bounds (lane_idx, 0, TREE_INT_CST_LOW (nlanes), exp); -+ else -+ error ("%Klane index must be a constant immediate", exp); -+ /* Don't generate any RTL. 
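
Aside: a user-level C sketch of what the ARM_BUILTIN_NEON_LANE_CHECK path above guards. The intrinsic and the option spelling come from this hunk's error text; the function itself is illustrative only, not part of the patch.

#include <arm_neon.h>

/* Build with NEON enabled, e.g. -mfloat-abi=softfp -mfpu=neon.  */
int32_t
get_lane3 (int32x4_t v)
{
  /* Lane 3 is inside the 0..3 range accepted by neon_lane_bounds.  */
  return vgetq_lane_s32 (v, 3);
  /* vgetq_lane_s32 (v, 4) would be rejected at compile time, and a
     non-constant lane index would produce the "lane index must be a
     constant immediate" error emitted above.  */
}
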
*/ -+ return const0_rtx; -+ } -+ -+ neon_builtin_datum *d -+ = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_PATTERN_START]; -+ -+ return arm_expand_neon_builtin_1 (fcode, exp, target, d); -+} -+ -+/* Expand a VFP builtin. These builtins are treated like -+ neon builtins except that the data is looked up in table -+ VFP_BUILTIN_DATA. */ -+ -+static rtx -+arm_expand_vfp_builtin (int fcode, tree exp, rtx target) -+{ -+ if (fcode >= ARM_BUILTIN_VFP_BASE && ! TARGET_HARD_FLOAT) -+ { -+ fatal_error (input_location, -+ "You must enable VFP instructions" -+ " to use these intrinsics."); -+ return const0_rtx; -+ } -+ -+ neon_builtin_datum *d -+ = &vfp_builtin_data[fcode - ARM_BUILTIN_VFP_PATTERN_START]; -+ -+ return arm_expand_neon_builtin_1 (fcode, exp, target, d); - } - - /* Expand an expression EXP that calls a built-in function, -@@ -2337,13 +2428,18 @@ arm_expand_builtin (tree exp, - if (fcode >= ARM_BUILTIN_NEON_BASE) - return arm_expand_neon_builtin (fcode, exp, target); - -+ if (fcode >= ARM_BUILTIN_VFP_BASE) -+ return arm_expand_vfp_builtin (fcode, exp, target); -+ - /* Check in the context of the function making the call whether the - builtin is supported. */ - if (fcode >= ARM_BUILTIN_CRYPTO_BASE - && (!TARGET_CRYPTO || !TARGET_HARD_FLOAT)) - { - fatal_error (input_location, -- "You must enable crypto intrinsics (e.g. include -mfloat-abi=softfp -mfpu=crypto-neon...) to use these intrinsics."); -+ "You must enable crypto instructions" -+ " (e.g. include -mfloat-abi=softfp -mfpu=crypto-neon...)" -+ " to use these intrinsics."); - return const0_rtx; - } - -@@ -2368,6 +2464,12 @@ arm_expand_builtin (tree exp, - emit_insn (pat); - return target; - -+ case ARM_BUILTIN_CMSE_NONSECURE_CALLER: -+ target = gen_reg_rtx (SImode); -+ op0 = arm_return_addr (0, NULL_RTX); -+ emit_insn (gen_addsi3 (target, op0, const1_rtx)); -+ return target; -+ - case ARM_BUILTIN_TEXTRMSB: - case ARM_BUILTIN_TEXTRMUB: - case ARM_BUILTIN_TEXTRMSH: -@@ -2995,7 +3097,7 @@ arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) - tree new_fenv_var, reload_fenv, restore_fnenv; - tree update_call, atomic_feraiseexcept, hold_fnclex; - -- if (!TARGET_VFP || !TARGET_HARD_FLOAT) -+ if (!TARGET_HARD_FLOAT) - return; - - /* Generate the equivalent of : ---- a/src/gcc/config/arm/arm-c.c -+++ b/src/gcc/config/arm/arm-c.c -@@ -76,6 +76,14 @@ arm_cpu_builtins (struct cpp_reader* pfile) - - def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT); - -+ if (arm_arch8 && !arm_arch_notm) -+ { -+ if (arm_arch_cmse && use_cmse) -+ builtin_define_with_int_value ("__ARM_FEATURE_CMSE", 3); -+ else -+ builtin_define ("__ARM_FEATURE_CMSE"); -+ } -+ - if (TARGET_ARM_FEATURE_LDREX) - builtin_define_with_int_value ("__ARM_FEATURE_LDREX", - TARGET_ARM_FEATURE_LDREX); -@@ -86,6 +94,9 @@ arm_cpu_builtins (struct cpp_reader* pfile) - ((TARGET_ARM_ARCH >= 5 && !TARGET_THUMB) - || TARGET_ARM_ARCH_ISA_THUMB >=2)); - -+ def_or_undef_macro (pfile, "__ARM_FEATURE_NUMERIC_MAXMIN", -+ TARGET_ARM_ARCH >= 8 && TARGET_NEON && TARGET_FPU_ARMV8); -+ - def_or_undef_macro (pfile, "__ARM_FEATURE_SIMD32", TARGET_INT_SIMD); - - builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM", -@@ -128,17 +139,24 @@ arm_cpu_builtins (struct cpp_reader* pfile) - if (TARGET_SOFT_FLOAT) - builtin_define ("__SOFTFP__"); - -- def_or_undef_macro (pfile, "__VFP_FP__", TARGET_VFP); -+ builtin_define ("__VFP_FP__"); - - if (TARGET_ARM_FP) - builtin_define_with_int_value ("__ARM_FP", TARGET_ARM_FP); - else - cpp_undef (pfile, "__ARM_FP"); - -- if (arm_fp16_format == 
ARM_FP16_FORMAT_IEEE) -- builtin_define ("__ARM_FP16_FORMAT_IEEE"); -- if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) -- builtin_define ("__ARM_FP16_FORMAT_ALTERNATIVE"); -+ def_or_undef_macro (pfile, "__ARM_FP16_FORMAT_IEEE", -+ arm_fp16_format == ARM_FP16_FORMAT_IEEE); -+ def_or_undef_macro (pfile, "__ARM_FP16_FORMAT_ALTERNATIVE", -+ arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE); -+ def_or_undef_macro (pfile, "__ARM_FP16_ARGS", -+ arm_fp16_format != ARM_FP16_FORMAT_NONE); -+ -+ def_or_undef_macro (pfile, "__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", -+ TARGET_VFP_FP16INST); -+ def_or_undef_macro (pfile, "__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", -+ TARGET_NEON_FP16INST); - - def_or_undef_macro (pfile, "__ARM_FEATURE_FMA", TARGET_FMA); - def_or_undef_macro (pfile, "__ARM_NEON__", TARGET_NEON); ---- a/src/gcc/config/arm/arm-cores.def -+++ b/src/gcc/config/arm/arm-cores.def -@@ -166,15 +166,21 @@ ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7, 7A, ARM_FSET_MAKE_ - ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7, 7A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12) - - /* V8 Architecture Processors */ -+ARM_CORE("cortex-m23", cortexm23, cortexm23, 8M_BASE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH8M_BASE), v6m) - ARM_CORE("cortex-a32", cortexa32, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a35) -+ARM_CORE("cortex-m33", cortexm33, cortexm33, 8M_MAIN, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARCH7EM | FL_FOR_ARCH8M_MAIN), v7m) - ARM_CORE("cortex-a35", cortexa35, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a35) - ARM_CORE("cortex-a53", cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a53) - ARM_CORE("cortex-a57", cortexa57, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) - ARM_CORE("cortex-a72", cortexa72, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) -+ARM_CORE("cortex-a73", cortexa73, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a73) - ARM_CORE("exynos-m1", exynosm1, exynosm1, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), exynosm1) --ARM_CORE("qdf24xx", qdf24xx, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) -+ARM_CORE("qdf24xx", qdf24xx, cortexa57, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), qdf24xx) - ARM_CORE("xgene1", xgene1, xgene1, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH8A), xgene1) - - /* V8 big.LITTLE implementations */ - ARM_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) - ARM_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57) -+ARM_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a73) -+ARM_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a73) -+ ---- /dev/null -+++ b/src/gcc/config/arm/arm-flags.h -@@ -0,0 +1,212 @@ -+/* Flags used to identify the presence of processor capabilities. -+ -+ Copyright (C) 2016 Free Software Foundation, Inc. -+ Contributed by ARM Ltd. -+ -+ This file is part of GCC. 
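
Aside: the arm-c.c feature macros above are the user-visible surface of these changes. A hedged sketch of how application code might test them; the macro names and values are from the hunk, while the typedef and helper are illustrative and assume the __fp16 argument/return support enabled elsewhere in this patch.

/* __ARM_FEATURE_CMSE is defined for ARMv8-M targets and set to 3 when
   compiling with -mcmse, per arm_cpu_builtins above.  */
#if defined (__ARM_FEATURE_CMSE) && __ARM_FEATURE_CMSE == 3
#define HAVE_SECURE_EXTENSIONS 1
#endif

/* Scalar FP16 arithmetic, advertised when TARGET_VFP_FP16INST holds.  */
#ifdef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC
typedef __fp16 half_t;
static inline half_t
half_add (half_t a, half_t b)
{
  return a + b;
}
#endif
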
-+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ <http://www.gnu.org/licenses/>. */ -+ -+#ifndef GCC_ARM_FLAGS_H -+#define GCC_ARM_FLAGS_H -+ -+/* Flags used to identify the presence of processor capabilities. */ -+ -+/* Bit values used to identify processor capabilities. */ -+#define FL_NONE (0U) /* No flags. */ -+#define FL_ANY (0xffffffffU) /* All flags. */ -+#define FL_CO_PROC (1U << 0) /* Has external co-processor bus. */ -+#define FL_ARCH3M (1U << 1) /* Extended multiply. */ -+#define FL_MODE26 (1U << 2) /* 26-bit mode support. */ -+#define FL_MODE32 (1U << 3) /* 32-bit mode support. */ -+#define FL_ARCH4 (1U << 4) /* Architecture rel 4. */ -+#define FL_ARCH5 (1U << 5) /* Architecture rel 5. */ -+#define FL_THUMB (1U << 6) /* Thumb aware. */ -+#define FL_LDSCHED (1U << 7) /* Load scheduling necessary. */ -+#define FL_STRONG (1U << 8) /* StrongARM. */ -+#define FL_ARCH5E (1U << 9) /* DSP extensions to v5. */ -+#define FL_XSCALE (1U << 10) /* XScale. */ -+/* spare (1U << 11) */ -+#define FL_ARCH6 (1U << 12) /* Architecture rel 6. Adds -+ media instructions. */ -+#define FL_VFPV2 (1U << 13) /* Vector Floating Point V2. */ -+#define FL_WBUF (1U << 14) /* Schedule for write buffer ops. -+ Note: ARM6 & 7 derivatives only. */ -+#define FL_ARCH6K (1U << 15) /* Architecture rel 6 K extensions. */ -+#define FL_THUMB2 (1U << 16) /* Thumb-2. */ -+#define FL_NOTM (1U << 17) /* Instructions not present in the 'M' -+ profile. */ -+#define FL_THUMB_DIV (1U << 18) /* Hardware divide (Thumb mode). */ -+#define FL_VFPV3 (1U << 19) /* Vector Floating Point V3. */ -+#define FL_NEON (1U << 20) /* Neon instructions. */ -+#define FL_ARCH7EM (1U << 21) /* Instructions present in the ARMv7E-M -+ architecture. */ -+#define FL_ARCH7 (1U << 22) /* Architecture 7. */ -+#define FL_ARM_DIV (1U << 23) /* Hardware divide (ARM mode). */ -+#define FL_ARCH8 (1U << 24) /* Architecture 8. */ -+#define FL_CRC32 (1U << 25) /* ARMv8 CRC32 instructions. */ -+#define FL_SMALLMUL (1U << 26) /* Small multiply supported. */ -+#define FL_NO_VOLATILE_CE (1U << 27) /* No volatile memory in IT block. */ -+ -+#define FL_IWMMXT (1U << 29) /* XScale v2 or "Intel Wireless MMX -+ technology". */ -+#define FL_IWMMXT2 (1U << 30) /* "Intel Wireless MMX2 -+ technology". */ -+#define FL_ARCH6KZ (1U << 31) /* ARMv6KZ architecture. */ -+ -+#define FL2_ARCH8_1 (1U << 0) /* Architecture 8.1. */ -+#define FL2_ARCH8_2 (1U << 1) /* Architecture 8.2. */ -+#define FL2_FP16INST (1U << 2) /* FP16 Instructions for ARMv8.2 and -+ later. */ -+#define FL2_CMSE (1U << 3) /* ARMv8-M Security Extensions. */ -+ -+/* Flags that only effect tuning, not available instructions. 
*/ -+#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ -+ | FL_CO_PROC) -+ -+#define FL_FOR_ARCH2 FL_NOTM -+#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) -+#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) -+#define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) -+#define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB) -+#define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5) -+#define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB) -+#define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E) -+#define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB) -+#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE -+#define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6) -+#define FL_FOR_ARCH6J FL_FOR_ARCH6 -+#define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) -+#define FL_FOR_ARCH6Z FL_FOR_ARCH6 -+#define FL_FOR_ARCH6ZK FL_FOR_ARCH6K -+#define FL_FOR_ARCH6KZ (FL_FOR_ARCH6K | FL_ARCH6KZ) -+#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) -+#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) -+#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) -+#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) -+#define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV) -+#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV) -+#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) -+#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) -+#define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8) -+#define FL2_FOR_ARCH8_1A FL2_ARCH8_1 -+#define FL2_FOR_ARCH8_2A (FL2_FOR_ARCH8_1A | FL2_ARCH8_2) -+#define FL_FOR_ARCH8M_BASE (FL_FOR_ARCH6M | FL_ARCH8 | FL_THUMB_DIV) -+#define FL_FOR_ARCH8M_MAIN (FL_FOR_ARCH7M | FL_ARCH8) -+ -+/* There are too many feature bits to fit in a single word so the set of cpu and -+ fpu capabilities is a structure. A feature set is created and manipulated -+ with the ARM_FSET macros. */ -+ -+typedef struct -+{ -+ unsigned cpu[2]; -+} arm_feature_set; -+ -+ -+/* Initialize a feature set. */ -+ -+#define ARM_FSET_MAKE(CPU1,CPU2) { { (CPU1), (CPU2) } } -+ -+#define ARM_FSET_MAKE_CPU1(CPU1) ARM_FSET_MAKE ((CPU1), (FL_NONE)) -+#define ARM_FSET_MAKE_CPU2(CPU2) ARM_FSET_MAKE ((FL_NONE), (CPU2)) -+ -+/* Accessors. */ -+ -+#define ARM_FSET_CPU1(S) ((S).cpu[0]) -+#define ARM_FSET_CPU2(S) ((S).cpu[1]) -+ -+/* Useful combinations. */ -+ -+#define ARM_FSET_EMPTY ARM_FSET_MAKE (FL_NONE, FL_NONE) -+#define ARM_FSET_ANY ARM_FSET_MAKE (FL_ANY, FL_ANY) -+ -+/* Tests for a specific CPU feature. */ -+ -+#define ARM_FSET_HAS_CPU1(A, F) \ -+ (((A).cpu[0] & ((unsigned long)(F))) == ((unsigned long)(F))) -+#define ARM_FSET_HAS_CPU2(A, F) \ -+ (((A).cpu[1] & ((unsigned long)(F))) == ((unsigned long)(F))) -+#define ARM_FSET_HAS_CPU(A, F1, F2) \ -+ (ARM_FSET_HAS_CPU1 ((A), (F1)) && ARM_FSET_HAS_CPU2 ((A), (F2))) -+ -+/* Add a feature to a feature set. */ -+ -+#define ARM_FSET_ADD_CPU1(DST, F) \ -+ do { \ -+ (DST).cpu[0] |= (F); \ -+ } while (0) -+ -+#define ARM_FSET_ADD_CPU2(DST, F) \ -+ do { \ -+ (DST).cpu[1] |= (F); \ -+ } while (0) -+ -+/* Remove a feature from a feature set. */ -+ -+#define ARM_FSET_DEL_CPU1(DST, F) \ -+ do { \ -+ (DST).cpu[0] &= ~(F); \ -+ } while (0) -+ -+#define ARM_FSET_DEL_CPU2(DST, F) \ -+ do { \ -+ (DST).cpu[1] &= ~(F); \ -+ } while (0) -+ -+/* Union of feature sets. */ -+ -+#define ARM_FSET_UNION(DST,F1,F2) \ -+ do { \ -+ (DST).cpu[0] = (F1).cpu[0] | (F2).cpu[0]; \ -+ (DST).cpu[1] = (F1).cpu[1] | (F2).cpu[1]; \ -+ } while (0) -+ -+/* Intersection of feature sets. 
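
Aside: a minimal sketch of the feature-set API defined so far (constructor, accessors, tests, and element-wise add/del). Toy values, compilable only inside the GCC sources; the real consumers are the arm_arch* booleans set from insn_flags in arm_option_override further down.

#include "arm-flags.h"

/* Hypothetical standalone predicate, mirroring how arm.c queries
   insn_flags with ARM_FSET_HAS_CPU1/ARM_FSET_HAS_CPU2.  */
static int
fset_demo (void)
{
  arm_feature_set fset = ARM_FSET_MAKE (FL_FOR_ARCH8M_BASE, FL2_CMSE);
  int is_v8 = ARM_FSET_HAS_CPU1 (fset, FL_ARCH8);   /* 1: v8 bit set.  */
  int cmse_p = ARM_FSET_HAS_CPU2 (fset, FL2_CMSE);  /* 1: CMSE bit set.  */
  ARM_FSET_DEL_CPU2 (fset, FL2_CMSE);               /* Clear it again.  */
  return is_v8 && cmse_p && !ARM_FSET_HAS_CPU2 (fset, FL2_CMSE);
}
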
*/ -+ -+#define ARM_FSET_INTER(DST,F1,F2) \ -+ do { \ -+ (DST).cpu[0] = (F1).cpu[0] & (F2).cpu[0]; \ -+ (DST).cpu[1] = (F1).cpu[1] & (F2).cpu[1]; \ -+ } while (0) -+ -+/* Exclusive disjunction. */ -+ -+#define ARM_FSET_XOR(DST,F1,F2) \ -+ do { \ -+ (DST).cpu[0] = (F1).cpu[0] ^ (F2).cpu[0]; \ -+ (DST).cpu[1] = (F1).cpu[1] ^ (F2).cpu[1]; \ -+ } while (0) -+ -+/* Difference of feature sets: F1 excluding the elements of F2. */ -+ -+#define ARM_FSET_EXCLUDE(DST,F1,F2) \ -+ do { \ -+ (DST).cpu[0] = (F1).cpu[0] & ~(F2).cpu[0]; \ -+ (DST).cpu[1] = (F1).cpu[1] & ~(F2).cpu[1]; \ -+ } while (0) -+ -+/* Test for an empty feature set. */ -+ -+#define ARM_FSET_IS_EMPTY(A) \ -+ (!((A).cpu[0]) && !((A).cpu[1])) -+ -+/* Tests whether the cpu features of A are a subset of B. */ -+ -+#define ARM_FSET_CPU_SUBSET(A,B) \ -+ ((((A).cpu[0] & (B).cpu[0]) == (A).cpu[0]) \ -+ && (((A).cpu[1] & (B).cpu[1]) == (A).cpu[1])) -+ -+#endif /* GCC_ARM_FLAGS_H */ ---- a/src/gcc/config/arm/arm-fpus.def -+++ b/src/gcc/config/arm/arm-fpus.def -@@ -19,30 +19,31 @@ - - /* Before using #include to read this file, define a macro: - -- ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) -+ ARM_FPU(NAME, REV, VFP_REGS, FEATURES) - - The arguments are the fields of struct arm_fpu_desc. - - genopt.sh assumes no whitespace up to the first "," in each entry. */ - --ARM_FPU("vfp", ARM_FP_MODEL_VFP, 2, VFP_REG_D16, FPU_FL_NONE) --ARM_FPU("vfpv3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, FPU_FL_NONE) --ARM_FPU("vfpv3-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, FPU_FL_FP16) --ARM_FPU("vfpv3-d16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, FPU_FL_NONE) --ARM_FPU("vfpv3-d16-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D16, FPU_FL_FP16) --ARM_FPU("vfpv3xd", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, FPU_FL_NONE) --ARM_FPU("vfpv3xd-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_SINGLE, FPU_FL_FP16) --ARM_FPU("neon", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, FPU_FL_NEON) --ARM_FPU("neon-fp16", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, FPU_FL_NEON | FPU_FL_FP16) --ARM_FPU("vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, FPU_FL_FP16) --ARM_FPU("vfpv4-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_D16, FPU_FL_FP16) --ARM_FPU("fpv4-sp-d16", ARM_FP_MODEL_VFP, 4, VFP_REG_SINGLE, FPU_FL_FP16) --ARM_FPU("fpv5-sp-d16", ARM_FP_MODEL_VFP, 5, VFP_REG_SINGLE, FPU_FL_FP16) --ARM_FPU("fpv5-d16", ARM_FP_MODEL_VFP, 5, VFP_REG_D16, FPU_FL_FP16) --ARM_FPU("neon-vfpv4", ARM_FP_MODEL_VFP, 4, VFP_REG_D32, FPU_FL_NEON | FPU_FL_FP16) --ARM_FPU("fp-armv8", ARM_FP_MODEL_VFP, 8, VFP_REG_D32, FPU_FL_FP16) --ARM_FPU("neon-fp-armv8",ARM_FP_MODEL_VFP, 8, VFP_REG_D32, FPU_FL_NEON | FPU_FL_FP16) --ARM_FPU("crypto-neon-fp-armv8", -- ARM_FP_MODEL_VFP, 8, VFP_REG_D32, FPU_FL_NEON | FPU_FL_FP16 | FPU_FL_CRYPTO) -+ARM_FPU("vfp", 2, VFP_REG_D16, FPU_FL_NONE) -+ARM_FPU("vfpv2", 2, VFP_REG_D16, FPU_FL_NONE) -+ARM_FPU("vfpv3", 3, VFP_REG_D32, FPU_FL_NONE) -+ARM_FPU("vfpv3-fp16", 3, VFP_REG_D32, FPU_FL_FP16) -+ARM_FPU("vfpv3-d16", 3, VFP_REG_D16, FPU_FL_NONE) -+ARM_FPU("vfpv3-d16-fp16", 3, VFP_REG_D16, FPU_FL_FP16) -+ARM_FPU("vfpv3xd", 3, VFP_REG_SINGLE, FPU_FL_NONE) -+ARM_FPU("vfpv3xd-fp16", 3, VFP_REG_SINGLE, FPU_FL_FP16) -+ARM_FPU("neon", 3, VFP_REG_D32, FPU_FL_NEON) -+ARM_FPU("neon-vfpv3", 3, VFP_REG_D32, FPU_FL_NEON) -+ARM_FPU("neon-fp16", 3, VFP_REG_D32, FPU_FL_NEON | FPU_FL_FP16) -+ARM_FPU("vfpv4", 4, VFP_REG_D32, FPU_FL_FP16) -+ARM_FPU("vfpv4-d16", 4, VFP_REG_D16, FPU_FL_FP16) -+ARM_FPU("fpv4-sp-d16", 4, VFP_REG_SINGLE, FPU_FL_FP16) -+ARM_FPU("fpv5-sp-d16", 5, VFP_REG_SINGLE, FPU_FL_FP16) -+ARM_FPU("fpv5-d16", 5, VFP_REG_D16, FPU_FL_FP16) -+ARM_FPU("neon-vfpv4", 
4, VFP_REG_D32, FPU_FL_NEON | FPU_FL_FP16) -+ARM_FPU("fp-armv8", 8, VFP_REG_D32, FPU_FL_FP16) -+ARM_FPU("neon-fp-armv8", 8, VFP_REG_D32, FPU_FL_NEON | FPU_FL_FP16) -+ARM_FPU("crypto-neon-fp-armv8", 8, VFP_REG_D32, FPU_FL_NEON | FPU_FL_FP16 | FPU_FL_CRYPTO) - /* Compatibility aliases. */ --ARM_FPU("vfp3", ARM_FP_MODEL_VFP, 3, VFP_REG_D32, FPU_FL_NONE) -+ARM_FPU("vfp3", 3, VFP_REG_D32, FPU_FL_NONE) ---- a/src/gcc/config/arm/arm-modes.def -+++ b/src/gcc/config/arm/arm-modes.def -@@ -59,6 +59,7 @@ CC_MODE (CC_DGEU); - CC_MODE (CC_DGTU); - CC_MODE (CC_C); - CC_MODE (CC_N); -+CC_MODE (CC_V); - - /* Vector modes. */ - VECTOR_MODES (INT, 4); /* V4QI V2HI */ ---- a/src/gcc/config/arm/arm-opts.h -+++ b/src/gcc/config/arm/arm-opts.h -@@ -25,6 +25,8 @@ - #ifndef ARM_OPTS_H - #define ARM_OPTS_H - -+#include "arm-flags.h" -+ - /* The various ARM cores. */ - enum processor_type - { ---- a/src/gcc/config/arm/arm-protos.h -+++ b/src/gcc/config/arm/arm-protos.h -@@ -22,6 +22,8 @@ - #ifndef GCC_ARM_PROTOS_H - #define GCC_ARM_PROTOS_H - -+#include "arm-flags.h" -+ - extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *); - extern int use_return_insn (int, rtx); - extern bool use_simple_return_p (void); -@@ -31,6 +33,7 @@ extern int arm_volatile_func (void); - extern void arm_expand_prologue (void); - extern void arm_expand_epilogue (bool); - extern void arm_declare_function_name (FILE *, const char *, tree); -+extern void arm_asm_declare_function_name (FILE *, const char *, tree); - extern void thumb2_expand_return (bool); - extern const char *arm_strip_name_encoding (const char *); - extern void arm_asm_output_labelref (FILE *, const char *); -@@ -50,8 +53,12 @@ extern tree arm_builtin_decl (unsigned code, bool initialize_p - ATTRIBUTE_UNUSED); - extern void arm_init_builtins (void); - extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update); -- -+extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high); -+extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode, -+ bool high); - #ifdef RTX_CODE -+extern void arm_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode, -+ rtx label_ref); - extern bool arm_vector_mode_supported_p (machine_mode); - extern bool arm_small_register_classes_for_mode_p (machine_mode); - extern int arm_hard_regno_mode_ok (unsigned int, machine_mode); -@@ -130,6 +137,7 @@ extern int arm_const_double_inline_cost (rtx); - extern bool arm_const_double_by_parts (rtx); - extern bool arm_const_double_by_immediates (rtx); - extern void arm_emit_call_insn (rtx, rtx, bool); -+bool detect_cmse_nonsecure_call (tree); - extern const char *output_call (rtx *); - void arm_emit_movpair (rtx, rtx); - extern const char *output_mov_long_double_arm_from_arm (rtx *); -@@ -161,6 +169,7 @@ extern const char *arm_output_iwmmxt_shift_immediate (const char *, rtx *, bool) - extern const char *arm_output_iwmmxt_tinsr (rtx *); - extern unsigned int arm_sync_loop_insns (rtx , rtx *); - extern int arm_attr_length_push_multi(rtx, rtx); -+extern int arm_attr_length_pop_multi(rtx *, bool, bool); - extern void arm_expand_compare_and_swap (rtx op[]); - extern void arm_split_compare_and_swap (rtx op[]); - extern void arm_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); -@@ -192,7 +201,6 @@ extern const char *thumb_call_via_reg (rtx); - extern void thumb_expand_movmemqi (rtx *); - extern rtx arm_return_addr (int, rtx); - extern void thumb_reload_out_hi (rtx *); --extern void thumb_reload_in_hi (rtx *); - extern void 
thumb_set_return_address (rtx, rtx); - extern const char *thumb1_output_casesi (rtx *); - extern const char *thumb2_output_casesi (rtx *); -@@ -256,7 +264,6 @@ struct cpu_cost_table; - - struct tune_params - { -- bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool); - const struct cpu_cost_table *insn_extra_cost; - bool (*sched_adjust_cost) (rtx_insn *, rtx, rtx_insn *, int *); - int (*branch_cost) (bool, bool); -@@ -319,6 +326,7 @@ extern int vfp3_const_double_for_bits (rtx); - - extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, - rtx); -+extern bool arm_fusion_enabled_p (tune_params::fuse_ops); - extern bool arm_valid_symbolic_address_p (rtx); - extern bool arm_validize_comparison (rtx *, rtx *, rtx *); - #endif /* RTX_CODE */ -@@ -344,184 +352,6 @@ extern void arm_cpu_cpp_builtins (struct cpp_reader *); - - extern bool arm_is_constant_pool_ref (rtx); - --/* Flags used to identify the presence of processor capabilities. */ -- --/* Bit values used to identify processor capabilities. */ --#define FL_NONE (0) /* No flags. */ --#define FL_ANY (0xffffffff) /* All flags. */ --#define FL_CO_PROC (1 << 0) /* Has external co-processor bus */ --#define FL_ARCH3M (1 << 1) /* Extended multiply */ --#define FL_MODE26 (1 << 2) /* 26-bit mode support */ --#define FL_MODE32 (1 << 3) /* 32-bit mode support */ --#define FL_ARCH4 (1 << 4) /* Architecture rel 4 */ --#define FL_ARCH5 (1 << 5) /* Architecture rel 5 */ --#define FL_THUMB (1 << 6) /* Thumb aware */ --#define FL_LDSCHED (1 << 7) /* Load scheduling necessary */ --#define FL_STRONG (1 << 8) /* StrongARM */ --#define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */ --#define FL_XSCALE (1 << 10) /* XScale */ --/* spare (1 << 11) */ --#define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds -- media instructions. */ --#define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */ --#define FL_WBUF (1 << 14) /* Schedule for write buffer ops. -- Note: ARM6 & 7 derivatives only. */ --#define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */ --#define FL_THUMB2 (1 << 16) /* Thumb-2. */ --#define FL_NOTM (1 << 17) /* Instructions not present in the 'M' -- profile. */ --#define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */ --#define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ --#define FL_NEON (1 << 20) /* Neon instructions. */ --#define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M -- architecture. */ --#define FL_ARCH7 (1 << 22) /* Architecture 7. */ --#define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */ --#define FL_ARCH8 (1 << 24) /* Architecture 8. */ --#define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions. */ -- --#define FL_SMALLMUL (1 << 26) /* Small multiply supported. */ --#define FL_NO_VOLATILE_CE (1 << 27) /* No volatile memory in IT block. */ -- --#define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ --#define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology". */ --#define FL_ARCH6KZ (1 << 31) /* ARMv6KZ architecture. */ -- --#define FL2_ARCH8_1 (1 << 0) /* Architecture 8.1. */ -- --/* Flags that only effect tuning, not available instructions. 
*/ --#define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ -- | FL_CO_PROC) -- --#define FL_FOR_ARCH2 FL_NOTM --#define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) --#define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) --#define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) --#define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB) --#define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5) --#define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB) --#define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E) --#define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB) --#define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE --#define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6) --#define FL_FOR_ARCH6J FL_FOR_ARCH6 --#define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) --#define FL_FOR_ARCH6Z FL_FOR_ARCH6 --#define FL_FOR_ARCH6KZ (FL_FOR_ARCH6K | FL_ARCH6KZ) --#define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) --#define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) --#define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) --#define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) --#define FL_FOR_ARCH7VE (FL_FOR_ARCH7A | FL_THUMB_DIV | FL_ARM_DIV) --#define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV) --#define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV) --#define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) --#define FL_FOR_ARCH8A (FL_FOR_ARCH7VE | FL_ARCH8) --#define FL2_FOR_ARCH8_1A FL2_ARCH8_1 -- --/* There are too many feature bits to fit in a single word so the set of cpu and -- fpu capabilities is a structure. A feature set is created and manipulated -- with the ARM_FSET macros. */ -- --typedef struct --{ -- unsigned long cpu[2]; --} arm_feature_set; -- -- --/* Initialize a feature set. */ -- --#define ARM_FSET_MAKE(CPU1,CPU2) { { (CPU1), (CPU2) } } -- --#define ARM_FSET_MAKE_CPU1(CPU1) ARM_FSET_MAKE ((CPU1), (FL_NONE)) --#define ARM_FSET_MAKE_CPU2(CPU2) ARM_FSET_MAKE ((FL_NONE), (CPU2)) -- --/* Accessors. */ -- --#define ARM_FSET_CPU1(S) ((S).cpu[0]) --#define ARM_FSET_CPU2(S) ((S).cpu[1]) -- --/* Useful combinations. */ -- --#define ARM_FSET_EMPTY ARM_FSET_MAKE (FL_NONE, FL_NONE) --#define ARM_FSET_ANY ARM_FSET_MAKE (FL_ANY, FL_ANY) -- --/* Tests for a specific CPU feature. */ -- --#define ARM_FSET_HAS_CPU1(A, F) \ -- (((A).cpu[0] & ((unsigned long)(F))) == ((unsigned long)(F))) --#define ARM_FSET_HAS_CPU2(A, F) \ -- (((A).cpu[1] & ((unsigned long)(F))) == ((unsigned long)(F))) --#define ARM_FSET_HAS_CPU(A, F1, F2) \ -- (ARM_FSET_HAS_CPU1 ((A), (F1)) && ARM_FSET_HAS_CPU2 ((A), (F2))) -- --/* Add a feature to a feature set. */ -- --#define ARM_FSET_ADD_CPU1(DST, F) \ -- do { \ -- (DST).cpu[0] |= (F); \ -- } while (0) -- --#define ARM_FSET_ADD_CPU2(DST, F) \ -- do { \ -- (DST).cpu[1] |= (F); \ -- } while (0) -- --/* Remove a feature from a feature set. */ -- --#define ARM_FSET_DEL_CPU1(DST, F) \ -- do { \ -- (DST).cpu[0] &= ~(F); \ -- } while (0) -- --#define ARM_FSET_DEL_CPU2(DST, F) \ -- do { \ -- (DST).cpu[1] &= ~(F); \ -- } while (0) -- --/* Union of feature sets. */ -- --#define ARM_FSET_UNION(DST,F1,F2) \ -- do { \ -- (DST).cpu[0] = (F1).cpu[0] | (F2).cpu[0]; \ -- (DST).cpu[1] = (F1).cpu[1] | (F2).cpu[1]; \ -- } while (0) -- --/* Intersection of feature sets. */ -- --#define ARM_FSET_INTER(DST,F1,F2) \ -- do { \ -- (DST).cpu[0] = (F1).cpu[0] & (F2).cpu[0]; \ -- (DST).cpu[1] = (F1).cpu[1] & (F2).cpu[1]; \ -- } while (0) -- --/* Exclusive disjunction. 
*/ -- --#define ARM_FSET_XOR(DST,F1,F2) \ -- do { \ -- (DST).cpu[0] = (F1).cpu[0] ^ (F2).cpu[0]; \ -- (DST).cpu[1] = (F1).cpu[1] ^ (F2).cpu[1]; \ -- } while (0) -- --/* Difference of feature sets: F1 excluding the elements of F2. */ -- --#define ARM_FSET_EXCLUDE(DST,F1,F2) \ -- do { \ -- (DST).cpu[0] = (F1).cpu[0] & ~(F2).cpu[0]; \ -- (DST).cpu[1] = (F1).cpu[1] & ~(F2).cpu[1]; \ -- } while (0) -- --/* Test for an empty feature set. */ -- --#define ARM_FSET_IS_EMPTY(A) \ -- (!((A).cpu[0]) && !((A).cpu[1])) -- --/* Tests whether the cpu features of A are a subset of B. */ -- --#define ARM_FSET_CPU_SUBSET(A,B) \ -- ((((A).cpu[0] & (B).cpu[0]) == (A).cpu[0]) \ -- && (((A).cpu[1] & (B).cpu[1]) == (A).cpu[1])) -- - /* The bits in this mask specify which - instructions we are allowed to generate. */ - extern arm_feature_set insn_flags; -@@ -601,6 +431,9 @@ extern int arm_tune_cortex_a9; - interworking clean. */ - extern int arm_cpp_interwork; - -+/* Nonzero if chip supports Thumb 1. */ -+extern int arm_arch_thumb1; -+ - /* Nonzero if chip supports Thumb 2. */ - extern int arm_arch_thumb2; - ---- a/src/gcc/config/arm/arm-tables.opt -+++ b/src/gcc/config/arm/arm-tables.opt -@@ -307,9 +307,15 @@ EnumValue - Enum(processor_type) String(cortex-a17.cortex-a7) Value(cortexa17cortexa7) - - EnumValue -+Enum(processor_type) String(cortex-m23) Value(cortexm23) -+ -+EnumValue - Enum(processor_type) String(cortex-a32) Value(cortexa32) - - EnumValue -+Enum(processor_type) String(cortex-m33) Value(cortexm33) -+ -+EnumValue - Enum(processor_type) String(cortex-a35) Value(cortexa35) - - EnumValue -@@ -322,6 +328,9 @@ EnumValue - Enum(processor_type) String(cortex-a72) Value(cortexa72) - - EnumValue -+Enum(processor_type) String(cortex-a73) Value(cortexa73) -+ -+EnumValue - Enum(processor_type) String(exynos-m1) Value(exynosm1) - - EnumValue -@@ -336,6 +345,12 @@ Enum(processor_type) String(cortex-a57.cortex-a53) Value(cortexa57cortexa53) - EnumValue - Enum(processor_type) String(cortex-a72.cortex-a53) Value(cortexa72cortexa53) - -+EnumValue -+Enum(processor_type) String(cortex-a73.cortex-a35) Value(cortexa73cortexa35) -+ -+EnumValue -+Enum(processor_type) String(cortex-a73.cortex-a53) Value(cortexa73cortexa53) -+ - Enum - Name(arm_arch) Type(int) - Known ARM architectures (for use with the -march= option): -@@ -428,10 +443,25 @@ EnumValue - Enum(arm_arch) String(armv8.1-a+crc) Value(28) - - EnumValue --Enum(arm_arch) String(iwmmxt) Value(29) -+Enum(arm_arch) String(armv8.2-a) Value(29) -+ -+EnumValue -+Enum(arm_arch) String(armv8.2-a+fp16) Value(30) - - EnumValue --Enum(arm_arch) String(iwmmxt2) Value(30) -+Enum(arm_arch) String(armv8-m.base) Value(31) -+ -+EnumValue -+Enum(arm_arch) String(armv8-m.main) Value(32) -+ -+EnumValue -+Enum(arm_arch) String(armv8-m.main+dsp) Value(33) -+ -+EnumValue -+Enum(arm_arch) String(iwmmxt) Value(34) -+ -+EnumValue -+Enum(arm_arch) String(iwmmxt2) Value(35) - - Enum - Name(arm_fpu) Type(int) -@@ -441,56 +471,62 @@ EnumValue - Enum(arm_fpu) String(vfp) Value(0) - - EnumValue --Enum(arm_fpu) String(vfpv3) Value(1) -+Enum(arm_fpu) String(vfpv2) Value(1) -+ -+EnumValue -+Enum(arm_fpu) String(vfpv3) Value(2) -+ -+EnumValue -+Enum(arm_fpu) String(vfpv3-fp16) Value(3) - - EnumValue --Enum(arm_fpu) String(vfpv3-fp16) Value(2) -+Enum(arm_fpu) String(vfpv3-d16) Value(4) - - EnumValue --Enum(arm_fpu) String(vfpv3-d16) Value(3) -+Enum(arm_fpu) String(vfpv3-d16-fp16) Value(5) - - EnumValue --Enum(arm_fpu) String(vfpv3-d16-fp16) Value(4) -+Enum(arm_fpu) String(vfpv3xd) Value(6) - - EnumValue 
--Enum(arm_fpu) String(vfpv3xd) Value(5) -+Enum(arm_fpu) String(vfpv3xd-fp16) Value(7) - - EnumValue --Enum(arm_fpu) String(vfpv3xd-fp16) Value(6) -+Enum(arm_fpu) String(neon) Value(8) - - EnumValue --Enum(arm_fpu) String(neon) Value(7) -+Enum(arm_fpu) String(neon-vfpv3) Value(9) - - EnumValue --Enum(arm_fpu) String(neon-fp16) Value(8) -+Enum(arm_fpu) String(neon-fp16) Value(10) - - EnumValue --Enum(arm_fpu) String(vfpv4) Value(9) -+Enum(arm_fpu) String(vfpv4) Value(11) - - EnumValue --Enum(arm_fpu) String(vfpv4-d16) Value(10) -+Enum(arm_fpu) String(vfpv4-d16) Value(12) - - EnumValue --Enum(arm_fpu) String(fpv4-sp-d16) Value(11) -+Enum(arm_fpu) String(fpv4-sp-d16) Value(13) - - EnumValue --Enum(arm_fpu) String(fpv5-sp-d16) Value(12) -+Enum(arm_fpu) String(fpv5-sp-d16) Value(14) - - EnumValue --Enum(arm_fpu) String(fpv5-d16) Value(13) -+Enum(arm_fpu) String(fpv5-d16) Value(15) - - EnumValue --Enum(arm_fpu) String(neon-vfpv4) Value(14) -+Enum(arm_fpu) String(neon-vfpv4) Value(16) - - EnumValue --Enum(arm_fpu) String(fp-armv8) Value(15) -+Enum(arm_fpu) String(fp-armv8) Value(17) - - EnumValue --Enum(arm_fpu) String(neon-fp-armv8) Value(16) -+Enum(arm_fpu) String(neon-fp-armv8) Value(18) - - EnumValue --Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(17) -+Enum(arm_fpu) String(crypto-neon-fp-armv8) Value(19) - - EnumValue --Enum(arm_fpu) String(vfp3) Value(18) -+Enum(arm_fpu) String(vfp3) Value(20) - ---- a/src/gcc/config/arm/arm-tune.md -+++ b/src/gcc/config/arm/arm-tune.md -@@ -32,8 +32,10 @@ - cortexr4f,cortexr5,cortexr7, - cortexr8,cortexm7,cortexm4, - cortexm3,marvell_pj4,cortexa15cortexa7, -- cortexa17cortexa7,cortexa32,cortexa35, -- cortexa53,cortexa57,cortexa72, -+ cortexa17cortexa7,cortexm23,cortexa32, -+ cortexm33,cortexa35,cortexa53, -+ cortexa57,cortexa72,cortexa73, - exynosm1,qdf24xx,xgene1, -- cortexa57cortexa53,cortexa72cortexa53" -+ cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35, -+ cortexa73cortexa53" - (const (symbol_ref "((enum attr_tune) arm_tune)"))) ---- a/src/gcc/config/arm/arm.c -+++ b/src/gcc/config/arm/arm.c -@@ -27,6 +27,7 @@ - #include "target.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "cfghooks.h" - #include "df.h" - #include "tm_p.h" -@@ -61,6 +62,7 @@ - #include "builtins.h" - #include "tm-constrs.h" - #include "rtl-iter.h" -+#include "gimplify.h" - - /* This file should be included last. 
*/ - #include "target-def.h" -@@ -104,7 +106,6 @@ static void arm_print_operand_address (FILE *, machine_mode, rtx); - static bool arm_print_operand_punct_valid_p (unsigned char code); - static const char *fp_const_from_val (REAL_VALUE_TYPE *); - static arm_cc get_arm_condition_code (rtx); --static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); - static const char *output_multi_immediate (rtx *, const char *, const char *, - int, HOST_WIDE_INT); - static const char *shift_op (rtx, HOST_WIDE_INT *); -@@ -135,6 +136,8 @@ static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); - #if TARGET_DLLIMPORT_DECL_ATTRIBUTES - static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); - #endif -+static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *); -+static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *); - static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT); - static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); - static int arm_comp_type_attributes (const_tree, const_tree); -@@ -164,12 +167,6 @@ static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, - static bool arm_have_conditional_execution (void); - static bool arm_cannot_force_const_mem (machine_mode, rtx); - static bool arm_legitimate_constant_p (machine_mode, rtx); --static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); --static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); --static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); --static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); --static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); --static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); - static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool); - static int arm_address_cost (rtx, machine_mode, addr_space_t, bool); - static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t); -@@ -249,8 +246,6 @@ static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; - static bool arm_output_addr_const_extra (FILE *, rtx); - static bool arm_allocate_stack_slots_for_args (void); - static bool arm_warn_func_return (tree); --static const char *arm_invalid_parameter_type (const_tree t); --static const char *arm_invalid_return_type (const_tree t); - static tree arm_promoted_type (const_tree t); - static tree arm_convert_to_type (tree type, tree expr); - static bool arm_scalar_mode_supported_p (machine_mode); -@@ -300,6 +295,9 @@ static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, - static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); - - static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*); -+static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT, -+ const_tree); -+ - - /* Table of machine attributes. */ - static const struct attribute_spec arm_attribute_table[] = -@@ -343,6 +341,11 @@ static const struct attribute_spec arm_attribute_table[] = - { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute, - false }, - #endif -+ /* ARMv8-M Security Extensions support. 
*/ -+ { "cmse_nonsecure_entry", 0, 0, true, false, false, -+ arm_handle_cmse_nonsecure_entry, false }, -+ { "cmse_nonsecure_call", 0, 0, true, false, false, -+ arm_handle_cmse_nonsecure_call, true }, - { NULL, 0, 0, false, false, false, NULL, false } - }; - -@@ -463,7 +466,7 @@ static const struct attribute_spec arm_attribute_table[] = - #undef TARGET_ASM_OUTPUT_MI_THUNK - #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk - #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK --#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall -+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk - - #undef TARGET_RTX_COSTS - #define TARGET_RTX_COSTS arm_rtx_costs -@@ -654,12 +657,6 @@ static const struct attribute_spec arm_attribute_table[] = - #undef TARGET_PREFERRED_RELOAD_CLASS - #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class - --#undef TARGET_INVALID_PARAMETER_TYPE --#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type -- --#undef TARGET_INVALID_RETURN_TYPE --#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type -- - #undef TARGET_PROMOTED_TYPE - #define TARGET_PROMOTED_TYPE arm_promoted_type - -@@ -820,6 +817,13 @@ int arm_arch8 = 0; - /* Nonzero if this chip supports the ARMv8.1 extensions. */ - int arm_arch8_1 = 0; - -+/* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */ -+int arm_arch8_2 = 0; -+ -+/* Nonzero if this chip supports the FP16 instructions extension of ARM -+ Architecture 8.2. */ -+int arm_fp16_inst = 0; -+ - /* Nonzero if this chip can benefit from load scheduling. */ - int arm_ld_sched = 0; - -@@ -852,6 +856,9 @@ int arm_tune_cortex_a9 = 0; - interworking clean. */ - int arm_cpp_interwork = 0; - -+/* Nonzero if chip supports Thumb 1. */ -+int arm_arch_thumb1; -+ - /* Nonzero if chip supports Thumb 2. */ - int arm_arch_thumb2; - -@@ -892,6 +899,9 @@ int arm_condexec_masklen = 0; - /* Nonzero if chip supports the ARMv8 CRC instructions. */ - int arm_arch_crc = 0; - -+/* Nonzero if chip supports the ARMv8-M security extensions. */ -+int arm_arch_cmse = 0; -+ - /* Nonzero if the core has a very small, high-latency, multiply unit. */ - int arm_m_profile_small_mul = 0; - -@@ -1684,8 +1694,7 @@ const struct cpu_cost_table v7m_extra_costs = - - const struct tune_params arm_slowmul_tune = - { -- arm_slowmul_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. */ - NULL, /* Sched adj cost. */ - arm_default_branch_cost, - &arm_default_vec_cost, -@@ -1707,8 +1716,7 @@ const struct tune_params arm_slowmul_tune = - - const struct tune_params arm_fastmul_tune = - { -- arm_fastmul_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. */ - NULL, /* Sched adj cost. */ - arm_default_branch_cost, - &arm_default_vec_cost, -@@ -1733,8 +1741,7 @@ const struct tune_params arm_fastmul_tune = - - const struct tune_params arm_strongarm_tune = - { -- arm_fastmul_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. */ - NULL, /* Sched adj cost. */ - arm_default_branch_cost, - &arm_default_vec_cost, -@@ -1756,8 +1763,7 @@ const struct tune_params arm_strongarm_tune = - - const struct tune_params arm_xscale_tune = - { -- arm_xscale_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. 
*/ - xscale_sched_adjust_cost, - arm_default_branch_cost, - &arm_default_vec_cost, -@@ -1779,8 +1785,7 @@ const struct tune_params arm_xscale_tune = - - const struct tune_params arm_9e_tune = - { -- arm_9e_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. */ - NULL, /* Sched adj cost. */ - arm_default_branch_cost, - &arm_default_vec_cost, -@@ -1802,8 +1807,7 @@ const struct tune_params arm_9e_tune = - - const struct tune_params arm_marvell_pj4_tune = - { -- arm_9e_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. */ - NULL, /* Sched adj cost. */ - arm_default_branch_cost, - &arm_default_vec_cost, -@@ -1825,8 +1829,7 @@ const struct tune_params arm_marvell_pj4_tune = - - const struct tune_params arm_v6t2_tune = - { -- arm_9e_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. */ - NULL, /* Sched adj cost. */ - arm_default_branch_cost, - &arm_default_vec_cost, -@@ -1850,7 +1853,6 @@ const struct tune_params arm_v6t2_tune = - /* Generic Cortex tuning. Use more specific tunings if appropriate. */ - const struct tune_params arm_cortex_tune = - { -- arm_9e_rtx_costs, - &generic_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -1873,7 +1875,6 @@ const struct tune_params arm_cortex_tune = - - const struct tune_params arm_cortex_a8_tune = - { -- arm_9e_rtx_costs, - &cortexa8_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -1896,7 +1897,6 @@ const struct tune_params arm_cortex_a8_tune = - - const struct tune_params arm_cortex_a7_tune = - { -- arm_9e_rtx_costs, - &cortexa7_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -1919,7 +1919,6 @@ const struct tune_params arm_cortex_a7_tune = - - const struct tune_params arm_cortex_a15_tune = - { -- arm_9e_rtx_costs, - &cortexa15_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -1942,7 +1941,6 @@ const struct tune_params arm_cortex_a15_tune = - - const struct tune_params arm_cortex_a35_tune = - { -- arm_9e_rtx_costs, - &cortexa53_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -1965,7 +1963,6 @@ const struct tune_params arm_cortex_a35_tune = - - const struct tune_params arm_cortex_a53_tune = - { -- arm_9e_rtx_costs, - &cortexa53_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -1988,7 +1985,6 @@ const struct tune_params arm_cortex_a53_tune = - - const struct tune_params arm_cortex_a57_tune = - { -- arm_9e_rtx_costs, - &cortexa57_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -2011,7 +2007,6 @@ const struct tune_params arm_cortex_a57_tune = - - const struct tune_params arm_exynosm1_tune = - { -- arm_9e_rtx_costs, - &exynosm1_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -2034,7 +2029,6 @@ const struct tune_params arm_exynosm1_tune = - - const struct tune_params arm_xgene1_tune = - { -- arm_9e_rtx_costs, - &xgene1_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -2055,12 +2049,33 @@ const struct tune_params arm_xgene1_tune = - tune_params::SCHED_AUTOPREF_OFF - }; - -+const struct tune_params arm_qdf24xx_tune = -+{ -+ &qdf24xx_extra_costs, -+ NULL, /* Scheduler cost adjustment. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, /* Vectorizer costs. */ -+ 1, /* Constant limit. */ -+ 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 4, /* Issue rate. 
*/ -+ ARM_PREFETCH_BENEFICIAL (0, -1, 64), -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ FUSE_OPS (tune_params::FUSE_MOVW_MOVT), -+ tune_params::SCHED_AUTOPREF_FULL -+}; -+ - /* Branches can be dual-issued on Cortex-A5, so conditional execution is - less appealing. Set max_insns_skipped to a low value. */ - - const struct tune_params arm_cortex_a5_tune = - { -- arm_9e_rtx_costs, - &cortexa5_extra_costs, - NULL, /* Sched adj cost. */ - arm_cortex_a5_branch_cost, -@@ -2083,7 +2098,6 @@ const struct tune_params arm_cortex_a5_tune = - - const struct tune_params arm_cortex_a9_tune = - { -- arm_9e_rtx_costs, - &cortexa9_extra_costs, - cortex_a9_sched_adjust_cost, - arm_default_branch_cost, -@@ -2106,7 +2120,6 @@ const struct tune_params arm_cortex_a9_tune = - - const struct tune_params arm_cortex_a12_tune = - { -- arm_9e_rtx_costs, - &cortexa12_extra_costs, - NULL, /* Sched adj cost. */ - arm_default_branch_cost, -@@ -2127,6 +2140,28 @@ const struct tune_params arm_cortex_a12_tune = - tune_params::SCHED_AUTOPREF_OFF - }; - -+const struct tune_params arm_cortex_a73_tune = -+{ -+ &cortexa57_extra_costs, -+ NULL, /* Sched adj cost. */ -+ arm_default_branch_cost, -+ &arm_default_vec_cost, /* Vectorizer costs. */ -+ 1, /* Constant limit. */ -+ 2, /* Max cond insns. */ -+ 8, /* Memset max inline. */ -+ 2, /* Issue rate. */ -+ ARM_PREFETCH_NOT_BENEFICIAL, -+ tune_params::PREF_CONST_POOL_FALSE, -+ tune_params::PREF_LDRD_TRUE, -+ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ -+ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ -+ tune_params::DISPARAGE_FLAGS_ALL, -+ tune_params::PREF_NEON_64_FALSE, -+ tune_params::PREF_NEON_STRINGOPS_TRUE, -+ FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT), -+ tune_params::SCHED_AUTOPREF_FULL -+}; -+ - /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single - cycle to execute each. An LDR from the constant pool also takes two cycles - to execute, but mildly increases pipelining opportunity (consecutive -@@ -2136,7 +2171,6 @@ const struct tune_params arm_cortex_a12_tune = - - const struct tune_params arm_v7m_tune = - { -- arm_9e_rtx_costs, - &v7m_extra_costs, - NULL, /* Sched adj cost. */ - arm_cortex_m_branch_cost, -@@ -2161,7 +2195,6 @@ const struct tune_params arm_v7m_tune = - - const struct tune_params arm_cortex_m7_tune = - { -- arm_9e_rtx_costs, - &v7m_extra_costs, - NULL, /* Sched adj cost. */ - arm_cortex_m7_branch_cost, -@@ -2183,11 +2216,11 @@ const struct tune_params arm_cortex_m7_tune = - }; - - /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than -- arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */ -+ arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and -+ cortex-m23. */ - const struct tune_params arm_v6m_tune = - { -- arm_9e_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. */ - NULL, /* Sched adj cost. */ - arm_default_branch_cost, - &arm_default_vec_cost, /* Vectorizer costs. */ -@@ -2209,8 +2242,7 @@ const struct tune_params arm_v6m_tune = - - const struct tune_params arm_fa726te_tune = - { -- arm_9e_rtx_costs, -- NULL, /* Insn extra costs. */ -+ &generic_extra_costs, /* Insn extra costs. 
*/ - fa726te_sched_adjust_cost, - arm_default_branch_cost, - &arm_default_vec_cost, -@@ -2264,16 +2296,18 @@ static const struct processors *arm_selected_arch; - static const struct processors *arm_selected_cpu; - static const struct processors *arm_selected_tune; - --/* The name of the preprocessor macro to define for this architecture. */ -+/* The name of the preprocessor macro to define for this architecture. PROFILE -+ is replaced by the architecture name (eg. 8A) in arm_option_override () and -+ is thus chosen to be big enough to hold the longest architecture name. */ - --char arm_arch_name[] = "__ARM_ARCH_0UNK__"; -+char arm_arch_name[] = "__ARM_ARCH_PROFILE__"; - - /* Available values for -mfpu=. */ - - const struct arm_fpu_desc all_fpus[] = - { --#define ARM_FPU(NAME, MODEL, REV, VFP_REGS, FEATURES) \ -- { NAME, MODEL, REV, VFP_REGS, FEATURES }, -+#define ARM_FPU(NAME, REV, VFP_REGS, FEATURES) \ -+ { NAME, REV, VFP_REGS, FEATURES }, - #include "arm-fpus.def" - #undef ARM_FPU - }; -@@ -2752,8 +2786,8 @@ arm_option_check_internal (struct gcc_options *opts) - const struct arm_fpu_desc *fpu_desc = &all_fpus[opts->x_arm_fpu_index]; - - /* iWMMXt and NEON are incompatible. */ -- if (TARGET_IWMMXT && TARGET_VFP -- && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON)) -+ if (TARGET_IWMMXT -+ && ARM_FPU_FSET_HAS (fpu_desc->features, FPU_FL_NEON)) - error ("iWMMXt and NEON are incompatible"); - - /* Make sure that the processor choice does not conflict with any of the -@@ -2907,7 +2941,8 @@ arm_option_override_internal (struct gcc_options *opts, - if (! opts_set->x_arm_restrict_it) - opts->x_arm_restrict_it = arm_arch8; - -- if (!TARGET_THUMB2_P (opts->x_target_flags)) -+ /* ARM execution state and M profile don't have [restrict] IT. */ -+ if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm) - opts->x_arm_restrict_it = 0; - - /* Enable -munaligned-access by default for -@@ -2918,7 +2953,8 @@ arm_option_override_internal (struct gcc_options *opts, - - Disable -munaligned-access by default for - - all pre-ARMv6 architecture-based processors -- - ARMv6-M architecture-based processors. */ -+ - ARMv6-M architecture-based processors -+ - ARMv8-M Baseline processors. */ - - if (! opts_set->x_unaligned_access) - { -@@ -3152,9 +3188,6 @@ arm_option_override (void) - if (TARGET_APCS_REENT) - warning (0, "APCS reentrant code not supported. Ignored"); - -- if (TARGET_APCS_FLOAT) -- warning (0, "passing floating point arguments in fp regs not yet supported"); -- - /* Initialize boolean versions of the flags, for use in the arm.md file. 
*/ - arm_arch3m = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH3M); - arm_arch4 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH4); -@@ -3170,6 +3203,8 @@ arm_option_override (void) - arm_arch7em = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH7EM); - arm_arch8 = ARM_FSET_HAS_CPU1 (insn_flags, FL_ARCH8); - arm_arch8_1 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_1); -+ arm_arch8_2 = ARM_FSET_HAS_CPU2 (insn_flags, FL2_ARCH8_2); -+ arm_arch_thumb1 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB); - arm_arch_thumb2 = ARM_FSET_HAS_CPU1 (insn_flags, FL_THUMB2); - arm_arch_xscale = ARM_FSET_HAS_CPU1 (insn_flags, FL_XSCALE); - -@@ -3184,7 +3219,15 @@ arm_option_override (void) - arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE); - arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; - arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32); -+ arm_arch_cmse = ARM_FSET_HAS_CPU2 (insn_flags, FL2_CMSE); - arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL); -+ arm_fp16_inst = ARM_FSET_HAS_CPU2 (insn_flags, FL2_FP16INST); -+ if (arm_fp16_inst) -+ { -+ if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) -+ error ("selected fp16 options are incompatible."); -+ arm_fp16_format = ARM_FP16_FORMAT_IEEE; -+ } - - /* V5 code we generate is completely interworking capable, so we turn off - TARGET_INTERWORK here to avoid many tests later on. */ -@@ -3222,10 +3265,8 @@ arm_option_override (void) - /* If soft-float is specified then don't use FPU. */ - if (TARGET_SOFT_FLOAT) - arm_fpu_attr = FPU_NONE; -- else if (TARGET_VFP) -- arm_fpu_attr = FPU_VFP; - else -- gcc_unreachable(); -+ arm_fpu_attr = FPU_VFP; - - if (TARGET_AAPCS_BASED) - { -@@ -3245,15 +3286,14 @@ arm_option_override (void) - if (arm_abi == ARM_ABI_IWMMXT) - arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; - else if (arm_float_abi == ARM_FLOAT_ABI_HARD -- && TARGET_HARD_FLOAT -- && TARGET_VFP) -+ && TARGET_HARD_FLOAT) - arm_pcs_default = ARM_PCS_AAPCS_VFP; - else - arm_pcs_default = ARM_PCS_AAPCS; - } - else - { -- if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) -+ if (arm_float_abi == ARM_FLOAT_ABI_HARD) - sorry ("-mfloat-abi=hard and VFP"); - - if (arm_abi == ARM_ABI_APCS) -@@ -3298,6 +3338,20 @@ arm_option_override (void) - } - } - -+ if (TARGET_VXWORKS_RTP) -+ { -+ if (!global_options_set.x_arm_pic_data_is_text_relative) -+ arm_pic_data_is_text_relative = 0; -+ } -+ else if (flag_pic -+ && !arm_pic_data_is_text_relative -+ && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE)) -+ /* When text & data segments don't have a fixed displacement, the -+ intended use is with a single, read only, pic base register. -+ Unless the user explicitly requested not to do that, set -+ it. */ -+ target_flags |= MASK_SINGLE_PIC_BASE; -+ - /* If stack checking is disabled, we can use r10 as the PIC register, - which keeps r9 available. The EABI specifies r9 as the PIC register. */ - if (flag_pic && TARGET_SINGLE_PIC_BASE) -@@ -3329,10 +3383,6 @@ arm_option_override (void) - arm_pic_register = pic_register; - } - -- if (TARGET_VXWORKS_RTP -- && !global_options_set.x_arm_pic_data_is_text_relative) -- arm_pic_data_is_text_relative = 0; -- - /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. 
*/ - if (fix_cm3_ldrd == 2) - { -@@ -3436,6 +3486,9 @@ arm_option_override (void) - if (target_slow_flash_data) - arm_disable_literal_pool = true; - -+ if (use_cmse && !arm_arch_cmse) -+ error ("target CPU does not support ARMv8-M Security Extensions"); -+ - /* Disable scheduling fusion by default if it's not armv7 processor - or doesn't prefer ldrd/strd. */ - if (flag_schedule_fusion == 2 -@@ -3568,6 +3621,9 @@ arm_compute_func_type (void) - else - type |= arm_isr_value (TREE_VALUE (a)); - -+ if (lookup_attribute ("cmse_nonsecure_entry", attr)) -+ type |= ARM_FT_CMSE_ENTRY; -+ - return type; - } - -@@ -3794,6 +3850,11 @@ use_return_insn (int iscond, rtx sibling) - return 0; - } - -+ /* ARMv8-M nonsecure entry function need to use bxns to return and thus need -+ several instructions if anything needs to be popped. */ -+ if (saved_int_regs && IS_CMSE_ENTRY (func_type)) -+ return 0; -+ - /* If there are saved registers but the LR isn't saved, then we need - two instructions for the return. */ - if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM))) -@@ -3801,7 +3862,7 @@ use_return_insn (int iscond, rtx sibling) - - /* Can't be done if any of the VFP regs are pushed, - since this also requires an insn. */ -- if (TARGET_HARD_FLOAT && TARGET_VFP) -+ if (TARGET_HARD_FLOAT) - for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++) - if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) - return 0; -@@ -3899,7 +3960,7 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) - { - case SET: - /* See if we can use movw. */ -- if (arm_arch_thumb2 && (i & 0xffff0000) == 0) -+ if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0) - return 1; - else - /* Otherwise, try mvn. */ -@@ -4118,7 +4179,7 @@ optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, - yield a shorter sequence, we may as well use zero. */ - insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start); - if (best_start != 0 -- && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val)) -+ && ((HOST_WIDE_INT_1U << best_start) < val)) - { - insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0); - if (insns2 <= insns1) -@@ -4949,7 +5010,7 @@ arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, - if (mode == VOIDmode) - mode = GET_MODE (*op1); - -- maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; -+ maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1; - - /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode - we can also use cmp/cmpeq for GTU/LEU. 
GT/LE must be either -@@ -5255,7 +5316,6 @@ arm_function_value_regno_p (const unsigned int regno) - if (regno == ARG_REGISTER (1) - || (TARGET_32BIT - && TARGET_AAPCS_BASED -- && TARGET_VFP - && TARGET_HARD_FLOAT - && regno == FIRST_VFP_REGNUM) - || (TARGET_IWMMXT_ABI -@@ -5274,7 +5334,7 @@ arm_apply_result_size (void) - - if (TARGET_32BIT) - { -- if (TARGET_HARD_FLOAT_ABI && TARGET_VFP) -+ if (TARGET_HARD_FLOAT_ABI) - size += 32; - if (TARGET_IWMMXT_ABI) - size += 8; -@@ -5549,7 +5609,7 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) - { - case REAL_TYPE: - mode = TYPE_MODE (type); -- if (mode != DFmode && mode != SFmode) -+ if (mode != DFmode && mode != SFmode && mode != HFmode) - return -1; - - if (*modep == VOIDmode) -@@ -5722,7 +5782,7 @@ use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) - if (pcs_variant != ARM_PCS_AAPCS_LOCAL) - return false; - -- return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && -+ return (TARGET_32BIT && TARGET_HARD_FLOAT && - (TARGET_VFP_DOUBLE || !is_double)); - } - -@@ -5797,11 +5857,16 @@ aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode, - &pcum->aapcs_vfp_rcount); - } - -+/* Implement the allocate field in aapcs_cp_arg_layout. See the comment there -+ for the behaviour of this function. */ -+ - static bool - aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode, - const_tree type ATTRIBUTE_UNUSED) - { -- int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); -+ int rmode_size -+ = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode)); -+ int shift = rmode_size / GET_MODE_SIZE (SFmode); - unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; - int regno; - -@@ -5850,6 +5915,9 @@ aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode, - return false; - } - -+/* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the -+ comment there for the behaviour of this function. */ -+ - static rtx - aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED, - machine_mode mode, -@@ -5940,13 +6008,13 @@ static struct - required for a return from FUNCTION_ARG. */ - bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree); - -- /* Return true if a result of mode MODE (or type TYPE if MODE is -- BLKmode) is can be returned in this co-processor's registers. */ -+ /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can -+ be returned in this co-processor's registers. */ - bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree); - -- /* Allocate and return an RTX element to hold the return type of a -- call, this routine must not fail and will only be called if -- is_return_candidate returned true with the same parameters. */ -+ /* Allocate and return an RTX element to hold the return type of a call. This -+ routine must not fail and will only be called if is_return_candidate -+ returned true with the same parameters. */ - rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree); - - /* Finish processing this argument and prepare to start processing -@@ -6561,6 +6629,185 @@ arm_handle_notshared_attribute (tree *node, - } - #endif - -+/* This function returns true if a function with declaration FNDECL and type -+ FNTYPE uses the stack to pass arguments or return variables and false -+ otherwise. This is used for functions with the attributes -+ 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue -+ diagnostic messages if the stack is used. 
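
The aapcs_vfp_allocate change above hardens the register-count arithmetic against sub-word candidate modes: with a two-byte mode such as HFmode (now accepted by aapcs_vfp_sub_candidate earlier in this hunk), the old division by the size of SFmode produced a shift of zero and therefore an empty allocation mask. Clamping the size to SFmode's first reserves one S register per candidate. Spelled out with concrete byte sizes:

/* Illustrative restatement: GET_MODE_SIZE (HFmode) == 2,
   GET_MODE_SIZE (SFmode) == 4.  */
int old_shift = 2 / 4;			/* 0 -- mask (1 << (0 * count)) - 1 is empty */
int new_shift = (2 > 4 ? 2 : 4) / 4;	/* MAX (2, 4) / 4 == 1, one S register each */
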
NAME is the name of the attribute -+ used. */ -+ -+static bool -+cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype) -+{ -+ function_args_iterator args_iter; -+ CUMULATIVE_ARGS args_so_far_v; -+ cumulative_args_t args_so_far; -+ bool first_param = true; -+ tree arg_type, prev_arg_type = NULL_TREE, ret_type; -+ -+ /* Error out if any argument is passed on the stack. */ -+ arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl); -+ args_so_far = pack_cumulative_args (&args_so_far_v); -+ FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) -+ { -+ rtx arg_rtx; -+ machine_mode arg_mode = TYPE_MODE (arg_type); -+ -+ prev_arg_type = arg_type; -+ if (VOID_TYPE_P (arg_type)) -+ continue; -+ -+ if (!first_param) -+ arm_function_arg_advance (args_so_far, arg_mode, arg_type, true); -+ arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true); -+ if (!arg_rtx -+ || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true)) -+ { -+ error ("%qE attribute not available to functions with arguments " -+ "passed on the stack", name); -+ return true; -+ } -+ first_param = false; -+ } -+ -+ /* Error out for variadic functions since we cannot control how many -+ arguments will be passed and thus stack could be used. stdarg_p () is not -+ used for the checking to avoid browsing arguments twice. */ -+ if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type)) -+ { -+ error ("%qE attribute not available to functions with variable number " -+ "of arguments", name); -+ return true; -+ } -+ -+ /* Error out if return value is passed on the stack. */ -+ ret_type = TREE_TYPE (fntype); -+ if (arm_return_in_memory (ret_type, fntype)) -+ { -+ error ("%qE attribute not available to functions that return value on " -+ "the stack", name); -+ return true; -+ } -+ return false; -+} -+ -+/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this -+ function will check whether the attribute is allowed here and will add the -+ attribute to the function declaration tree or otherwise issue a warning. */ -+ -+static tree -+arm_handle_cmse_nonsecure_entry (tree *node, tree name, -+ tree /* args */, -+ int /* flags */, -+ bool *no_add_attrs) -+{ -+ tree fndecl; -+ -+ if (!use_cmse) -+ { -+ *no_add_attrs = true; -+ warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.", -+ name); -+ return NULL_TREE; -+ } -+ -+ /* Ignore attribute for function types. */ -+ if (TREE_CODE (*node) != FUNCTION_DECL) -+ { -+ warning (OPT_Wattributes, "%qE attribute only applies to functions", -+ name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ -+ fndecl = *node; -+ -+ /* Warn for static linkage functions. */ -+ if (!TREE_PUBLIC (fndecl)) -+ { -+ warning (OPT_Wattributes, "%qE attribute has no effect on functions " -+ "with static linkage", name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ -+ *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name, -+ TREE_TYPE (fndecl)); -+ return NULL_TREE; -+} -+ -+ -+/* Called upon detection of the use of the cmse_nonsecure_call attribute, this -+ function will check whether the attribute is allowed here and will add the -+ attribute to the function type tree or otherwise issue a diagnostic. The -+ reason we check this at declaration time is to only allow the use of the -+ attribute with declarations of function pointers and not function -+ declarations. This function checks NODE is of the expected type and issues -+ diagnostics otherwise using NAME. 
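
Taken together, cmse_func_args_or_return_in_stack and the cmse_nonsecure_entry handler above accept the attribute only when every argument and the returned value travel in registers. A user-level sketch, assuming -mcmse on an ARMv8-M target (function names are illustrative):

/* Accepted: both arguments and the result fit in r0-r3.  */
int __attribute__ ((cmse_nonsecure_entry))
ns_add (int a, int b)
{
  return a + b;
}

/* Rejected by the checks above: a fifth integer argument would be
   passed on the stack, and a variadic entry point is refused outright.  */
int __attribute__ ((cmse_nonsecure_entry))
ns_too_many (int a, int b, int c, int d, int e);

int __attribute__ ((cmse_nonsecure_entry))
ns_variadic (int a, ...);
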
If it is not of the expected type -+ *NO_ADD_ATTRS will be set to true. */ -+ -+static tree -+arm_handle_cmse_nonsecure_call (tree *node, tree name, -+ tree /* args */, -+ int /* flags */, -+ bool *no_add_attrs) -+{ -+ tree decl = NULL_TREE, fntype = NULL_TREE; -+ tree type; -+ -+ if (!use_cmse) -+ { -+ *no_add_attrs = true; -+ warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.", -+ name); -+ return NULL_TREE; -+ } -+ -+ if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL) -+ { -+ decl = *node; -+ fntype = TREE_TYPE (decl); -+ } -+ -+ while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE) -+ fntype = TREE_TYPE (fntype); -+ -+ if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE) -+ { -+ warning (OPT_Wattributes, "%qE attribute only applies to base type of a " -+ "function pointer", name); -+ *no_add_attrs = true; -+ return NULL_TREE; -+ } -+ -+ *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype); -+ -+ if (*no_add_attrs) -+ return NULL_TREE; -+ -+ /* Prevent trees being shared among function types with and without -+ cmse_nonsecure_call attribute. */ -+ type = TREE_TYPE (decl); -+ -+ type = build_distinct_type_copy (type); -+ TREE_TYPE (decl) = type; -+ fntype = type; -+ -+ while (TREE_CODE (fntype) != FUNCTION_TYPE) -+ { -+ type = fntype; -+ fntype = TREE_TYPE (fntype); -+ fntype = build_distinct_type_copy (fntype); -+ TREE_TYPE (type) = fntype; -+ } -+ -+ /* Construct a type attribute and add it to the function type. */ -+ tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE, -+ TYPE_ATTRIBUTES (fntype)); -+ TYPE_ATTRIBUTES (fntype) = attrs; -+ return NULL_TREE; -+} -+ - /* Return 0 if the attributes for two types are incompatible, 1 if they - are compatible, and 2 if they are nearly compatible (which causes a - warning to be generated). */ -@@ -6601,6 +6848,14 @@ arm_comp_type_attributes (const_tree type1, const_tree type2) - if (l1 != l2) - return 0; - -+ l1 = lookup_attribute ("cmse_nonsecure_call", -+ TYPE_ATTRIBUTES (type1)) != NULL; -+ l2 = lookup_attribute ("cmse_nonsecure_call", -+ TYPE_ATTRIBUTES (type2)) != NULL; -+ -+ if (l1 != l2) -+ return 0; -+ - return 1; - } - -@@ -6711,7 +6966,7 @@ arm_function_ok_for_sibcall (tree decl, tree exp) - may be used both as target of the call and base register for restoring - the VFP registers */ - if (TARGET_APCS_FRAME && TARGET_ARM -- && TARGET_HARD_FLOAT && TARGET_VFP -+ && TARGET_HARD_FLOAT - && decl && arm_is_long_call_p (decl)) - return false; - -@@ -6727,6 +6982,20 @@ arm_function_ok_for_sibcall (tree decl, tree exp) - if (IS_INTERRUPT (func_type)) - return false; - -+ /* ARMv8-M non-secure entry functions need to return with bxns which is only -+ generated for entry functions themselves. */ -+ if (IS_CMSE_ENTRY (arm_current_func_type ())) -+ return false; -+ -+ /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls, -+ this would complicate matters for later code generation. */ -+ if (TREE_CODE (exp) == CALL_EXPR) -+ { -+ tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); -+ if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype))) -+ return false; -+ } -+ - if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) - { - /* Check that the return value locations are the same. 
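
Because the handler above attaches cmse_nonsecure_call to the FUNCTION_TYPE reached through a pointer, the attribute is spelled on function-pointer declarations rather than on function definitions. A sketch of the intended use under -mcmse (identifiers are illustrative):

typedef int __attribute__ ((cmse_nonsecure_call)) ns_fn_t (int);

ns_fn_t *ns_get;	/* set up elsewhere to point at non-secure code */

int
secure_poll (void)
{
  /* Lowered as a non-secure call sequence and, per the sibcall hunk
     above, never converted into a tail call.  */
  return ns_get (0);
}
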
For -@@ -7187,8 +7456,7 @@ arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer, - return 1; - - use_ldrd = (TARGET_LDRD -- && (mode == DImode -- || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); -+ && (mode == DImode || mode == DFmode)); - - if (code == POST_INC || code == PRE_DEC - || ((code == PRE_INC || code == POST_DEC) -@@ -7273,8 +7541,7 @@ thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p) - return 1; - - use_ldrd = (TARGET_LDRD -- && (mode == DImode -- || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); -+ && (mode == DImode || mode == DFmode)); - - if (code == POST_INC || code == PRE_DEC - || ((code == PRE_INC || code == POST_DEC) -@@ -7367,7 +7634,6 @@ arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer, - - /* Standard coprocessor addressing modes. */ - if (TARGET_HARD_FLOAT -- && TARGET_VFP - && (mode == SFmode || mode == DFmode)) - return (code == CONST_INT && INTVAL (index) < 1024 - && INTVAL (index) > -1024 -@@ -7487,7 +7753,6 @@ thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p) - /* ??? Combine arm and thumb2 coprocessor addressing modes. */ - /* Standard coprocessor addressing modes. */ - if (TARGET_HARD_FLOAT -- && TARGET_VFP - && (mode == SFmode || mode == DFmode)) - return (code == CONST_INT && INTVAL (index) < 1024 - /* Thumb-2 allows only > -256 index range for it's core register -@@ -8033,8 +8298,7 @@ arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode) - - /* VFP addressing modes actually allow greater offsets, but for - now we just stick with the lowest common denominator. */ -- if (mode == DImode -- || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode)) -+ if (mode == DImode || mode == DFmode) - { - low_n = n & 0x0f; - n &= ~0x0f; -@@ -8226,6 +8490,12 @@ arm_legitimate_constant_p_1 (machine_mode, rtx x) - static bool - thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) - { -+ /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high -+ RTX. These RTX must therefore be allowed for Thumb-1 so that when run -+ for ARMv8-M Baseline or later the result is valid. */ -+ if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH) -+ x = XEXP (x, 0); -+ - return (CONST_INT_P (x) - || CONST_DOUBLE_P (x) - || CONSTANT_ADDRESS_P (x) -@@ -8312,7 +8582,9 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) - case CONST_INT: - if (outer == SET) - { -- if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) -+ if (UINTVAL (x) < 256 -+ /* 16-bit constant. */ -+ || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))) - return 0; - if (thumb_shiftable_const (INTVAL (x))) - return COSTS_N_INSNS (2); -@@ -8329,8 +8601,8 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) - int i; - /* This duplicates the tests in the andsi3 expander. */ - for (i = 9; i <= 31; i++) -- if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) -- || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) -+ if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x) -+ || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x)) - return COSTS_N_INSNS (2); - } - else if (outer == ASHIFT || outer == ASHIFTRT -@@ -8393,1006 +8665,162 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) - } - } - --static inline bool --arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) -+/* Estimates the size cost of thumb1 instructions. -+ For now most of the code is copied from thumb1_rtx_costs. We need more -+ fine grain tuning when we have more related test cases. 
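
The thumb_legitimate_constant_p hunk above is what lets ARMv8-M Baseline reuse the generic MOVW/MOVT splitters: arm_emit_movpair produces a HIGH RTX, so HIGH has to be a legitimate Thumb-1 constant on cores with MOVT. The payoff, with an illustrative constant:

/* On a TARGET_HAVE_MOVT core this can be materialized as
       movw	r0, #0x5678
       movt	r0, #0x1234
   instead of a literal-pool load.  */
unsigned
get_magic (void)
{
  return 0x12345678u;
}
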
*/ -+static inline int -+thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) - { - machine_mode mode = GET_MODE (x); -- enum rtx_code subcode; -- rtx operand; -- enum rtx_code code = GET_CODE (x); -- *total = 0; -+ int words, cost; - - switch (code) - { -- case MEM: -- /* Memory costs quite a lot for the first word, but subsequent words -- load at the equivalent of a single insn each. */ -- *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); -- return true; -+ case ASHIFT: -+ case ASHIFTRT: -+ case LSHIFTRT: -+ case ROTATERT: -+ return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2); - -- case DIV: -- case MOD: -- case UDIV: -- case UMOD: -- if (TARGET_HARD_FLOAT && mode == SFmode) -- *total = COSTS_N_INSNS (2); -- else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) -- *total = COSTS_N_INSNS (4); -- else -- *total = COSTS_N_INSNS (20); -- return false; -+ case PLUS: -+ case MINUS: -+ /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1 -+ defined by RTL expansion, especially for the expansion of -+ multiplication. */ -+ if ((GET_CODE (XEXP (x, 0)) == MULT -+ && power_of_two_operand (XEXP (XEXP (x,0),1), SImode)) -+ || (GET_CODE (XEXP (x, 1)) == MULT -+ && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))) -+ return COSTS_N_INSNS (2); -+ /* On purpose fall through for normal RTX. */ -+ case COMPARE: -+ case NEG: -+ case NOT: -+ return COSTS_N_INSNS (1); - -- case ROTATE: -- if (REG_P (XEXP (x, 1))) -- *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */ -- else if (!CONST_INT_P (XEXP (x, 1))) -- *total = rtx_cost (XEXP (x, 1), mode, code, 1, speed); -+ case MULT: -+ if (CONST_INT_P (XEXP (x, 1))) -+ { -+ /* Thumb1 mul instruction can't operate on const. We must Load it -+ into a register first. */ -+ int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET); -+ /* For the targets which have a very small and high-latency multiply -+ unit, we prefer to synthesize the mult with up to 5 instructions, -+ giving a good balance between size and performance. */ -+ if (arm_arch6m && arm_m_profile_small_mul) -+ return COSTS_N_INSNS (5); -+ else -+ return COSTS_N_INSNS (1) + const_size; -+ } -+ return COSTS_N_INSNS (1); - -- /* Fall through */ -- case ROTATERT: -- if (mode != SImode) -- { -- *total += COSTS_N_INSNS (4); -- return true; -- } -+ case SET: -+ /* A SET doesn't have a mode, so let's look at the SET_DEST to get -+ the mode. */ -+ words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); -+ cost = COSTS_N_INSNS (words); -+ if (satisfies_constraint_J (SET_SRC (x)) -+ || satisfies_constraint_K (SET_SRC (x)) -+ /* Too big an immediate for a 2-byte mov, using MOVT. */ -+ || (CONST_INT_P (SET_SRC (x)) -+ && UINTVAL (SET_SRC (x)) >= 256 -+ && TARGET_HAVE_MOVT -+ && satisfies_constraint_j (SET_SRC (x))) -+ /* thumb1_movdi_insn. */ -+ || ((words > 1) && MEM_P (SET_SRC (x)))) -+ cost += COSTS_N_INSNS (1); -+ return cost; - -- /* Fall through */ -- case ASHIFT: case LSHIFTRT: case ASHIFTRT: -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- if (mode == DImode) -- { -- *total += COSTS_N_INSNS (3); -- return true; -- } -+ case CONST_INT: -+ if (outer == SET) -+ { -+ if (UINTVAL (x) < 256) -+ return COSTS_N_INSNS (1); -+ /* movw is 4byte long. */ -+ if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)) -+ return COSTS_N_INSNS (2); -+ /* See split "TARGET_THUMB1 && satisfies_constraint_J". 
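
All of the size costs above are counted in COSTS_N_INSNS units rather than bytes (rtl.h defines COSTS_N_INSNS (n) as n * 4), which is why a constant that needs a 32-bit MOVW encoding is charged one unit more than an 8-bit immediate. Restated outside GCC:

#define COSTS_N_INSNS(n) ((n) * 4)

int mov_imm8   = COSTS_N_INSNS (1);	/* 16-bit MOV of a constant < 256  */
int movw_imm16 = COSTS_N_INSNS (2);	/* 32-bit MOVW, as costed above    */
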
*/ -+ if (INTVAL (x) >= -255 && INTVAL (x) <= -1) -+ return COSTS_N_INSNS (2); -+ /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ -+ if (thumb_shiftable_const (INTVAL (x))) -+ return COSTS_N_INSNS (2); -+ return COSTS_N_INSNS (3); -+ } -+ else if ((outer == PLUS || outer == COMPARE) -+ && INTVAL (x) < 256 && INTVAL (x) > -256) -+ return 0; -+ else if ((outer == IOR || outer == XOR || outer == AND) -+ && INTVAL (x) < 256 && INTVAL (x) >= -256) -+ return COSTS_N_INSNS (1); -+ else if (outer == AND) -+ { -+ int i; -+ /* This duplicates the tests in the andsi3 expander. */ -+ for (i = 9; i <= 31; i++) -+ if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x) -+ || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x)) -+ return COSTS_N_INSNS (2); -+ } -+ else if (outer == ASHIFT || outer == ASHIFTRT -+ || outer == LSHIFTRT) -+ return 0; -+ return COSTS_N_INSNS (2); - -- *total += COSTS_N_INSNS (1); -- /* Increase the cost of complex shifts because they aren't any faster, -- and reduce dual issue opportunities. */ -- if (arm_tune_cortex_a9 -- && outer != SET && !CONST_INT_P (XEXP (x, 1))) -- ++*total; -+ case CONST: -+ case CONST_DOUBLE: -+ case LABEL_REF: -+ case SYMBOL_REF: -+ return COSTS_N_INSNS (3); - -- return true; -+ case UDIV: -+ case UMOD: -+ case DIV: -+ case MOD: -+ return 100; - -- case MINUS: -- if (mode == DImode) -- { -- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -- if (CONST_INT_P (XEXP (x, 0)) -- && const_ok_for_arm (INTVAL (XEXP (x, 0)))) -- { -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- return true; -- } -- -- if (CONST_INT_P (XEXP (x, 1)) -- && const_ok_for_arm (INTVAL (XEXP (x, 1)))) -- { -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- return true; -- } -- -- return false; -- } -- -- if (GET_MODE_CLASS (mode) == MODE_FLOAT) -- { -- if (TARGET_HARD_FLOAT -- && (mode == SFmode -- || (mode == DFmode && !TARGET_VFP_SINGLE))) -- { -- *total = COSTS_N_INSNS (1); -- if (CONST_DOUBLE_P (XEXP (x, 0)) -- && arm_const_double_rtx (XEXP (x, 0))) -- { -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- return true; -- } -- -- if (CONST_DOUBLE_P (XEXP (x, 1)) -- && arm_const_double_rtx (XEXP (x, 1))) -- { -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- return true; -- } -- -- return false; -- } -- *total = COSTS_N_INSNS (20); -- return false; -- } -- -- *total = COSTS_N_INSNS (1); -- if (CONST_INT_P (XEXP (x, 0)) -- && const_ok_for_arm (INTVAL (XEXP (x, 0)))) -- { -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- return true; -- } -- -- subcode = GET_CODE (XEXP (x, 1)); -- if (subcode == ASHIFT || subcode == ASHIFTRT -- || subcode == LSHIFTRT -- || subcode == ROTATE || subcode == ROTATERT) -- { -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed); -- return true; -- } -- -- /* A shift as a part of RSB costs no more than RSB itself. 
*/ -- if (GET_CODE (XEXP (x, 0)) == MULT -- && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) -- { -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed); -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- return true; -- } -- -- if (subcode == MULT -- && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)) -- { -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- *total += rtx_cost (XEXP (XEXP (x, 1), 0), mode, subcode, 0, speed); -- return true; -- } -- -- if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE -- || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE) -- { -- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, -- 0, speed); -- if (REG_P (XEXP (XEXP (x, 1), 0)) -- && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM) -- *total += COSTS_N_INSNS (1); -- -- return true; -- } -- -- /* Fall through */ -- -- case PLUS: -- if (code == PLUS && arm_arch6 && mode == SImode -- && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND -- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) -- { -- *total = COSTS_N_INSNS (1); -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, -- GET_CODE (XEXP (x, 0)), 0, speed); -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- return true; -- } -- -- /* MLA: All arguments must be registers. We filter out -- multiplication by a power of two, so that we fall down into -- the code below. */ -- if (GET_CODE (XEXP (x, 0)) == MULT -- && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) -- { -- /* The cost comes from the cost of the multiply. */ -- return false; -- } -- -- if (GET_MODE_CLASS (mode) == MODE_FLOAT) -- { -- if (TARGET_HARD_FLOAT -- && (mode == SFmode -- || (mode == DFmode && !TARGET_VFP_SINGLE))) -- { -- *total = COSTS_N_INSNS (1); -- if (CONST_DOUBLE_P (XEXP (x, 1)) -- && arm_const_double_rtx (XEXP (x, 1))) -- { -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- return true; -- } -- -- return false; -- } -- -- *total = COSTS_N_INSNS (20); -- return false; -- } -- -- if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE -- || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) -- { -- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), mode, code, -- 1, speed); -- if (REG_P (XEXP (XEXP (x, 0), 0)) -- && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM) -- *total += COSTS_N_INSNS (1); -- return true; -- } -- -- /* Fall through */ -- -- case AND: case XOR: case IOR: -- -- /* Normally the frame registers will be spilt into reg+const during -- reload, so it is a bad idea to combine them with other instructions, -- since then they might not be moved outside of loops. As a compromise -- we allow integration with ops that have a constant as their second -- operand. 
*/ -- if (REG_OR_SUBREG_REG (XEXP (x, 0)) -- && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) -- && !CONST_INT_P (XEXP (x, 1))) -- *total = COSTS_N_INSNS (1); -- -- if (mode == DImode) -- { -- *total += COSTS_N_INSNS (2); -- if (CONST_INT_P (XEXP (x, 1)) -- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) -- { -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- return true; -- } -- -- return false; -- } -- -- *total += COSTS_N_INSNS (1); -- if (CONST_INT_P (XEXP (x, 1)) -- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) -- { -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- return true; -- } -- subcode = GET_CODE (XEXP (x, 0)); -- if (subcode == ASHIFT || subcode == ASHIFTRT -- || subcode == LSHIFTRT -- || subcode == ROTATE || subcode == ROTATERT) -- { -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed); -- return true; -- } -- -- if (subcode == MULT -- && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) -- { -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed); -- return true; -- } -- -- if (subcode == UMIN || subcode == UMAX -- || subcode == SMIN || subcode == SMAX) -- { -- *total = COSTS_N_INSNS (3); -- return true; -- } -- -- return false; -- -- case MULT: -- /* This should have been handled by the CPU specific routines. */ -- gcc_unreachable (); -- -- case TRUNCATE: -- if (arm_arch3m && mode == SImode -- && GET_CODE (XEXP (x, 0)) == LSHIFTRT -- && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT -- && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) -- == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))) -- && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND -- || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND)) -- { -- *total = rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, LSHIFTRT, -- 0, speed); -- return true; -- } -- *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */ -- return false; -- -- case NEG: -- if (GET_MODE_CLASS (mode) == MODE_FLOAT) -- { -- if (TARGET_HARD_FLOAT -- && (mode == SFmode -- || (mode == DFmode && !TARGET_VFP_SINGLE))) -- { -- *total = COSTS_N_INSNS (1); -- return false; -- } -- *total = COSTS_N_INSNS (2); -- return false; -- } -- -- /* Fall through */ -- case NOT: -- *total = COSTS_N_INSNS (ARM_NUM_REGS(mode)); -- if (mode == SImode && code == NOT) -- { -- subcode = GET_CODE (XEXP (x, 0)); -- if (subcode == ASHIFT || subcode == ASHIFTRT -- || subcode == LSHIFTRT -- || subcode == ROTATE || subcode == ROTATERT -- || (subcode == MULT -- && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))) -- { -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, -- 0, speed); -- /* Register shifts cost an extra cycle. 
*/ -- if (!CONST_INT_P (XEXP (XEXP (x, 0), 1))) -- *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1), -- mode, subcode, -- 1, speed); -- return true; -- } -- } -- -- return false; -- -- case IF_THEN_ELSE: -- if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) -- { -- *total = COSTS_N_INSNS (4); -- return true; -- } -- -- operand = XEXP (x, 0); -- -- if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE -- || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE) -- && REG_P (XEXP (operand, 0)) -- && REGNO (XEXP (operand, 0)) == CC_REGNUM)) -- *total += COSTS_N_INSNS (1); -- *total += rtx_cost (XEXP (x, 1), VOIDmode, code, 1, speed); -- *total += rtx_cost (XEXP (x, 2), VOIDmode, code, 2, speed); -- return true; -- -- case NE: -- if (mode == SImode && XEXP (x, 1) == const0_rtx) -- { -- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, -- 0, speed); -- return true; -- } -- goto scc_insn; -- -- case GE: -- if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM) -- && mode == SImode && XEXP (x, 1) == const0_rtx) -- { -- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, -- 0, speed); -- return true; -- } -- goto scc_insn; -- -- case LT: -- if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM) -- && mode == SImode && XEXP (x, 1) == const0_rtx) -- { -- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, -- 0, speed); -- return true; -- } -- goto scc_insn; -- -- case EQ: -- case GT: -- case LE: -- case GEU: -- case LTU: -- case GTU: -- case LEU: -- case UNORDERED: -- case ORDERED: -- case UNEQ: -- case UNGE: -- case UNLT: -- case UNGT: -- case UNLE: -- scc_insn: -- /* SCC insns. In the case where the comparison has already been -- performed, then they cost 2 instructions. Otherwise they need -- an additional comparison before them. 
*/ -- *total = COSTS_N_INSNS (2); -- if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM) -- { -- return true; -- } -- -- /* Fall through */ -- case COMPARE: -- if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM) -- { -- *total = 0; -- return true; -- } -- -- *total += COSTS_N_INSNS (1); -- if (CONST_INT_P (XEXP (x, 1)) -- && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) -- { -- *total += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed); -- return true; -- } -- -- subcode = GET_CODE (XEXP (x, 0)); -- if (subcode == ASHIFT || subcode == ASHIFTRT -- || subcode == LSHIFTRT -- || subcode == ROTATE || subcode == ROTATERT) -- { -- mode = GET_MODE (XEXP (x, 0)); -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed); -- return true; -- } -- -- if (subcode == MULT -- && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) -- { -- mode = GET_MODE (XEXP (x, 0)); -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, subcode, 0, speed); -- return true; -- } -- -- return false; -- -- case UMIN: -- case UMAX: -- case SMIN: -- case SMAX: -- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- if (!CONST_INT_P (XEXP (x, 1)) -- || !const_ok_for_arm (INTVAL (XEXP (x, 1)))) -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed); -- return true; -- -- case ABS: -- if (GET_MODE_CLASS (mode) == MODE_FLOAT) -- { -- if (TARGET_HARD_FLOAT -- && (mode == SFmode -- || (mode == DFmode && !TARGET_VFP_SINGLE))) -- { -- *total = COSTS_N_INSNS (1); -- return false; -- } -- *total = COSTS_N_INSNS (20); -- return false; -- } -- *total = COSTS_N_INSNS (1); -- if (mode == DImode) -- *total += COSTS_N_INSNS (3); -- return false; -- -- case SIGN_EXTEND: -- case ZERO_EXTEND: -- *total = 0; -- if (GET_MODE_CLASS (mode) == MODE_INT) -- { -- rtx op = XEXP (x, 0); -- machine_mode opmode = GET_MODE (op); -- -- if (mode == DImode) -- *total += COSTS_N_INSNS (1); -- -- if (opmode != SImode) -- { -- if (MEM_P (op)) -- { -- /* If !arm_arch4, we use one of the extendhisi2_mem -- or movhi_bytes patterns for HImode. For a QImode -- sign extension, we first zero-extend from memory -- and then perform a shift sequence. */ -- if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND)) -- *total += COSTS_N_INSNS (2); -- } -- else if (arm_arch6) -- *total += COSTS_N_INSNS (1); -- -- /* We don't have the necessary insn, so we need to perform some -- other operation. */ -- else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode) -- /* An and with constant 255. */ -- *total += COSTS_N_INSNS (1); -- else -- /* A shift sequence. Increase costs slightly to avoid -- combining two shifts into an extend operation. 
*/ -- *total += COSTS_N_INSNS (2) + 1; -- } -- -- return false; -- } -- -- switch (GET_MODE (XEXP (x, 0))) -- { -- case V8QImode: -- case V4HImode: -- case V2SImode: -- case V4QImode: -- case V2HImode: -- *total = COSTS_N_INSNS (1); -- return false; -- -- default: -- gcc_unreachable (); -- } -- gcc_unreachable (); -- -- case ZERO_EXTRACT: -- case SIGN_EXTRACT: -- mode = GET_MODE (XEXP (x, 0)); -- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- return true; -- -- case CONST_INT: -- if (const_ok_for_arm (INTVAL (x)) -- || const_ok_for_arm (~INTVAL (x))) -- *total = COSTS_N_INSNS (1); -- else -- *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX, -- INTVAL (x), NULL_RTX, -- NULL_RTX, 0, 0)); -- return true; -- -- case CONST: -- case LABEL_REF: -- case SYMBOL_REF: -- *total = COSTS_N_INSNS (3); -- return true; -- -- case HIGH: -- *total = COSTS_N_INSNS (1); -- return true; -- -- case LO_SUM: -- *total = COSTS_N_INSNS (1); -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- return true; -- -- case CONST_DOUBLE: -- if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) -- && (mode == SFmode || !TARGET_VFP_SINGLE)) -- *total = COSTS_N_INSNS (1); -- else -- *total = COSTS_N_INSNS (4); -- return true; -- -- case SET: -- /* The vec_extract patterns accept memory operands that require an -- address reload. Account for the cost of that reload to give the -- auto-inc-dec pass an incentive to try to replace them. */ -- if (TARGET_NEON && MEM_P (SET_DEST (x)) -- && GET_CODE (SET_SRC (x)) == VEC_SELECT) -- { -- mode = GET_MODE (SET_DEST (x)); -- *total = rtx_cost (SET_DEST (x), mode, code, 0, speed); -- if (!neon_vector_mem_operand (SET_DEST (x), 2, true)) -- *total += COSTS_N_INSNS (1); -- return true; -- } -- /* Likewise for the vec_set patterns. */ -- if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE -- && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE -- && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0))) -- { -- rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0); -- mode = GET_MODE (SET_DEST (x)); -- *total = rtx_cost (mem, mode, code, 0, speed); -- if (!neon_vector_mem_operand (mem, 2, true)) -- *total += COSTS_N_INSNS (1); -- return true; -- } -- return false; -- -- case UNSPEC: -- /* We cost this as high as our memory costs to allow this to -- be hoisted from loops. */ -- if (XINT (x, 1) == UNSPEC_PIC_UNIFIED) -- { -- *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); -- } -- return true; -- -- case CONST_VECTOR: -- if (TARGET_NEON -- && TARGET_HARD_FLOAT -- && outer == SET -- && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) -- && neon_immediate_valid_for_move (x, mode, NULL, NULL)) -- *total = COSTS_N_INSNS (1); -- else -- *total = COSTS_N_INSNS (4); -- return true; -- -- default: -- *total = COSTS_N_INSNS (4); -- return false; -- } --} -- --/* Estimates the size cost of thumb1 instructions. -- For now most of the code is copied from thumb1_rtx_costs. We need more -- fine grain tuning when we have more related test cases. */ --static inline int --thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) --{ -- machine_mode mode = GET_MODE (x); -- int words; -- -- switch (code) -- { -- case ASHIFT: -- case ASHIFTRT: -- case LSHIFTRT: -- case ROTATERT: -- return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2); -- -- case PLUS: -- case MINUS: -- /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1 -- defined by RTL expansion, especially for the expansion of -- multiplication. 
*/ -- if ((GET_CODE (XEXP (x, 0)) == MULT -- && power_of_two_operand (XEXP (XEXP (x,0),1), SImode)) -- || (GET_CODE (XEXP (x, 1)) == MULT -- && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))) -- return COSTS_N_INSNS (2); -- /* On purpose fall through for normal RTX. */ -- case COMPARE: -- case NEG: -- case NOT: -- return COSTS_N_INSNS (1); -- -- case MULT: -- if (CONST_INT_P (XEXP (x, 1))) -- { -- /* Thumb1 mul instruction can't operate on const. We must Load it -- into a register first. */ -- int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET); -- /* For the targets which have a very small and high-latency multiply -- unit, we prefer to synthesize the mult with up to 5 instructions, -- giving a good balance between size and performance. */ -- if (arm_arch6m && arm_m_profile_small_mul) -- return COSTS_N_INSNS (5); -- else -- return COSTS_N_INSNS (1) + const_size; -- } -- return COSTS_N_INSNS (1); -- -- case SET: -- /* A SET doesn't have a mode, so let's look at the SET_DEST to get -- the mode. */ -- words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); -- return COSTS_N_INSNS (words) -- + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x)) -- || satisfies_constraint_K (SET_SRC (x)) -- /* thumb1_movdi_insn. */ -- || ((words > 1) && MEM_P (SET_SRC (x)))); -- -- case CONST_INT: -- if (outer == SET) -- { -- if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) -- return COSTS_N_INSNS (1); -- /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ -- if (INTVAL (x) >= -255 && INTVAL (x) <= -1) -- return COSTS_N_INSNS (2); -- /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ -- if (thumb_shiftable_const (INTVAL (x))) -- return COSTS_N_INSNS (2); -- return COSTS_N_INSNS (3); -- } -- else if ((outer == PLUS || outer == COMPARE) -- && INTVAL (x) < 256 && INTVAL (x) > -256) -- return 0; -- else if ((outer == IOR || outer == XOR || outer == AND) -- && INTVAL (x) < 256 && INTVAL (x) >= -256) -- return COSTS_N_INSNS (1); -- else if (outer == AND) -- { -- int i; -- /* This duplicates the tests in the andsi3 expander. */ -- for (i = 9; i <= 31; i++) -- if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) -- || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) -- return COSTS_N_INSNS (2); -- } -- else if (outer == ASHIFT || outer == ASHIFTRT -- || outer == LSHIFTRT) -- return 0; -- return COSTS_N_INSNS (2); -- -- case CONST: -- case CONST_DOUBLE: -- case LABEL_REF: -- case SYMBOL_REF: -- return COSTS_N_INSNS (3); -- -- case UDIV: -- case UMOD: -- case DIV: -- case MOD: -- return 100; -- -- case TRUNCATE: -- return 99; -- -- case AND: -- case XOR: -- case IOR: -- return COSTS_N_INSNS (1); -- -- case MEM: -- return (COSTS_N_INSNS (1) -- + COSTS_N_INSNS (1) -- * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) -- + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) -- ? COSTS_N_INSNS (1) : 0)); -- -- case IF_THEN_ELSE: -- /* XXX a guess. */ -- if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) -- return 14; -- return 2; -- -- case ZERO_EXTEND: -- /* XXX still guessing. */ -- switch (GET_MODE (XEXP (x, 0))) -- { -- case QImode: -- return (1 + (mode == DImode ? 4 : 0) -- + (MEM_P (XEXP (x, 0)) ? 10 : 0)); -- -- case HImode: -- return (4 + (mode == DImode ? 4 : 0) -- + (MEM_P (XEXP (x, 0)) ? 10 : 0)); -- -- case SImode: -- return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0)); -- -- default: -- return 99; -- } -- -- default: -- return 99; -- } --} -- --/* RTX costs when optimizing for size. 
*/ --static bool --arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, -- int *total) --{ -- machine_mode mode = GET_MODE (x); -- if (TARGET_THUMB1) -- { -- *total = thumb1_size_rtx_costs (x, code, outer_code); -- return true; -- } -- -- /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ -- switch (code) -- { -- case MEM: -- /* A memory access costs 1 insn if the mode is small, or the address is -- a single register, otherwise it costs one insn per word. */ -- if (REG_P (XEXP (x, 0))) -- *total = COSTS_N_INSNS (1); -- else if (flag_pic -- && GET_CODE (XEXP (x, 0)) == PLUS -- && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) -- /* This will be split into two instructions. -- See arm.md:calculate_pic_address. */ -- *total = COSTS_N_INSNS (2); -- else -- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -- return true; -- -- case DIV: -- case MOD: -- case UDIV: -- case UMOD: -- /* Needs a libcall, so it costs about this. */ -- *total = COSTS_N_INSNS (2); -- return false; -- -- case ROTATE: -- if (mode == SImode && REG_P (XEXP (x, 1))) -- { -- *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, -- 0, false); -- return true; -- } -- /* Fall through */ -- case ROTATERT: -- case ASHIFT: -- case LSHIFTRT: -- case ASHIFTRT: -- if (mode == DImode && CONST_INT_P (XEXP (x, 1))) -- { -- *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), mode, code, -- 0, false); -- return true; -- } -- else if (mode == SImode) -- { -- *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, -- 0, false); -- /* Slightly disparage register shifts, but not by much. */ -- if (!CONST_INT_P (XEXP (x, 1))) -- *total += 1 + rtx_cost (XEXP (x, 1), mode, code, 1, false); -- return true; -- } -- -- /* Needs a libcall. */ -- *total = COSTS_N_INSNS (2); -- return false; -- -- case MINUS: -- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT -- && (mode == SFmode || !TARGET_VFP_SINGLE)) -- { -- *total = COSTS_N_INSNS (1); -- return false; -- } -- -- if (mode == SImode) -- { -- enum rtx_code subcode0 = GET_CODE (XEXP (x, 0)); -- enum rtx_code subcode1 = GET_CODE (XEXP (x, 1)); -- -- if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT -- || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT -- || subcode1 == ROTATE || subcode1 == ROTATERT -- || subcode1 == ASHIFT || subcode1 == LSHIFTRT -- || subcode1 == ASHIFTRT) -- { -- /* It's just the cost of the two operands. */ -- *total = 0; -- return false; -- } -- -- *total = COSTS_N_INSNS (1); -- return false; -- } -- -- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -- return false; -- -- case PLUS: -- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT -- && (mode == SFmode || !TARGET_VFP_SINGLE)) -- { -- *total = COSTS_N_INSNS (1); -- return false; -- } -- -- /* A shift as a part of ADD costs nothing. */ -- if (GET_CODE (XEXP (x, 0)) == MULT -- && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) -- { -- *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1); -- *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, false); -- *total += rtx_cost (XEXP (x, 1), mode, code, 1, false); -- return true; -- } -- -- /* Fall through */ -- case AND: case XOR: case IOR: -- if (mode == SImode) -- { -- enum rtx_code subcode = GET_CODE (XEXP (x, 0)); -- -- if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT -- || subcode == LSHIFTRT || subcode == ASHIFTRT -- || (code == AND && subcode == NOT)) -- { -- /* It's just the cost of the two operands. 
*/ -- *total = 0; -- return false; -- } -- } -- -- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -- return false; -- -- case MULT: -- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -- return false; -- -- case NEG: -- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT -- && (mode == SFmode || !TARGET_VFP_SINGLE)) -- { -- *total = COSTS_N_INSNS (1); -- return false; -- } -- -- /* Fall through */ -- case NOT: -- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -- -- return false; -+ case TRUNCATE: -+ return 99; - -- case IF_THEN_ELSE: -- *total = 0; -- return false; -+ case AND: -+ case XOR: -+ case IOR: -+ return COSTS_N_INSNS (1); - -- case COMPARE: -- if (cc_register (XEXP (x, 0), VOIDmode)) -- * total = 0; -- else -- *total = COSTS_N_INSNS (1); -- return false; -+ case MEM: -+ return (COSTS_N_INSNS (1) -+ + COSTS_N_INSNS (1) -+ * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) -+ + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) -+ ? COSTS_N_INSNS (1) : 0)); - -- case ABS: -- if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT -- && (mode == SFmode || !TARGET_VFP_SINGLE)) -- *total = COSTS_N_INSNS (1); -- else -- *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); -- return false; -+ case IF_THEN_ELSE: -+ /* XXX a guess. */ -+ if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) -+ return 14; -+ return 2; - -- case SIGN_EXTEND: - case ZERO_EXTEND: -- return arm_rtx_costs_1 (x, outer_code, total, 0); -- -- case CONST_INT: -- if (const_ok_for_arm (INTVAL (x))) -- /* A multiplication by a constant requires another instruction -- to load the constant to a register. */ -- *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT) -- ? 1 : 0); -- else if (const_ok_for_arm (~INTVAL (x))) -- *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1); -- else if (const_ok_for_arm (-INTVAL (x))) -- { -- if (outer_code == COMPARE || outer_code == PLUS -- || outer_code == MINUS) -- *total = 0; -- else -- *total = COSTS_N_INSNS (1); -- } -- else -- *total = COSTS_N_INSNS (2); -- return true; -- -- case CONST: -- case LABEL_REF: -- case SYMBOL_REF: -- *total = COSTS_N_INSNS (2); -- return true; -- -- case CONST_DOUBLE: -- *total = COSTS_N_INSNS (4); -- return true; -+ /* XXX still guessing. */ -+ switch (GET_MODE (XEXP (x, 0))) -+ { -+ case QImode: -+ return (1 + (mode == DImode ? 4 : 0) -+ + (MEM_P (XEXP (x, 0)) ? 10 : 0)); - -- case CONST_VECTOR: -- if (TARGET_NEON -- && TARGET_HARD_FLOAT -- && outer_code == SET -- && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) -- && neon_immediate_valid_for_move (x, mode, NULL, NULL)) -- *total = COSTS_N_INSNS (1); -- else -- *total = COSTS_N_INSNS (4); -- return true; -+ case HImode: -+ return (4 + (mode == DImode ? 4 : 0) -+ + (MEM_P (XEXP (x, 0)) ? 10 : 0)); - -- case HIGH: -- case LO_SUM: -- /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the -- cost of these slightly. */ -- *total = COSTS_N_INSNS (1) + 1; -- return true; -+ case SImode: -+ return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0)); - -- case SET: -- return false; -+ default: -+ return 99; -+ } - - default: -- if (mode != VOIDmode) -- *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -- else -- *total = COSTS_N_INSNS (4); /* How knows? */ -- return false; -+ return 99; - } - } - -@@ -9519,7 +8947,7 @@ arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) - flags are live or not, and thus no realistic way to determine what - the size will eventually be. 
*/ - static bool --arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, -+arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code, - const struct cpu_cost_table *extra_cost, - int *cost, bool speed_p) - { -@@ -10771,8 +10199,6 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, - if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode) - && MEM_P (XEXP (x, 0))) - { -- *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p); -- - if (mode == DImode) - *cost += COSTS_N_INSNS (1); - -@@ -11164,390 +10590,70 @@ arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, - /* Vector costs? */ - } - *cost = LIBCALL_COST (1); -- return false; -- -- case FLOAT: -- case UNSIGNED_FLOAT: -- if (TARGET_HARD_FLOAT) -- { -- /* ??? Increase the cost to deal with transferring from CORE -- -> FP registers? */ -- if (speed_p) -- *cost += extra_cost->fp[mode == DFmode].fromint; -- return false; -- } -- *cost = LIBCALL_COST (1); -- return false; -- -- case CALL: -- return true; -- -- case ASM_OPERANDS: -- { -- /* Just a guess. Guess number of instructions in the asm -- plus one insn per input. Always a minimum of COSTS_N_INSNS (1) -- though (see PR60663). */ -- int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); -- int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); -- -- *cost = COSTS_N_INSNS (asm_length + num_operands); -- return true; -- } -- default: -- if (mode != VOIDmode) -- *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -- else -- *cost = COSTS_N_INSNS (4); /* Who knows? */ -- return false; -- } --} -- --#undef HANDLE_NARROW_SHIFT_ARITH -- --/* RTX costs when optimizing for size. */ --static bool --arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code, -- int opno ATTRIBUTE_UNUSED, int *total, bool speed) --{ -- bool result; -- int code = GET_CODE (x); -- -- if (TARGET_OLD_RTX_COSTS -- || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS)) -- { -- /* Old way. (Deprecated.) */ -- if (!speed) -- result = arm_size_rtx_costs (x, (enum rtx_code) code, -- (enum rtx_code) outer_code, total); -- else -- result = current_tune->rtx_costs (x, (enum rtx_code) code, -- (enum rtx_code) outer_code, total, -- speed); -- } -- else -- { -- /* New way. */ -- if (current_tune->insn_extra_cost) -- result = arm_new_rtx_costs (x, (enum rtx_code) code, -- (enum rtx_code) outer_code, -- current_tune->insn_extra_cost, -- total, speed); -- /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS -- && current_tune->insn_extra_cost != NULL */ -- else -- result = arm_new_rtx_costs (x, (enum rtx_code) code, -- (enum rtx_code) outer_code, -- &generic_extra_costs, total, speed); -- } -- -- if (dump_file && (dump_flags & TDF_DETAILS)) -- { -- print_rtl_single (dump_file, x); -- fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold", -- *total, result ? "final" : "partial"); -- } -- return result; --} -- --/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not -- supported on any "slowmul" cores, so it can be ignored. 
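
With the TARGET_OLD_RTX_COSTS escape hatch and the per-core callbacks deleted, the cost entry point reduces to a single path. A simplified condensation of the surviving function (the full version, with its detailed-dump output, appears in the following hunks):

/* Simplified shape of arm_rtx_costs after this change: every tuning
   must now carry an insn_extra_cost table.  */
static bool
arm_rtx_costs (rtx x, machine_mode, int outer_code, int, int *total, bool speed)
{
  gcc_assert (current_tune->insn_extra_cost);
  return arm_rtx_costs_internal (x, GET_CODE (x), (enum rtx_code) outer_code,
				 current_tune->insn_extra_cost, total, speed);
}
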
*/ -- --static bool --arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, -- int *total, bool speed) --{ -- machine_mode mode = GET_MODE (x); -- -- if (TARGET_THUMB) -- { -- *total = thumb1_rtx_costs (x, code, outer_code); -- return true; -- } -- -- switch (code) -- { -- case MULT: -- if (GET_MODE_CLASS (mode) == MODE_FLOAT -- || mode == DImode) -- { -- *total = COSTS_N_INSNS (20); -- return false; -- } -- -- if (CONST_INT_P (XEXP (x, 1))) -- { -- unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) -- & (unsigned HOST_WIDE_INT) 0xffffffff); -- int cost, const_ok = const_ok_for_arm (i); -- int j, booth_unit_size; -- -- /* Tune as appropriate. */ -- cost = const_ok ? 4 : 8; -- booth_unit_size = 2; -- for (j = 0; i && j < 32; j += booth_unit_size) -- { -- i >>= booth_unit_size; -- cost++; -- } -- -- *total = COSTS_N_INSNS (cost); -- *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed); -- return true; -- } -- -- *total = COSTS_N_INSNS (20); -- return false; -- -- default: -- return arm_rtx_costs_1 (x, outer_code, total, speed);; -- } --} -- -- --/* RTX cost for cores with a fast multiply unit (M variants). */ -- --static bool --arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, -- int *total, bool speed) --{ -- machine_mode mode = GET_MODE (x); -- -- if (TARGET_THUMB1) -- { -- *total = thumb1_rtx_costs (x, code, outer_code); -- return true; -- } -- -- /* ??? should thumb2 use different costs? */ -- switch (code) -- { -- case MULT: -- /* There is no point basing this on the tuning, since it is always the -- fast variant if it exists at all. */ -- if (mode == DImode -- && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) -- && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND -- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) -- { -- *total = COSTS_N_INSNS(2); -- return false; -- } -- -- -- if (mode == DImode) -- { -- *total = COSTS_N_INSNS (5); -- return false; -- } -- -- if (CONST_INT_P (XEXP (x, 1))) -- { -- unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) -- & (unsigned HOST_WIDE_INT) 0xffffffff); -- int cost, const_ok = const_ok_for_arm (i); -- int j, booth_unit_size; -- -- /* Tune as appropriate. */ -- cost = const_ok ? 4 : 8; -- booth_unit_size = 8; -- for (j = 0; i && j < 32; j += booth_unit_size) -- { -- i >>= booth_unit_size; -- cost++; -- } -- -- *total = COSTS_N_INSNS(cost); -- return false; -- } -- -- if (mode == SImode) -- { -- *total = COSTS_N_INSNS (4); -- return false; -- } -- -- if (GET_MODE_CLASS (mode) == MODE_FLOAT) -- { -- if (TARGET_HARD_FLOAT -- && (mode == SFmode -- || (mode == DFmode && !TARGET_VFP_SINGLE))) -- { -- *total = COSTS_N_INSNS (1); -- return false; -- } -- } -- -- /* Requires a lib call */ -- *total = COSTS_N_INSNS (20); -- return false; -- -- default: -- return arm_rtx_costs_1 (x, outer_code, total, speed); -- } --} -- -- --/* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores, -- so it can be ignored. */ -- --static bool --arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, -- int *total, bool speed) --{ -- machine_mode mode = GET_MODE (x); -- -- if (TARGET_THUMB) -- { -- *total = thumb1_rtx_costs (x, code, outer_code); -- return true; -- } -- -- switch (code) -- { -- case COMPARE: -- if (GET_CODE (XEXP (x, 0)) != MULT) -- return arm_rtx_costs_1 (x, outer_code, total, speed); -- -- /* A COMPARE of a MULT is slow on XScale; the muls instruction -- will stall until the multiplication is complete. 
*/ -- *total = COSTS_N_INSNS (3); -- return false; -- -- case MULT: -- /* There is no point basing this on the tuning, since it is always the -- fast variant if it exists at all. */ -- if (mode == DImode -- && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) -- && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND -- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) -- { -- *total = COSTS_N_INSNS (2); -- return false; -- } -- -- -- if (mode == DImode) -- { -- *total = COSTS_N_INSNS (5); -- return false; -- } -- -- if (CONST_INT_P (XEXP (x, 1))) -- { -- /* If operand 1 is a constant we can more accurately -- calculate the cost of the multiply. The multiplier can -- retire 15 bits on the first cycle and a further 12 on the -- second. We do, of course, have to load the constant into -- a register first. */ -- unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); -- /* There's a general overhead of one cycle. */ -- int cost = 1; -- unsigned HOST_WIDE_INT masked_const; -- -- if (i & 0x80000000) -- i = ~i; -- -- i &= (unsigned HOST_WIDE_INT) 0xffffffff; -- -- masked_const = i & 0xffff8000; -- if (masked_const != 0) -- { -- cost++; -- masked_const = i & 0xf8000000; -- if (masked_const != 0) -- cost++; -- } -- *total = COSTS_N_INSNS (cost); -- return false; -- } -+ return false; - -- if (mode == SImode) -+ case FLOAT: -+ case UNSIGNED_FLOAT: -+ if (TARGET_HARD_FLOAT) - { -- *total = COSTS_N_INSNS (3); -+ /* ??? Increase the cost to deal with transferring from CORE -+ -> FP registers? */ -+ if (speed_p) -+ *cost += extra_cost->fp[mode == DFmode].fromint; - return false; - } -- -- /* Requires a lib call */ -- *total = COSTS_N_INSNS (20); -+ *cost = LIBCALL_COST (1); - return false; - -+ case CALL: -+ return true; -+ -+ case ASM_OPERANDS: -+ { -+ /* Just a guess. Guess number of instructions in the asm -+ plus one insn per input. Always a minimum of COSTS_N_INSNS (1) -+ though (see PR60663). */ -+ int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); -+ int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); -+ -+ *cost = COSTS_N_INSNS (asm_length + num_operands); -+ return true; -+ } - default: -- return arm_rtx_costs_1 (x, outer_code, total, speed); -+ if (mode != VOIDmode) -+ *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); -+ else -+ *cost = COSTS_N_INSNS (4); /* Who knows? */ -+ return false; - } - } - -+#undef HANDLE_NARROW_SHIFT_ARITH - --/* RTX costs for 9e (and later) cores. */ -+/* RTX costs entry point. */ - - static bool --arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, -- int *total, bool speed) -+arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code, -+ int opno ATTRIBUTE_UNUSED, int *total, bool speed) - { -- machine_mode mode = GET_MODE (x); -- -- if (TARGET_THUMB1) -- { -- switch (code) -- { -- case MULT: -- /* Small multiply: 32 cycles for an integer multiply inst. */ -- if (arm_arch6m && arm_m_profile_small_mul) -- *total = COSTS_N_INSNS (32); -- else -- *total = COSTS_N_INSNS (3); -- return true; -+ bool result; -+ int code = GET_CODE (x); -+ gcc_assert (current_tune->insn_extra_cost); - -- default: -- *total = thumb1_rtx_costs (x, code, outer_code); -- return true; -- } -- } -+ result = arm_rtx_costs_internal (x, (enum rtx_code) code, -+ (enum rtx_code) outer_code, -+ current_tune->insn_extra_cost, -+ total, speed); - -- switch (code) -+ if (dump_file && (dump_flags & TDF_DETAILS)) - { -- case MULT: -- /* There is no point basing this on the tuning, since it is always the -- fast variant if it exists at all. 
*/ -- if (mode == DImode -- && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) -- && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND -- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) -- { -- *total = COSTS_N_INSNS (2); -- return false; -- } -- -- -- if (mode == DImode) -- { -- *total = COSTS_N_INSNS (5); -- return false; -- } -- -- if (mode == SImode) -- { -- *total = COSTS_N_INSNS (2); -- return false; -- } -- -- if (GET_MODE_CLASS (mode) == MODE_FLOAT) -- { -- if (TARGET_HARD_FLOAT -- && (mode == SFmode -- || (mode == DFmode && !TARGET_VFP_SINGLE))) -- { -- *total = COSTS_N_INSNS (1); -- return false; -- } -- } -- -- *total = COSTS_N_INSNS (20); -- return false; -- -- default: -- return arm_rtx_costs_1 (x, outer_code, total, speed); -+ print_rtl_single (dump_file, x); -+ fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold", -+ *total, result ? "final" : "partial"); - } -+ return result; - } -+ - /* All address computations that can be done are free, but rtx cost returns - the same for practically all of them. So we weight the different types - of address here in the order (most pref first): -@@ -12269,7 +11375,7 @@ vfp3_const_double_index (rtx x) - - /* We can permit four significant bits of mantissa only, plus a high bit - which is always 1. */ -- mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; -+ mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1; - if ((mantissa & mask) != 0) - return -1; - -@@ -12423,6 +11529,12 @@ neon_valid_immediate (rtx op, machine_mode mode, int inverse, - return 18; - } - -+ /* The tricks done in the code below apply for little-endian vector layout. -+ For big-endian vectors only allow vectors of the form { a, a, a..., a }. -+ FIXME: Implement logic for big-endian vectors. */ -+ if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op)) -+ return -1; -+ - /* Splat vector constant out into a byte vector. */ - for (i = 0; i < n_elts; i++) - { -@@ -13151,7 +12263,7 @@ coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb) - { - if (mode == HFmode) - { -- if (!TARGET_NEON_FP16) -+ if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST) - return GENERAL_REGS; - if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true)) - return NO_REGS; -@@ -15988,14 +15100,17 @@ gen_operands_ldrd_strd (rtx *operands, bool load, - /* If the same input register is used in both stores - when storing different constants, try to find a free register. - For example, the code -- mov r0, 0 -- str r0, [r2] -- mov r0, 1 -- str r0, [r2, #4] -+ mov r0, 0 -+ str r0, [r2] -+ mov r0, 1 -+ str r0, [r2, #4] - can be transformed into -- mov r1, 0 -- strd r1, r0, [r2] -- in Thumb mode assuming that r1 is free. */ -+ mov r1, 0 -+ mov r0, 1 -+ strd r1, r0, [r2] -+ in Thumb mode assuming that r1 is free. -+ For ARM mode do the same but only if the starting register -+ can be made to be even. */ - if (const_store - && REGNO (operands[0]) == REGNO (operands[1]) - && INTVAL (operands[4]) != INTVAL (operands[5])) -@@ -16014,7 +15129,6 @@ gen_operands_ldrd_strd (rtx *operands, bool load, - } - else if (TARGET_ARM) - { -- return false; - int regno = REGNO (operands[0]); - if (!peep2_reg_dead_p (4, operands[0])) - { -@@ -16368,7 +15482,7 @@ get_jump_table_size (rtx_jump_table_data *insn) - { - case 1: - /* Round up size of TBB table to a halfword boundary. */ -- size = (size + 1) & ~(HOST_WIDE_INT)1; -+ size = (size + 1) & ~HOST_WIDE_INT_1; - break; - case 2: - /* No padding necessary for TBH. 
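
Earlier in this hunk, arm_rtx_costs keeps its detailed-dump output: each costed RTX is printed, followed by a line built from the fprintf format string shown above, so a dump taken with TDF_DETAILS contains entries of this shape (the RTX and the value 8 are illustrative; "final" means the callback costed the whole expression, "partial" that the caller still adds operand costs):

(set (reg:SI 0 r0)
     (plus:SI (reg:SI 1 r1) (const_int 4 [0x4])))
Hot cost: 8 (final)
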
*/ -@@ -16837,35 +15951,37 @@ dump_minipool (rtx_insn *scan) - fputc ('\n', dump_file); - } - -+ rtx val = copy_rtx (mp->value); -+ - switch (GET_MODE_SIZE (mp->mode)) - { - #ifdef HAVE_consttable_1 - case 1: -- scan = emit_insn_after (gen_consttable_1 (mp->value), scan); -+ scan = emit_insn_after (gen_consttable_1 (val), scan); - break; - - #endif - #ifdef HAVE_consttable_2 - case 2: -- scan = emit_insn_after (gen_consttable_2 (mp->value), scan); -+ scan = emit_insn_after (gen_consttable_2 (val), scan); - break; - - #endif - #ifdef HAVE_consttable_4 - case 4: -- scan = emit_insn_after (gen_consttable_4 (mp->value), scan); -+ scan = emit_insn_after (gen_consttable_4 (val), scan); - break; - - #endif - #ifdef HAVE_consttable_8 - case 8: -- scan = emit_insn_after (gen_consttable_8 (mp->value), scan); -+ scan = emit_insn_after (gen_consttable_8 (val), scan); - break; - - #endif - #ifdef HAVE_consttable_16 - case 16: -- scan = emit_insn_after (gen_consttable_16 (mp->value), scan); -+ scan = emit_insn_after (gen_consttable_16 (val), scan); - break; - - #endif -@@ -17269,6 +16385,470 @@ note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes) - return; - } - -+/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs -+ and unions in the context of ARMv8-M Security Extensions. It is used as a -+ helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' -+ functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one -+ or four masks, depending on whether it is being computed for a -+ 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument -+ respectively. The tree for the type of the argument or a field within an -+ argument is passed in ARG_TYPE, the current register this argument or field -+ starts in is kept in the pointer REGNO and updated accordingly, the bit this -+ argument or field starts at is passed in STARTING_BIT and the last used bit -+ is kept in LAST_USED_BIT which is also updated accordingly. */ -+ -+static unsigned HOST_WIDE_INT -+comp_not_to_clear_mask_str_un (tree arg_type, int * regno, -+ uint32_t * padding_bits_to_clear, -+ unsigned starting_bit, int * last_used_bit) -+ -+{ -+ unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0; -+ -+ if (TREE_CODE (arg_type) == RECORD_TYPE) -+ { -+ unsigned current_bit = starting_bit; -+ tree field; -+ long int offset, size; -+ -+ -+ field = TYPE_FIELDS (arg_type); -+ while (field) -+ { -+ /* The offset within a structure is always an offset from -+ the start of that structure. Make sure we take that into the -+ calculation of the register based offset that we use here. */ -+ offset = starting_bit; -+ offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0); -+ offset %= 32; -+ -+ /* This is the actual size of the field, for bitfields this is the -+ bitfield width and not the container size. */ -+ size = TREE_INT_CST_ELT (DECL_SIZE (field), 0); -+ -+ if (*last_used_bit != offset) -+ { -+ if (offset < *last_used_bit) -+ { -+ /* This field's offset is before the 'last_used_bit', that -+ means this field goes on the next register. So we need to -+ pad the rest of the current register and increase the -+ register number. */ -+ uint32_t mask; -+ mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit); -+ mask++; -+ -+ padding_bits_to_clear[*regno] |= mask; -+ not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno; -+ (*regno)++; -+ } -+ else -+ { -+ /* Otherwise we pad the bits between the last field's end and -+ the start of the new field. 
*/ -+ uint32_t mask; -+ -+ mask = ((uint32_t)-1) >> (32 - offset); -+ mask -= ((uint32_t) 1 << *last_used_bit) - 1; -+ padding_bits_to_clear[*regno] |= mask; -+ } -+ current_bit = offset; -+ } -+ -+ /* Calculate further padding bits for inner structs/unions too. */ -+ if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field))) -+ { -+ *last_used_bit = current_bit; -+ not_to_clear_reg_mask -+ |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno, -+ padding_bits_to_clear, offset, -+ last_used_bit); -+ } -+ else -+ { -+ /* Update 'current_bit' with this field's size. If the -+ 'current_bit' lies in a subsequent register, update 'regno' and -+ reset 'current_bit' to point to the current bit in that new -+ register. */ -+ current_bit += size; -+ while (current_bit >= 32) -+ { -+ current_bit-=32; -+ not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno; -+ (*regno)++; -+ } -+ *last_used_bit = current_bit; -+ } -+ -+ field = TREE_CHAIN (field); -+ } -+ not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno; -+ } -+ else if (TREE_CODE (arg_type) == UNION_TYPE) -+ { -+ tree field, field_t; -+ int i, regno_t, field_size; -+ int max_reg = -1; -+ int max_bit = -1; -+ uint32_t mask; -+ uint32_t padding_bits_to_clear_res[NUM_ARG_REGS] -+ = {-1, -1, -1, -1}; -+ -+ /* To compute the padding bits in a union we only consider bits as -+ padding bits if they are always either a padding bit or fall outside a -+ fields size for all fields in the union. */ -+ field = TYPE_FIELDS (arg_type); -+ while (field) -+ { -+ uint32_t padding_bits_to_clear_t[NUM_ARG_REGS] -+ = {0U, 0U, 0U, 0U}; -+ int last_used_bit_t = *last_used_bit; -+ regno_t = *regno; -+ field_t = TREE_TYPE (field); -+ -+ /* If the field's type is either a record or a union make sure to -+ compute their padding bits too. */ -+ if (RECORD_OR_UNION_TYPE_P (field_t)) -+ not_to_clear_reg_mask -+ |= comp_not_to_clear_mask_str_un (field_t, ®no_t, -+ &padding_bits_to_clear_t[0], -+ starting_bit, &last_used_bit_t); -+ else -+ { -+ field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0); -+ regno_t = (field_size / 32) + *regno; -+ last_used_bit_t = (starting_bit + field_size) % 32; -+ } -+ -+ for (i = *regno; i < regno_t; i++) -+ { -+ /* For all but the last register used by this field only keep the -+ padding bits that were padding bits in this field. */ -+ padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i]; -+ } -+ -+ /* For the last register, keep all padding bits that were padding -+ bits in this field and any padding bits that are still valid -+ as padding bits but fall outside of this field's size. */ -+ mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1; -+ padding_bits_to_clear_res[regno_t] -+ &= padding_bits_to_clear_t[regno_t] | mask; -+ -+ /* Update the maximum size of the fields in terms of registers used -+ ('max_reg') and the 'last_used_bit' in said register. */ -+ if (max_reg < regno_t) -+ { -+ max_reg = regno_t; -+ max_bit = last_used_bit_t; -+ } -+ else if (max_reg == regno_t && max_bit < last_used_bit_t) -+ max_bit = last_used_bit_t; -+ -+ field = TREE_CHAIN (field); -+ } -+ -+ /* Update the current padding_bits_to_clear using the intersection of the -+ padding bits of all the fields. */ -+ for (i=*regno; i < max_reg; i++) -+ padding_bits_to_clear[i] |= padding_bits_to_clear_res[i]; -+ -+ /* Do not keep trailing padding bits, we do not know yet whether this -+ is the end of the argument. 
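
The mask arithmetic in comp_not_to_clear_mask_str_un is easy to check in isolation. Below is a minimal sketch of the inter-field case for a single 32-bit argument register; the helper name and the struct layout (first field ends at bit 8, next field starts at bit 16) are hypothetical:

#include <assert.h>
#include <stdint.h>

/* Padding bits between two fields in one 32-bit argument register:
   bits [last_used_bit, offset) are padding and must be cleared.
   Assumes 0 < offset < 32 (the caller reduces offsets modulo 32).  */
static uint32_t
inter_field_padding (unsigned last_used_bit, unsigned offset)
{
  uint32_t mask = ((uint32_t) -1) >> (32 - offset);
  mask -= ((uint32_t) 1 << last_used_bit) - 1;
  return mask;
}

int
main (void)
{
  /* E.g. struct { uint8_t a; uint16_t b; }: a ends at bit 8, b is
     aligned to bit 16, so bits 8-15 are padding.  */
  assert (inter_field_padding (8, 16) == 0x0000ff00);
  return 0;
}
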
*/ -+ mask = ((uint32_t) 1 << max_bit) - 1; -+ padding_bits_to_clear[max_reg] -+ |= padding_bits_to_clear_res[max_reg] & mask; -+ -+ *regno = max_reg; -+ *last_used_bit = max_bit; -+ } -+ else -+ /* This function should only be used for structs and unions. */ -+ gcc_unreachable (); -+ -+ return not_to_clear_reg_mask; -+} -+ -+/* In the context of ARMv8-M Security Extensions, this function is used for both -+ 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what -+ registers are used when returning or passing arguments, which is then -+ returned as a mask. It will also compute a mask to indicate padding/unused -+ bits for each of these registers, and passes this through the -+ PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in -+ ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and -+ the starting register used to pass this argument or return value is passed -+ in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these -+ for struct and union types. */ -+ -+static unsigned HOST_WIDE_INT -+compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno, -+ uint32_t * padding_bits_to_clear) -+ -+{ -+ int last_used_bit = 0; -+ unsigned HOST_WIDE_INT not_to_clear_mask; -+ -+ if (RECORD_OR_UNION_TYPE_P (arg_type)) -+ { -+ not_to_clear_mask -+ = comp_not_to_clear_mask_str_un (arg_type, ®no, -+ padding_bits_to_clear, 0, -+ &last_used_bit); -+ -+ -+ /* If the 'last_used_bit' is not zero, that means we are still using a -+ part of the last 'regno'. In such cases we must clear the trailing -+ bits. Otherwise we are not using regno and we should mark it as to -+ clear. */ -+ if (last_used_bit != 0) -+ padding_bits_to_clear[regno] -+ |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1; -+ else -+ not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno); -+ } -+ else -+ { -+ not_to_clear_mask = 0; -+ /* We are not dealing with structs nor unions. So these arguments may be -+ passed in floating point registers too. In some cases a BLKmode is -+ used when returning or passing arguments in multiple VFP registers. */ -+ if (GET_MODE (arg_rtx) == BLKmode) -+ { -+ int i, arg_regs; -+ rtx reg; -+ -+ /* This should really only occur when dealing with the hard-float -+ ABI. */ -+ gcc_assert (TARGET_HARD_FLOAT_ABI); -+ -+ for (i = 0; i < XVECLEN (arg_rtx, 0); i++) -+ { -+ reg = XEXP (XVECEXP (arg_rtx, 0, i), 0); -+ gcc_assert (REG_P (reg)); -+ -+ not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg); -+ -+ /* If we are dealing with DF mode, make sure we don't -+ clear either of the registers it addresses. */ -+ arg_regs = ARM_NUM_REGS (GET_MODE (reg)); -+ if (arg_regs > 1) -+ { -+ unsigned HOST_WIDE_INT mask; -+ mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs); -+ mask -= HOST_WIDE_INT_1U << REGNO (reg); -+ not_to_clear_mask |= mask; -+ } -+ } -+ } -+ else -+ { -+ /* Otherwise we can rely on the MODE to determine how many registers -+ are being used by this argument. */ -+ int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx)); -+ not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx); -+ if (arg_regs > 1) -+ { -+ unsigned HOST_WIDE_INT -+ mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs); -+ mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx); -+ not_to_clear_mask |= mask; -+ } -+ } -+ } -+ -+ return not_to_clear_mask; -+} -+ -+/* Saves callee saved registers, clears callee saved registers and caller saved -+ registers not used to pass arguments before a cmse_nonsecure_call. And -+ restores the callee saved registers after. 
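
The multi-register span masks built in compute_not_to_clear_mask all follow one idiom: set bits [regno, regno + arg_regs). A standalone sketch, assuming the span stays below bit 64 and using a hypothetical helper name:

#include <assert.h>
#include <stdint.h>

/* Bits [regno, regno + arg_regs) set, matching the
   mask = (1 << (regno + arg_regs)) - (1 << regno) idiom above.  */
static uint64_t
reg_span_mask (unsigned regno, unsigned arg_regs)
{
  return (1ULL << (regno + arg_regs)) - (1ULL << regno);
}

int
main (void)
{
  assert (reg_span_mask (16, 2) == 0x30000ULL);	/* a two-register span */
  return 0;
}
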
*/ -+ -+static void -+cmse_nonsecure_call_clear_caller_saved (void) -+{ -+ basic_block bb; -+ -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ rtx_insn *insn; -+ -+ FOR_BB_INSNS (bb, insn) -+ { -+ uint64_t to_clear_mask, float_mask; -+ rtx_insn *seq; -+ rtx pat, call, unspec, reg, cleared_reg, tmp; -+ unsigned int regno, maxregno; -+ rtx address; -+ CUMULATIVE_ARGS args_so_far_v; -+ cumulative_args_t args_so_far; -+ tree arg_type, fntype; -+ bool using_r4, first_param = true; -+ function_args_iterator args_iter; -+ uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U}; -+ uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0]; -+ -+ if (!NONDEBUG_INSN_P (insn)) -+ continue; -+ -+ if (!CALL_P (insn)) -+ continue; -+ -+ pat = PATTERN (insn); -+ gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0); -+ call = XVECEXP (pat, 0, 0); -+ -+ /* Get the real call RTX if the insn sets a value, ie. returns. */ -+ if (GET_CODE (call) == SET) -+ call = SET_SRC (call); -+ -+ /* Check if it is a cmse_nonsecure_call. */ -+ unspec = XEXP (call, 0); -+ if (GET_CODE (unspec) != UNSPEC -+ || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM) -+ continue; -+ -+ /* Determine the caller-saved registers we need to clear. */ -+ to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1; -+ maxregno = NUM_ARG_REGS - 1; -+ /* Only look at the caller-saved floating point registers in case of -+ -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the -+ lazy store and loads which clear both caller- and callee-saved -+ registers. */ -+ if (TARGET_HARD_FLOAT_ABI) -+ { -+ float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1; -+ float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1); -+ to_clear_mask |= float_mask; -+ maxregno = D7_VFP_REGNUM; -+ } -+ -+ /* Make sure the register used to hold the function address is not -+ cleared. */ -+ address = RTVEC_ELT (XVEC (unspec, 0), 0); -+ gcc_assert (MEM_P (address)); -+ gcc_assert (REG_P (XEXP (address, 0))); -+ to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0))); -+ -+ /* Set basic block of call insn so that df rescan is performed on -+ insns inserted here. */ -+ set_block_for_insn (insn, bb); -+ df_set_flags (DF_DEFER_INSN_RESCAN); -+ start_sequence (); -+ -+ /* Make sure the scheduler doesn't schedule other insns beyond -+ here. */ -+ emit_insn (gen_blockage ()); -+ -+ /* Walk through all arguments and clear registers appropriately. -+ */ -+ fntype = TREE_TYPE (MEM_EXPR (address)); -+ arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, -+ NULL_TREE); -+ args_so_far = pack_cumulative_args (&args_so_far_v); -+ FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) -+ { -+ rtx arg_rtx; -+ machine_mode arg_mode = TYPE_MODE (arg_type); -+ -+ if (VOID_TYPE_P (arg_type)) -+ continue; -+ -+ if (!first_param) -+ arm_function_arg_advance (args_so_far, arg_mode, arg_type, -+ true); -+ -+ arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, -+ true); -+ gcc_assert (REG_P (arg_rtx)); -+ to_clear_mask -+ &= ~compute_not_to_clear_mask (arg_type, arg_rtx, -+ REGNO (arg_rtx), -+ padding_bits_to_clear_ptr); -+ -+ first_param = false; -+ } -+ -+ /* Clear padding bits where needed. */ -+ cleared_reg = XEXP (address, 0); -+ reg = gen_rtx_REG (SImode, IP_REGNUM); -+ using_r4 = false; -+ for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++) -+ { -+ if (padding_bits_to_clear[regno] == 0) -+ continue; -+ -+ /* If this is a Thumb-1 target copy the address of the function -+ we are calling from 'r4' into 'ip' such that we can use r4 to -+ clear the unused bits in the arguments. 
*/ -+ if (TARGET_THUMB1 && !using_r4) -+ { -+ using_r4 = true; -+ reg = cleared_reg; -+ emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM), -+ reg); -+ } -+ -+ tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u)); -+ emit_move_insn (reg, tmp); -+ /* Also fill the top half of the negated -+ padding_bits_to_clear. */ -+ if (((~padding_bits_to_clear[regno]) >> 16) > 0) -+ { -+ tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16); -+ emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg, -+ GEN_INT (16), -+ GEN_INT (16)), -+ tmp)); -+ } -+ -+ emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno), -+ gen_rtx_REG (SImode, regno), -+ reg)); -+ -+ } -+ if (using_r4) -+ emit_move_insn (cleared_reg, -+ gen_rtx_REG (SImode, IP_REGNUM)); -+ -+ /* We use right shift and left shift to clear the LSB of the address -+ we jump to instead of using bic, to avoid having to use an extra -+ register on Thumb-1. */ -+ tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx); -+ emit_insn (gen_rtx_SET (cleared_reg, tmp)); -+ tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx); -+ emit_insn (gen_rtx_SET (cleared_reg, tmp)); -+ -+ /* Clearing all registers that leak before doing a non-secure -+ call. */ -+ for (regno = R0_REGNUM; regno <= maxregno; regno++) -+ { -+ if (!(to_clear_mask & (1LL << regno))) -+ continue; -+ -+ /* If regno is an even vfp register and its successor is also to -+ be cleared, use vmov. */ -+ if (IS_VFP_REGNUM (regno)) -+ { -+ if (TARGET_VFP_DOUBLE -+ && VFP_REGNO_OK_FOR_DOUBLE (regno) -+ && to_clear_mask & (1LL << (regno + 1))) -+ emit_move_insn (gen_rtx_REG (DFmode, regno++), -+ CONST0_RTX (DFmode)); -+ else -+ emit_move_insn (gen_rtx_REG (SFmode, regno), -+ CONST0_RTX (SFmode)); -+ } -+ else -+ emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg); -+ } -+ -+ seq = get_insns (); -+ end_sequence (); -+ emit_insn_before (seq, insn); -+ -+ } -+ } -+} -+ - /* Rewrite move insn into subtract of 0 if the condition codes will - be useful in next conditional jump insn. */ - -@@ -17569,6 +17149,8 @@ arm_reorg (void) - HOST_WIDE_INT address = 0; - Mfix * fix; - -+ if (use_cmse) -+ cmse_nonsecure_call_clear_caller_saved (); - if (TARGET_THUMB1) - thumb1_reorg (); - else if (TARGET_THUMB2) -@@ -17941,6 +17523,23 @@ vfp_emit_fstmd (int base_reg, int count) - return count * 8; - } - -+/* Returns true if -mcmse has been passed and the function pointed to by 'addr' -+ has the cmse_nonsecure_call attribute and returns false otherwise. */ -+ -+bool -+detect_cmse_nonsecure_call (tree addr) -+{ -+ if (!addr) -+ return FALSE; -+ -+ tree fntype = TREE_TYPE (addr); -+ if (use_cmse && lookup_attribute ("cmse_nonsecure_call", -+ TYPE_ATTRIBUTES (fntype))) -+ return TRUE; -+ return FALSE; -+} -+ -+ - /* Emit a call instruction with pattern PAT. ADDR is the address of - the call target. 
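
In C terms, the sequence emitted here ANDs each argument register with the negated padding mask, loaded in movw/movt halves, and strips the low bit of the call target with a shift pair instead of BIC. A sketch with hypothetical values:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  /* Hypothetical padding mask from the analysis above.  */
  uint32_t padding_bits = 0x0000ff00;
  uint32_t neg = ~padding_bits;

  /* The movw/movt pair loads the negated mask in two 16-bit halves.  */
  uint32_t lo = (neg << 16) >> 16, hi = neg >> 16;
  assert (((hi << 16) | lo) == neg);

  uint32_t arg = 0xdeadbeef;
  arg &= neg;			/* clear only the padding bits */
  assert ((arg & padding_bits) == 0);

  /* lsr #1; lsl #1 clears the address LSB without a scratch register.  */
  uint32_t addr = 0x8001;
  addr = (addr >> 1) << 1;
  assert (addr == 0x8000);
  return 0;
}
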
*/ - -@@ -18600,6 +18199,8 @@ output_move_vfp (rtx *operands) - rtx reg, mem, addr, ops[2]; - int load = REG_P (operands[0]); - int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; -+ int sp = (!TARGET_VFP_FP16INST -+ || GET_MODE_SIZE (GET_MODE (operands[0])) == 4); - int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT; - const char *templ; - char buff[50]; -@@ -18612,8 +18213,10 @@ output_move_vfp (rtx *operands) - - gcc_assert (REG_P (reg)); - gcc_assert (IS_VFP_REGNUM (REGNO (reg))); -- gcc_assert (mode == SFmode -+ gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT) -+ || mode == SFmode - || mode == DFmode -+ || mode == HImode - || mode == SImode - || mode == DImode - || (TARGET_NEON && VALID_NEON_DREG_MODE (mode))); -@@ -18644,7 +18247,7 @@ output_move_vfp (rtx *operands) - - sprintf (buff, templ, - load ? "ld" : "st", -- dp ? "64" : "32", -+ dp ? "64" : sp ? "32" : "16", - dp ? "P" : "", - integer_p ? "\t%@ int" : ""); - output_asm_insn (buff, ops); -@@ -19070,7 +18673,8 @@ shift_op (rtx op, HOST_WIDE_INT *amountp) - return NULL; - } - -- *amountp = int_log2 (*amountp); -+ *amountp = exact_log2 (*amountp); -+ gcc_assert (IN_RANGE (*amountp, 0, 31)); - return ARM_LSL_NAME; - - default: -@@ -19102,22 +18706,6 @@ shift_op (rtx op, HOST_WIDE_INT *amountp) - return mnem; - } - --/* Obtain the shift from the POWER of two. */ -- --static HOST_WIDE_INT --int_log2 (HOST_WIDE_INT power) --{ -- HOST_WIDE_INT shift = 0; -- -- while ((((HOST_WIDE_INT) 1 << shift) & power) == 0) -- { -- gcc_assert (shift <= 31); -- shift++; -- } -- -- return shift; --} -- - /* Output a .ascii pseudo-op, keeping track of lengths. This is - because /bin/as is horribly restrictive. The judgement about - whether or not each character is 'printable' (and can be output as -@@ -19474,7 +19062,7 @@ arm_get_vfp_saved_size (void) - - saved = 0; - /* Space for saved VFP registers. */ -- if (TARGET_HARD_FLOAT && TARGET_VFP) -+ if (TARGET_HARD_FLOAT) - { - count = 0; - for (regno = FIRST_VFP_REGNUM; -@@ -19563,6 +19151,7 @@ output_return_instruction (rtx operand, bool really_return, bool reverse, - (e.g. interworking) then we can load the return address - directly into the PC. Otherwise we must load it into LR. */ - if (really_return -+ && !IS_CMSE_ENTRY (func_type) - && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK)) - return_reg = reg_names[PC_REGNUM]; - else -@@ -19703,18 +19292,93 @@ output_return_instruction (rtx operand, bool really_return, bool reverse, - break; - - default: -+ if (IS_CMSE_ENTRY (func_type)) -+ { -+ /* Check if we have to clear the 'GE bits' which is only used if -+ parallel add and subtraction instructions are available. */ -+ if (TARGET_INT_SIMD) -+ snprintf (instr, sizeof (instr), -+ "msr%s\tAPSR_nzcvqg, %%|lr", conditional); -+ else -+ snprintf (instr, sizeof (instr), -+ "msr%s\tAPSR_nzcvq, %%|lr", conditional); -+ -+ output_asm_insn (instr, & operand); -+ if (TARGET_HARD_FLOAT && !TARGET_THUMB1) -+ { -+ /* Clear the cumulative exception-status bits (0-4,7) and the -+ condition code bits (28-31) of the FPSCR. We need to -+ remember to clear the first scratch register used (IP) and -+ save and restore the second (r4). 
*/ -+ snprintf (instr, sizeof (instr), "push\t{%%|r4}"); -+ output_asm_insn (instr, & operand); -+ snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr"); -+ output_asm_insn (instr, & operand); -+ snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376"); -+ output_asm_insn (instr, & operand); -+ snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095"); -+ output_asm_insn (instr, & operand); -+ snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4"); -+ output_asm_insn (instr, & operand); -+ snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip"); -+ output_asm_insn (instr, & operand); -+ snprintf (instr, sizeof (instr), "pop\t{%%|r4}"); -+ output_asm_insn (instr, & operand); -+ snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr"); -+ output_asm_insn (instr, & operand); -+ } -+ snprintf (instr, sizeof (instr), "bxns\t%%|lr"); -+ } - /* Use bx if it's available. */ -- if (arm_arch5 || arm_arch4t) -+ else if (arm_arch5 || arm_arch4t) - sprintf (instr, "bx%s\t%%|lr", conditional); - else - sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional); - break; - } - -- output_asm_insn (instr, & operand); -+ output_asm_insn (instr, & operand); -+ } -+ -+ return ""; -+} -+ -+/* Output in FILE asm statements needed to declare the NAME of the function -+ defined by its DECL node. */ -+ -+void -+arm_asm_declare_function_name (FILE *file, const char *name, tree decl) -+{ -+ size_t cmse_name_len; -+ char *cmse_name = 0; -+ char cmse_prefix[] = "__acle_se_"; -+ -+ /* When compiling with ARMv8-M Security Extensions enabled, we should print an -+ extra function label for each function with the 'cmse_nonsecure_entry' -+ attribute. This extra function label should be prepended with -+ '__acle_se_', telling the linker that it needs to create secure gateway -+ veneers for this function. */ -+ if (use_cmse && lookup_attribute ("cmse_nonsecure_entry", -+ DECL_ATTRIBUTES (decl))) -+ { -+ cmse_name_len = sizeof (cmse_prefix) + strlen (name); -+ cmse_name = XALLOCAVEC (char, cmse_name_len); -+ snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name); -+ targetm.asm_out.globalize_label (file, cmse_name); -+ -+ ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl); -+ ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function"); - } - -- return ""; -+ ARM_DECLARE_FUNCTION_NAME (file, name, decl); -+ ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function"); -+ ASM_DECLARE_RESULT (file, DECL_RESULT (decl)); -+ ASM_OUTPUT_LABEL (file, name); -+ -+ if (cmse_name) -+ ASM_OUTPUT_LABEL (file, cmse_name); -+ -+ ARM_OUTPUT_FN_UNWIND (file, TRUE); - } - - /* Write the function name into the code section, directly preceding -@@ -19766,10 +19430,6 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) - { - unsigned long func_type; - -- /* ??? Do we want to print some of the below anyway? */ -- if (TARGET_THUMB1) -- return; -- - /* Sanity check. 
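
The movw #65376 / movt #4095 pair builds the FPSCR keep-mask. A quick standalone check that the constant clears exactly the cumulative exception-status bits (0-4, 7) and the condition-code bits (28-31), and nothing else:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t mask = ((uint32_t) 4095 << 16) | 65376;	/* movt : movw */
  uint32_t cleared = ~mask;

  assert (mask == 0x0fffff60);
  /* Bits 0-4 and 7 (0x9f) plus bits 28-31 are dropped.  */
  assert (cleared == 0xf000009f);
  return 0;
}
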
*/ - gcc_assert (!arm_ccfsm_state && !arm_target_insn); - -@@ -19804,6 +19464,8 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size) - asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n"); - if (IS_STACKALIGN (func_type)) - asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n"); -+ if (IS_CMSE_ENTRY (func_type)) -+ asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n"); - - asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n", - crtl->args.size, -@@ -20473,7 +20135,7 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg) - REG_NOTES (par) = dwarf; - - /* Make sure cfa doesn't leave with IP_REGNUM to allow unwinding fron FP. */ -- if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM) -+ if (REGNO (base_reg) == IP_REGNUM) - { - RTX_FRAME_RELATED_P (par) = 1; - add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx); -@@ -20934,7 +20596,7 @@ arm_get_frame_offsets (void) - func_type = arm_current_func_type (); - /* Space for saved VFP registers. */ - if (! IS_VOLATILE (func_type) -- && TARGET_HARD_FLOAT && TARGET_VFP) -+ && TARGET_HARD_FLOAT) - saved += arm_get_vfp_saved_size (); - } - else /* TARGET_THUMB1 */ -@@ -21155,7 +20817,7 @@ arm_save_coproc_regs(void) - saved_size += 8; - } - -- if (TARGET_HARD_FLOAT && TARGET_VFP) -+ if (TARGET_HARD_FLOAT) - { - start_reg = FIRST_VFP_REGNUM; - -@@ -22941,6 +22603,8 @@ maybe_get_arm_condition_code (rtx comparison) - { - case LTU: return ARM_CS; - case GEU: return ARM_CC; -+ case NE: return ARM_CS; -+ case EQ: return ARM_CC; - default: return ARM_NV; - } - -@@ -22966,6 +22630,14 @@ maybe_get_arm_condition_code (rtx comparison) - default: return ARM_NV; - } - -+ case CC_Vmode: -+ switch (comp_code) -+ { -+ case NE: return ARM_VS; -+ case EQ: return ARM_VC; -+ default: return ARM_NV; -+ } -+ - case CCmode: - switch (comp_code) - { -@@ -23396,7 +23068,7 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode) - { - if (GET_MODE_CLASS (mode) == MODE_CC) - return (regno == CC_REGNUM -- || (TARGET_HARD_FLOAT && TARGET_VFP -+ || (TARGET_HARD_FLOAT - && regno == VFPCC_REGNUM)); - - if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC) -@@ -23410,8 +23082,7 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode) - start of an even numbered register pair. */ - return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM); - -- if (TARGET_HARD_FLOAT && TARGET_VFP -- && IS_VFP_REGNUM (regno)) -+ if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno)) - { - if (mode == SFmode || mode == SImode) - return VFP_REGNO_OK_FOR_SINGLE (regno); -@@ -23419,10 +23090,12 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode) - if (mode == DFmode) - return VFP_REGNO_OK_FOR_DOUBLE (regno); - -- /* VFP registers can hold HFmode values, but there is no point in -- putting them there unless we have hardware conversion insns. */ - if (mode == HFmode) -- return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno); -+ return VFP_REGNO_OK_FOR_SINGLE (regno); -+ -+ /* VFP registers can hold HImode values. */ -+ if (mode == HImode) -+ return VFP_REGNO_OK_FOR_SINGLE (regno); - - if (TARGET_NEON) - return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) -@@ -23626,26 +23299,6 @@ arm_debugger_arg_offset (int value, rtx addr) - return value; - } - --/* Implement TARGET_INVALID_PARAMETER_TYPE. 
*/ -- --static const char * --arm_invalid_parameter_type (const_tree t) --{ -- if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) -- return N_("function parameters cannot have __fp16 type"); -- return NULL; --} -- --/* Implement TARGET_INVALID_PARAMETER_TYPE. */ -- --static const char * --arm_invalid_return_type (const_tree t) --{ -- if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) -- return N_("functions cannot return __fp16 type"); -- return NULL; --} -- - /* Implement TARGET_PROMOTED_TYPE. */ - - static tree -@@ -23885,8 +23538,8 @@ thumb_pop (FILE *f, unsigned long mask) - if (mask & (1 << PC_REGNUM)) - { - /* Catch popping the PC. */ -- if (TARGET_INTERWORK || TARGET_BACKTRACE -- || crtl->calls_eh_return) -+ if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return -+ || IS_CMSE_ENTRY (arm_current_func_type ())) - { - /* The PC is never poped directly, instead - it is popped into r3 and then BX is used. */ -@@ -23947,7 +23600,14 @@ thumb_exit (FILE *f, int reg_containing_return_addr) - if (crtl->calls_eh_return) - asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); - -- asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); -+ if (IS_CMSE_ENTRY (arm_current_func_type ())) -+ { -+ asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", -+ reg_containing_return_addr); -+ asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr); -+ } -+ else -+ asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); - return; - } - /* Otherwise if we are not supporting interworking and we have not created -@@ -23956,7 +23616,8 @@ thumb_exit (FILE *f, int reg_containing_return_addr) - else if (!TARGET_INTERWORK - && !TARGET_BACKTRACE - && !is_called_in_ARM_mode (current_function_decl) -- && !crtl->calls_eh_return) -+ && !crtl->calls_eh_return -+ && !IS_CMSE_ENTRY (arm_current_func_type ())) - { - asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM); - return; -@@ -24179,7 +23840,21 @@ thumb_exit (FILE *f, int reg_containing_return_addr) - asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); - - /* Return to caller. */ -- asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); -+ if (IS_CMSE_ENTRY (arm_current_func_type ())) -+ { -+ /* This is for the cases where LR is not being used to contain the return -+ address. It may therefore contain information that we might not want -+ to leak, hence it must be cleared. The value in R0 will never be a -+ secret at this point, so it is safe to use it, see the clearing code -+ in 'cmse_nonsecure_entry_clear_before_return'. */ -+ if (reg_containing_return_addr != LR_REGNUM) -+ asm_fprintf (f, "\tmov\tlr, r0\n"); -+ -+ asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr); -+ asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr); -+ } -+ else -+ asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); - } - - /* Scan INSN just before assembler is output for it. -@@ -25044,6 +24719,149 @@ thumb1_expand_prologue (void) - cfun->machine->lr_save_eliminated = 0; - } - -+/* Clear caller saved registers not used to pass return values and leaked -+ condition flags before exiting a cmse_nonsecure_entry function. 
*/ -+ -+void -+cmse_nonsecure_entry_clear_before_return (void) -+{ -+ uint64_t to_clear_mask[2]; -+ uint32_t padding_bits_to_clear = 0; -+ uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear; -+ int regno, maxregno = IP_REGNUM; -+ tree result_type; -+ rtx result_rtl; -+ -+ to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1; -+ to_clear_mask[0] |= (1ULL << IP_REGNUM); -+ -+ /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP -+ registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold -+ to make sure the instructions used to clear them are present. */ -+ if (TARGET_HARD_FLOAT && !TARGET_THUMB1) -+ { -+ uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1; -+ maxregno = LAST_VFP_REGNUM; -+ -+ float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1); -+ to_clear_mask[0] |= float_mask; -+ -+ float_mask = (1ULL << (maxregno - 63)) - 1; -+ to_clear_mask[1] = float_mask; -+ -+ /* Make sure we don't clear the two scratch registers used to clear the -+ relevant FPSCR bits in output_return_instruction. */ -+ emit_use (gen_rtx_REG (SImode, IP_REGNUM)); -+ to_clear_mask[0] &= ~(1ULL << IP_REGNUM); -+ emit_use (gen_rtx_REG (SImode, 4)); -+ to_clear_mask[0] &= ~(1ULL << 4); -+ } -+ -+ /* If the user has defined registers to be caller saved, these are no longer -+ restored by the function before returning and must thus be cleared for -+ security purposes. */ -+ for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++) -+ { -+ /* We do not touch registers that can be used to pass arguments as per -+ the AAPCS, since these should never be made callee-saved by user -+ options. */ -+ if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM)) -+ continue; -+ if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM)) -+ continue; -+ if (call_used_regs[regno]) -+ to_clear_mask[regno / 64] |= (1ULL << (regno % 64)); -+ } -+ -+ /* Make sure we do not clear the registers used to return the result in. */ -+ result_type = TREE_TYPE (DECL_RESULT (current_function_decl)); -+ if (!VOID_TYPE_P (result_type)) -+ { -+ result_rtl = arm_function_value (result_type, current_function_decl, 0); -+ -+ /* No need to check that we return in registers, because we don't -+ support returning on stack yet. */ -+ to_clear_mask[0] -+ &= ~compute_not_to_clear_mask (result_type, result_rtl, 0, -+ padding_bits_to_clear_ptr); -+ } -+ -+ if (padding_bits_to_clear != 0) -+ { -+ rtx reg_rtx; -+ /* Padding bits to clear is not 0 so we know we are dealing with -+ returning a composite type, which only uses r0. Let's make sure that -+ r1-r3 is cleared too, we will use r1 as a scratch register. */ -+ gcc_assert ((to_clear_mask[0] & 0xe) == 0xe); -+ -+ reg_rtx = gen_rtx_REG (SImode, R1_REGNUM); -+ -+ /* Fill the lower half of the negated padding_bits_to_clear. */ -+ emit_move_insn (reg_rtx, -+ GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u))); -+ -+ /* Also fill the top half of the negated padding_bits_to_clear. */ -+ if (((~padding_bits_to_clear) >> 16) > 0) -+ emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx, -+ GEN_INT (16), -+ GEN_INT (16)), -+ GEN_INT ((~padding_bits_to_clear) >> 16))); -+ -+ emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM), -+ gen_rtx_REG (SImode, R0_REGNUM), -+ reg_rtx)); -+ } -+ -+ for (regno = R0_REGNUM; regno <= maxregno; regno++) -+ { -+ if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64)))) -+ continue; -+ -+ if (IS_VFP_REGNUM (regno)) -+ { -+ /* If regno is an even vfp register and its successor is also to -+ be cleared, use vmov. 
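
Here to_clear_mask is a two-word bitmap indexed by regno / 64, wide enough for the core registers plus the VFP bank up to LAST_VFP_REGNUM. A minimal sketch of the indexing, with hypothetical helper names:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static void
mask_set (uint64_t mask[2], unsigned regno)
{
  mask[regno / 64] |= 1ULL << (regno % 64);
}

static bool
mask_test (const uint64_t mask[2], unsigned regno)
{
  return (mask[regno / 64] >> (regno % 64)) & 1;
}

int
main (void)
{
  uint64_t to_clear[2] = { 0, 0 };
  mask_set (to_clear, 12);	/* e.g. IP_REGNUM */
  mask_set (to_clear, 70);	/* a register in the second word */
  assert (mask_test (to_clear, 12) && mask_test (to_clear, 70));
  assert (!mask_test (to_clear, 13));
  return 0;
}
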
*/ -+ if (TARGET_VFP_DOUBLE -+ && VFP_REGNO_OK_FOR_DOUBLE (regno) -+ && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1))) -+ { -+ emit_move_insn (gen_rtx_REG (DFmode, regno), -+ CONST1_RTX (DFmode)); -+ emit_use (gen_rtx_REG (DFmode, regno)); -+ regno++; -+ } -+ else -+ { -+ emit_move_insn (gen_rtx_REG (SFmode, regno), -+ CONST1_RTX (SFmode)); -+ emit_use (gen_rtx_REG (SFmode, regno)); -+ } -+ } -+ else -+ { -+ if (TARGET_THUMB1) -+ { -+ if (regno == R0_REGNUM) -+ emit_move_insn (gen_rtx_REG (SImode, regno), -+ const0_rtx); -+ else -+ /* R0 has either been cleared before, see code above, or it -+ holds a return value, either way it is not secret -+ information. */ -+ emit_move_insn (gen_rtx_REG (SImode, regno), -+ gen_rtx_REG (SImode, R0_REGNUM)); -+ emit_use (gen_rtx_REG (SImode, regno)); -+ } -+ else -+ { -+ emit_move_insn (gen_rtx_REG (SImode, regno), -+ gen_rtx_REG (SImode, LR_REGNUM)); -+ emit_use (gen_rtx_REG (SImode, regno)); -+ } -+ } -+ } -+} -+ - /* Generate pattern *pop_multiple_with_stack_update_and_return if single - POP instruction can be generated. LR should be replaced by PC. All - the checks required are already done by USE_RETURN_INSN (). Hence, -@@ -25065,6 +24883,12 @@ thumb2_expand_return (bool simple_return) - - if (!simple_return && saved_regs_mask) - { -+ /* TODO: Verify that this path is never taken for cmse_nonsecure_entry -+ functions or adapt code to handle according to ACLE. This path should -+ not be reachable for cmse_nonsecure_entry functions though we prefer -+ to assert it for now to ensure that future code changes do not silently -+ change this behavior. */ -+ gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ())); - if (num_regs == 1) - { - rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2)); -@@ -25087,6 +24911,8 @@ thumb2_expand_return (bool simple_return) - } - else - { -+ if (IS_CMSE_ENTRY (arm_current_func_type ())) -+ cmse_nonsecure_entry_clear_before_return (); - emit_jump_insn (simple_return_rtx); - } - } -@@ -25145,6 +24971,10 @@ thumb1_expand_epilogue (void) - - if (! df_regs_ever_live_p (LR_REGNUM)) - emit_use (gen_rtx_REG (SImode, LR_REGNUM)); -+ -+ /* Clear all caller-saved regs that are not used to return. */ -+ if (IS_CMSE_ENTRY (arm_current_func_type ())) -+ cmse_nonsecure_entry_clear_before_return (); - } - - /* Epilogue code for APCS frame. */ -@@ -25179,7 +25009,7 @@ arm_expand_epilogue_apcs_frame (bool really_return) - floats_from_frame += 4; - } - -- if (TARGET_HARD_FLOAT && TARGET_VFP) -+ if (TARGET_HARD_FLOAT) - { - int start_reg; - rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM); -@@ -25425,7 +25255,7 @@ arm_expand_epilogue (bool really_return) - } - } - -- if (TARGET_HARD_FLOAT && TARGET_VFP) -+ if (TARGET_HARD_FLOAT) - { - /* Generate VFP register multi-pop. */ - int end_reg = LAST_VFP_REGNUM + 1; -@@ -25482,6 +25312,7 @@ arm_expand_epilogue (bool really_return) - - if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED - && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL) -+ && !IS_CMSE_ENTRY (func_type) - && !IS_STACKALIGN (func_type) - && really_return - && crtl->args.pretend_args_size == 0 -@@ -25578,6 +25409,14 @@ arm_expand_epilogue (bool really_return) - stack_pointer_rtx, stack_pointer_rtx); - } - -+ /* Clear all caller-saved regs that are not used to return. */ -+ if (IS_CMSE_ENTRY (arm_current_func_type ())) -+ { -+ /* CMSE_ENTRY always returns. 
*/ -+ gcc_assert (really_return); -+ cmse_nonsecure_entry_clear_before_return (); -+ } -+ - if (!really_return) - return; - -@@ -25874,13 +25713,6 @@ thumb_reload_out_hi (rtx *operands) - emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2])); - } - --/* Handle reading a half-word from memory during reload. */ --void --thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED) --{ -- gcc_unreachable (); --} -- - /* Return the length of a function name prefix - that starts with the character 'c'. */ - static int -@@ -25950,46 +25782,55 @@ arm_emit_eabi_attribute (const char *name, int num, int val) - void - arm_print_tune_info (void) - { -- asm_fprintf (asm_out_file, "\t@.tune parameters\n"); -- asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n", -+ asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n"); -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n", - current_tune->constant_limit); -- asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n", -- current_tune->max_insns_skipped); -- asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n", -- current_tune->prefetch.num_slots); -- asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped); -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots); -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "prefetch.l1_cache_size:\t%d\n", - current_tune->prefetch.l1_cache_size); -- asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "prefetch.l1_cache_line_size:\t%d\n", - current_tune->prefetch.l1_cache_line_size); -- asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "prefer_constant_pool:\t%d\n", - (int) current_tune->prefer_constant_pool); -- asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n"); -- asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n"); -- asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "branch_cost:\t(s:speed, p:predictable)\n"); -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n"); -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n", - current_tune->branch_cost (false, false)); -- asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n", - current_tune->branch_cost (false, true)); -- asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n", - current_tune->branch_cost (true, false)); -- asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n", - current_tune->branch_cost (true, true)); -- asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "prefer_ldrd_strd:\t%d\n", - (int) current_tune->prefer_ldrd_strd); -- asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "logical_op_non_short_circuit:\t[%d,%d]\n", - (int) current_tune->logical_op_non_short_circuit_thumb, - (int) current_tune->logical_op_non_short_circuit_arm); -- asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n", -+ 
asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "prefer_neon_for_64bits:\t%d\n", - (int) current_tune->prefer_neon_for_64bits); -- asm_fprintf (asm_out_file, -- "\t\t@disparage_flag_setting_t16_encodings:\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "disparage_flag_setting_t16_encodings:\t%d\n", - (int) current_tune->disparage_flag_setting_t16_encodings); -- asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "string_ops_prefer_neon:\t%d\n", - (int) current_tune->string_ops_prefer_neon); -- asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START -+ "max_insns_inline_memset:\t%d\n", - current_tune->max_insns_inline_memset); -- asm_fprintf (asm_out_file, "\t\t@fusible_ops:\t%u\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n", - current_tune->fusible_ops); -- asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n", -+ asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n", - (int) current_tune->sched_autopref); - } - -@@ -26018,7 +25859,7 @@ arm_file_start (void) - const char* pos = strchr (arm_selected_arch->name, '+'); - if (pos) - { -- char buf[15]; -+ char buf[32]; - gcc_assert (strlen (arm_selected_arch->name) - <= sizeof (buf) / sizeof (*pos)); - strncpy (buf, arm_selected_arch->name, -@@ -26043,7 +25884,7 @@ arm_file_start (void) - if (print_tune_info) - arm_print_tune_info (); - -- if (! TARGET_SOFT_FLOAT && TARGET_VFP) -+ if (! TARGET_SOFT_FLOAT) - { - if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE) - arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1); -@@ -26160,11 +26001,10 @@ arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno) - - /* Output code to add DELTA to the first argument, and then jump - to FUNCTION. Used for C++ multiple inheritance. */ -+ - static void --arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, -- HOST_WIDE_INT delta, -- HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED, -- tree function) -+arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta, -+ HOST_WIDE_INT, tree function) - { - static int thunk_label = 0; - char label[256]; -@@ -26305,6 +26145,76 @@ arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, - final_end_function (); - } - -+/* MI thunk handling for TARGET_32BIT. */ -+ -+static void -+arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta, -+ HOST_WIDE_INT vcall_offset, tree function) -+{ -+ /* On ARM, this_regno is R0 or R1 depending on -+ whether the function returns an aggregate or not. -+ */ -+ int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), -+ function) -+ ? R1_REGNUM : R0_REGNUM); -+ -+ rtx temp = gen_rtx_REG (Pmode, IP_REGNUM); -+ rtx this_rtx = gen_rtx_REG (Pmode, this_regno); -+ reload_completed = 1; -+ emit_note (NOTE_INSN_PROLOGUE_END); -+ -+ /* Add DELTA to THIS_RTX. */ -+ if (delta != 0) -+ arm_split_constant (PLUS, Pmode, NULL_RTX, -+ delta, this_rtx, this_rtx, false); -+ -+ /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */ -+ if (vcall_offset != 0) -+ { -+ /* Load *THIS_RTX. */ -+ emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx)); -+ /* Compute *THIS_RTX + VCALL_OFFSET. */ -+ arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp, -+ false); -+ /* Compute *(*THIS_RTX + VCALL_OFFSET). 
*/ -+ emit_move_insn (temp, gen_rtx_MEM (Pmode, temp)); -+ emit_insn (gen_add3_insn (this_rtx, this_rtx, temp)); -+ } -+ -+ /* Generate a tail call to the target function. */ -+ if (!TREE_USED (function)) -+ { -+ assemble_external (function); -+ TREE_USED (function) = 1; -+ } -+ rtx funexp = XEXP (DECL_RTL (function), 0); -+ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); -+ rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); -+ SIBLING_CALL_P (insn) = 1; -+ -+ insn = get_insns (); -+ shorten_branches (insn); -+ final_start_function (insn, file, 1); -+ final (insn, file, 1); -+ final_end_function (); -+ -+ /* Stop pretending this is a post-reload pass. */ -+ reload_completed = 0; -+} -+ -+/* Output code to add DELTA to the first argument, and then jump -+ to FUNCTION. Used for C++ multiple inheritance. */ -+ -+static void -+arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta, -+ HOST_WIDE_INT vcall_offset, tree function) -+{ -+ if (TARGET_32BIT) -+ arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function); -+ else -+ arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function); -+} -+ - int - arm_emit_vector_const (FILE *file, rtx x) - { -@@ -27543,7 +27453,7 @@ arm_mangle_type (const_tree type) - static const int thumb_core_reg_alloc_order[] = - { - 3, 2, 1, 0, 4, 5, 6, 7, -- 14, 12, 8, 9, 10, 11 -+ 12, 14, 8, 9, 10, 11 - }; - - /* Adjust register allocation order when compiling for Thumb. */ -@@ -27689,7 +27599,7 @@ arm_conditional_register_usage (void) - if (TARGET_THUMB1) - fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1; - -- if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP) -+ if (TARGET_32BIT && TARGET_HARD_FLOAT) - { - /* VFPv3 registers are disabled when earlier VFP - versions are selected due to the definition of -@@ -27760,7 +27670,7 @@ arm_preferred_rename_class (reg_class_t rclass) - return NO_REGS; - } - --/* Compute the atrribute "length" of insn "*push_multi". -+/* Compute the attribute "length" of insn "*push_multi". - So this function MUST be kept in sync with that insn pattern. */ - int - arm_attr_length_push_multi(rtx parallel_op, rtx first_op) -@@ -27777,6 +27687,11 @@ arm_attr_length_push_multi(rtx parallel_op, rtx first_op) - - /* Thumb2 mode. */ - regno = REGNO (first_op); -+ /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register -+ list is 8-bit. Normally this means all registers in the list must be -+ LO_REGS, that is (R0 -R7). If any HI_REGS used, then we must use 32-bit -+ encodings. There is one exception for PUSH that LR in HI_REGS can be used -+ with 16-bit encoding. */ - hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM); - for (i = 1; i < num_saves && !hi_reg; i++) - { -@@ -27789,6 +27704,56 @@ arm_attr_length_push_multi(rtx parallel_op, rtx first_op) - return 4; - } - -+/* Compute the attribute "length" of insn. Currently, this function is used -+ for "*load_multiple_with_writeback", "*pop_multiple_with_return" and -+ "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL -+ rtx, RETURN_PC is true if OPERANDS contains return insn. WRITE_BACK_P is -+ true if OPERANDS contains insn which explicit updates base register. */ -+ -+int -+arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p) -+{ -+ /* ARM mode. */ -+ if (TARGET_ARM) -+ return 4; -+ /* Thumb1 mode. */ -+ if (TARGET_THUMB1) -+ return 2; -+ -+ rtx parallel_op = operands[0]; -+ /* Initialize to elements number of PARALLEL. 
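
The RTL emitted by arm32_output_mi_thunk amounts to the usual this-pointer adjustment before a tail call. A C rendering, with a hypothetical helper name and test values (delta and vcall_offset are whatever the C++ front end supplies):

#include <stddef.h>

/* Add DELTA to THIS, then add *(*THIS + VCALL_OFFSET), as the thunk
   does before its tail call to FUNCTION.  */
static void *
thunk_adjust (void *this_ptr, ptrdiff_t delta, ptrdiff_t vcall_offset)
{
  char *p = (char *) this_ptr + delta;		 /* add DELTA */
  if (vcall_offset != 0)
    {
      char *vtbl = *(char **) p;		 /* load *THIS */
      p += *(ptrdiff_t *) (vtbl + vcall_offset); /* add *(*THIS + VCALL_OFFSET) */
    }
  return p;
}

int
main (void)
{
  /* Fake object: its first word points at a table whose slot at offset
     sizeof (ptrdiff_t) holds the adjustment 8.  */
  ptrdiff_t table[2] = { 0, 8 };
  void *obj[4] = { table };
  char *adjusted = thunk_adjust (obj, 0, sizeof (ptrdiff_t));
  return adjusted == (char *) obj + 8 ? 0 : 1;
}
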
*/ -+ unsigned indx = XVECLEN (parallel_op, 0) - 1; -+ /* Initialize the value to base register. */ -+ unsigned regno = REGNO (operands[1]); -+ /* Skip return and write back pattern. -+ We only need register pop pattern for later analysis. */ -+ unsigned first_indx = 0; -+ first_indx += return_pc ? 1 : 0; -+ first_indx += write_back_p ? 1 : 0; -+ -+ /* A pop operation can be done through LDM or POP. If the base register is SP -+ and if it's with write back, then a LDM will be alias of POP. */ -+ bool pop_p = (regno == SP_REGNUM && write_back_p); -+ bool ldm_p = !pop_p; -+ -+ /* Check base register for LDM. */ -+ if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS) -+ return 4; -+ -+ /* Check each register in the list. */ -+ for (; indx >= first_indx; indx--) -+ { -+ regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0)); -+ /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar -+ comment in arm_attr_length_push_multi. */ -+ if (REGNO_REG_CLASS (regno) == HI_REGS -+ && (regno != PC_REGNUM || ldm_p)) -+ return 4; -+ } -+ -+ return 2; -+} -+ - /* Compute the number of instructions emitted by output_move_double. */ - int - arm_count_output_move_double_insns (rtx *operands) -@@ -27820,7 +27785,11 @@ vfp3_const_double_for_fract_bits (rtx operand) - HOST_WIDE_INT value = real_to_integer (&r0); - value = value & 0xffffffff; - if ((value != 0) && ( (value & (value - 1)) == 0)) -- return int_log2 (value); -+ { -+ int ret = exact_log2 (value); -+ gcc_assert (IN_RANGE (ret, 0, 31)); -+ return ret; -+ } - } - } - return 0; -@@ -27960,9 +27929,9 @@ emit_unlikely_jump (rtx insn) - void - arm_expand_compare_and_swap (rtx operands[]) - { -- rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; -+ rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; - machine_mode mode; -- rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx); -+ rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx); - - bval = operands[0]; - rval = operands[1]; -@@ -28019,43 +27988,54 @@ arm_expand_compare_and_swap (rtx operands[]) - gcc_unreachable (); - } - -- emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f)); -+ bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CCmode, CC_REGNUM); -+ emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f)); - - if (mode == QImode || mode == HImode) - emit_move_insn (operands[1], gen_lowpart (mode, rval)); - - /* In all cases, we arrange for success to be signaled by Z set. - This arrangement allows for the boolean result to be used directly -- in a subsequent branch, post optimization. */ -- x = gen_rtx_REG (CCmode, CC_REGNUM); -- x = gen_rtx_EQ (SImode, x, const0_rtx); -- emit_insn (gen_rtx_SET (bval, x)); -+ in a subsequent branch, post optimization. For Thumb-1 targets, the -+ boolean negation of the result is also stored in bval because Thumb-1 -+ backend lacks dependency tracking for CC flag due to flag-setting not -+ being represented at RTL level. */ -+ if (TARGET_THUMB1) -+ emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst)); -+ else -+ { -+ x = gen_rtx_EQ (SImode, bdst, const0_rtx); -+ emit_insn (gen_rtx_SET (bval, x)); -+ } - } - - /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether - another memory store between the load-exclusive and store-exclusive can - reset the monitor from Exclusive to Open state. This means we must wait - until after reload to split the pattern, lest we get a register spill in -- the middle of the atomic sequence. */ -+ the middle of the atomic sequence. 
Success of the compare and swap is -+ indicated by the Z flag set for 32bit targets and by neg_bval being zero -+ for Thumb-1 targets (ie. negation of the boolean value returned by -+ atomic_compare_and_swapmode standard pattern in operand 0). */ - - void - arm_split_compare_and_swap (rtx operands[]) - { -- rtx rval, mem, oldval, newval, scratch; -+ rtx rval, mem, oldval, newval, neg_bval; - machine_mode mode; - enum memmodel mod_s, mod_f; - bool is_weak; - rtx_code_label *label1, *label2; - rtx x, cond; - -- rval = operands[0]; -- mem = operands[1]; -- oldval = operands[2]; -- newval = operands[3]; -- is_weak = (operands[4] != const0_rtx); -- mod_s = memmodel_from_int (INTVAL (operands[5])); -- mod_f = memmodel_from_int (INTVAL (operands[6])); -- scratch = operands[7]; -+ rval = operands[1]; -+ mem = operands[2]; -+ oldval = operands[3]; -+ newval = operands[4]; -+ is_weak = (operands[5] != const0_rtx); -+ mod_s = memmodel_from_int (INTVAL (operands[6])); -+ mod_f = memmodel_from_int (INTVAL (operands[7])); -+ neg_bval = TARGET_THUMB1 ? operands[0] : operands[8]; - mode = GET_MODE (mem); - - bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s); -@@ -28087,26 +28067,44 @@ arm_split_compare_and_swap (rtx operands[]) - - arm_emit_load_exclusive (mode, rval, mem, use_acquire); - -- cond = arm_gen_compare_reg (NE, rval, oldval, scratch); -- x = gen_rtx_NE (VOIDmode, cond, const0_rtx); -- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, -- gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); -- emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); -+ /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval, -+ as required to communicate with arm_expand_compare_and_swap. */ -+ if (TARGET_32BIT) -+ { -+ cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval); -+ x = gen_rtx_NE (VOIDmode, cond, const0_rtx); -+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, -+ gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); -+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); -+ } -+ else -+ { -+ emit_move_insn (neg_bval, const1_rtx); -+ cond = gen_rtx_NE (VOIDmode, rval, oldval); -+ if (thumb1_cmpneg_operand (oldval, SImode)) -+ emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval, -+ label2, cond)); -+ else -+ emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2)); -+ } - -- arm_emit_store_exclusive (mode, scratch, mem, newval, use_release); -+ arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release); - - /* Weak or strong, we want EQ to be true for success, so that we - match the flags that we got from the compare above. */ -- cond = gen_rtx_REG (CCmode, CC_REGNUM); -- x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx); -- emit_insn (gen_rtx_SET (cond, x)); -+ if (TARGET_32BIT) -+ { -+ cond = gen_rtx_REG (CCmode, CC_REGNUM); -+ x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx); -+ emit_insn (gen_rtx_SET (cond, x)); -+ } - - if (!is_weak) - { -- x = gen_rtx_NE (VOIDmode, cond, const0_rtx); -- x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, -- gen_rtx_LABEL_REF (Pmode, label1), pc_rtx); -- emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); -+ /* Z is set to boolean value of !neg_bval, as required to communicate -+ with arm_expand_compare_and_swap. */ -+ x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx); -+ emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1)); - } - - if (!is_mm_relaxed (mod_f)) -@@ -28121,6 +28119,15 @@ arm_split_compare_and_swap (rtx operands[]) - emit_label (label2); - } - -+/* Split an atomic operation pattern. 
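
The split sequence implements the standard compare-and-swap contract: load-exclusive, compare, store-exclusive, and retry on a spurious store failure unless IS_WEAK, with success signalled by Z (32-bit) or by neg_bval == 0 (Thumb-1). The portable C11 equivalent of what the loop computes, as a sketch:

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

int
main (void)
{
  _Atomic int mem = 41;
  int expected = 41;

  /* Strong form: retries internally on spurious failures, so a false
     result really means *mem != expected (Z clear in the sequence).  */
  bool ok = atomic_compare_exchange_strong (&mem, &expected, 42);
  assert (ok && mem == 42);

  expected = 0;
  ok = atomic_compare_exchange_strong (&mem, &expected, 7);
  assert (!ok && expected == 42);	/* old value written back */
  return 0;
}
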
Operation is given by CODE and is one -+ of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand -+ operation). Operation is performed on the content at MEM and on VALUE -+ following the memory model MODEL_RTX. The content at MEM before and after -+ the operation is returned in OLD_OUT and NEW_OUT respectively while the -+ success of the operation is returned in COND. Using a scratch register or -+ an operand register for these determines what result is returned for that -+ pattern. */ -+ - void - arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, - rtx value, rtx model_rtx, rtx cond) -@@ -28129,6 +28136,7 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, - machine_mode mode = GET_MODE (mem); - machine_mode wmode = (mode == DImode ? DImode : SImode); - rtx_code_label *label; -+ bool all_low_regs, bind_old_new; - rtx x; - - bool is_armv8_sync = arm_arch8 && is_mm_sync (model); -@@ -28163,6 +28171,28 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, - - arm_emit_load_exclusive (mode, old_out, mem, use_acquire); - -+ /* Does the operation require destination and first operand to use the same -+ register? This is decided by register constraints of relevant insn -+ patterns in thumb1.md. */ -+ gcc_assert (!new_out || REG_P (new_out)); -+ all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS -+ && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS -+ && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS; -+ bind_old_new = -+ (TARGET_THUMB1 -+ && code != SET -+ && code != MINUS -+ && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value)))); -+ -+ /* We want to return the old value while putting the result of the operation -+ in the same register as the old value so copy the old value over to the -+ destination register and use that register for the operation. 
*/ -+ if (old_out && bind_old_new) -+ { -+ emit_move_insn (new_out, old_out); -+ old_out = new_out; -+ } -+ - switch (code) - { - case SET: -@@ -28377,6 +28407,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d) - case V8QImode: gen = gen_neon_vuzpv8qi_internal; break; - case V8HImode: gen = gen_neon_vuzpv8hi_internal; break; - case V4HImode: gen = gen_neon_vuzpv4hi_internal; break; -+ case V8HFmode: gen = gen_neon_vuzpv8hf_internal; break; -+ case V4HFmode: gen = gen_neon_vuzpv4hf_internal; break; - case V4SImode: gen = gen_neon_vuzpv4si_internal; break; - case V2SImode: gen = gen_neon_vuzpv2si_internal; break; - case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break; -@@ -28450,6 +28482,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d) - case V8QImode: gen = gen_neon_vzipv8qi_internal; break; - case V8HImode: gen = gen_neon_vzipv8hi_internal; break; - case V4HImode: gen = gen_neon_vzipv4hi_internal; break; -+ case V8HFmode: gen = gen_neon_vzipv8hf_internal; break; -+ case V4HFmode: gen = gen_neon_vzipv4hf_internal; break; - case V4SImode: gen = gen_neon_vzipv4si_internal; break; - case V2SImode: gen = gen_neon_vzipv2si_internal; break; - case V2SFmode: gen = gen_neon_vzipv2sf_internal; break; -@@ -28502,6 +28536,8 @@ arm_evpc_neon_vrev (struct expand_vec_perm_d *d) - case V8QImode: gen = gen_neon_vrev32v8qi; break; - case V8HImode: gen = gen_neon_vrev64v8hi; break; - case V4HImode: gen = gen_neon_vrev64v4hi; break; -+ case V8HFmode: gen = gen_neon_vrev64v8hf; break; -+ case V4HFmode: gen = gen_neon_vrev64v4hf; break; - default: - return false; - } -@@ -28585,6 +28621,8 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d) - case V8QImode: gen = gen_neon_vtrnv8qi_internal; break; - case V8HImode: gen = gen_neon_vtrnv8hi_internal; break; - case V4HImode: gen = gen_neon_vtrnv4hi_internal; break; -+ case V8HFmode: gen = gen_neon_vtrnv8hf_internal; break; -+ case V4HFmode: gen = gen_neon_vtrnv4hf_internal; break; - case V4SImode: gen = gen_neon_vtrnv4si_internal; break; - case V2SImode: gen = gen_neon_vtrnv2si_internal; break; - case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break; -@@ -28660,6 +28698,8 @@ arm_evpc_neon_vext (struct expand_vec_perm_d *d) - case V8HImode: gen = gen_neon_vextv8hi; break; - case V2SImode: gen = gen_neon_vextv2si; break; - case V4SImode: gen = gen_neon_vextv4si; break; -+ case V4HFmode: gen = gen_neon_vextv4hf; break; -+ case V8HFmode: gen = gen_neon_vextv8hf; break; - case V2SFmode: gen = gen_neon_vextv2sf; break; - case V4SFmode: gen = gen_neon_vextv4sf; break; - case V2DImode: gen = gen_neon_vextv2di; break; -@@ -29185,7 +29225,7 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) - { - enum rtx_code code = GET_CODE (*comparison); - int code_int; -- machine_mode mode = (GET_MODE (*op1) == VOIDmode) -+ machine_mode mode = (GET_MODE (*op1) == VOIDmode) - ? GET_MODE (*op2) : GET_MODE (*op1); - - gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode); -@@ -29213,11 +29253,19 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) - *op2 = force_reg (mode, *op2); - return true; - -+ case HFmode: -+ if (!TARGET_VFP_FP16INST) -+ break; -+ /* FP16 comparisons are done in SF mode. */ -+ mode = SFmode; -+ *op1 = convert_to_mode (mode, *op1, 1); -+ *op2 = convert_to_mode (mode, *op2, 1); -+ /* Fall through. 
*/ - case SFmode: - case DFmode: -- if (!arm_float_compare_operand (*op1, mode)) -+ if (!vfp_compare_operand (*op1, mode)) - *op1 = force_reg (mode, *op1); -- if (!arm_float_compare_operand (*op2, mode)) -+ if (!vfp_compare_operand (*op2, mode)) - *op2 = force_reg (mode, *op2); - return true; - default: -@@ -29759,11 +29807,57 @@ arm_macro_fusion_p (void) - return current_tune->fusible_ops != tune_params::FUSE_NOTHING; - } - -+/* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable -+ for MOVW / MOVT macro fusion. */ -+ -+static bool -+arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set) -+{ -+ /* We are trying to fuse -+ movw imm / movt imm -+ instructions as a group that gets scheduled together. */ -+ -+ rtx set_dest = SET_DEST (curr_set); -+ -+ if (GET_MODE (set_dest) != SImode) -+ return false; -+ -+ /* We are trying to match: -+ prev (movw) == (set (reg r0) (const_int imm16)) -+ curr (movt) == (set (zero_extract (reg r0) -+ (const_int 16) -+ (const_int 16)) -+ (const_int imm16_1)) -+ or -+ prev (movw) == (set (reg r1) -+ (high (symbol_ref ("SYM")))) -+ curr (movt) == (set (reg r0) -+ (lo_sum (reg r1) -+ (symbol_ref ("SYM")))) */ -+ -+ if (GET_CODE (set_dest) == ZERO_EXTRACT) -+ { -+ if (CONST_INT_P (SET_SRC (curr_set)) -+ && CONST_INT_P (SET_SRC (prev_set)) -+ && REG_P (XEXP (set_dest, 0)) -+ && REG_P (SET_DEST (prev_set)) -+ && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) -+ return true; -+ -+ } -+ else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM -+ && REG_P (SET_DEST (curr_set)) -+ && REG_P (SET_DEST (prev_set)) -+ && GET_CODE (SET_SRC (prev_set)) == HIGH -+ && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) -+ return true; -+ -+ return false; -+} - - static bool - aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) - { -- rtx set_dest; - rtx prev_set = single_set (prev); - rtx curr_set = single_set (curr); - -@@ -29781,54 +29875,26 @@ aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr) - && aarch_crypto_can_dual_issue (prev, curr)) - return true; - -- if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT) -- { -- /* We are trying to fuse -- movw imm / movt imm -- instructions as a group that gets scheduled together. */ -- -- set_dest = SET_DEST (curr_set); -- -- if (GET_MODE (set_dest) != SImode) -- return false; -+ if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT -+ && arm_sets_movw_movt_fusible_p (prev_set, curr_set)) -+ return true; - -- /* We are trying to match: -- prev (movw) == (set (reg r0) (const_int imm16)) -- curr (movt) == (set (zero_extract (reg r0) -- (const_int 16) -- (const_int 16)) -- (const_int imm16_1)) -- or -- prev (movw) == (set (reg r1) -- (high (symbol_ref ("SYM")))) -- curr (movt) == (set (reg r0) -- (lo_sum (reg r1) -- (symbol_ref ("SYM")))) */ -- if (GET_CODE (set_dest) == ZERO_EXTRACT) -- { -- if (CONST_INT_P (SET_SRC (curr_set)) -- && CONST_INT_P (SET_SRC (prev_set)) -- && REG_P (XEXP (set_dest, 0)) -- && REG_P (SET_DEST (prev_set)) -- && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set))) -- return true; -- } -- else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM -- && REG_P (SET_DEST (curr_set)) -- && REG_P (SET_DEST (prev_set)) -- && GET_CODE (SET_SRC (prev_set)) == HIGH -- && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set))) -- return true; -- } - return false; - } - -+/* Return true iff the instruction fusion described by OP is enabled. 
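arm_sets_movw_movt_fusible_p above matches the two RTL shapes of a MOVW/MOVT pair so the scheduler keeps them back to back. The instruction idiom itself, sketched in inline assembly with an arbitrary example constant (the function name is illustrative):

/* The MOVW/MOVT pair the fusion check targets: 16 low bits,
   then 16 high bits of a 32-bit constant.  */
unsigned int
build_constant (void)
{
  unsigned int r;
  __asm__ ("movw %0, #0x5678\n\t"   /* r  = 0x00005678 */
           "movt %0, #0x1234"       /* r |= 0x1234 << 16 */
           : "=r" (r));
  return r;                         /* 0x12345678 */
}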
*/ -+bool -+arm_fusion_enabled_p (tune_params::fuse_ops op) -+{ -+ return current_tune->fusible_ops & op; -+} -+ - /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ - - static unsigned HOST_WIDE_INT - arm_asan_shadow_offset (void) - { -- return (unsigned HOST_WIDE_INT) 1 << 29; -+ return HOST_WIDE_INT_1U << 29; - } - - -@@ -29853,9 +29919,9 @@ arm_const_not_ok_for_debug_p (rtx p) - && GET_CODE (XEXP (p, 0)) == SYMBOL_REF - && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0)))) - { -- if ((TREE_CODE (decl_op1) == VAR_DECL -+ if ((VAR_P (decl_op1) - || TREE_CODE (decl_op1) == CONST_DECL) -- && (TREE_CODE (decl_op0) == VAR_DECL -+ && (VAR_P (decl_op0) - || TREE_CODE (decl_op0) == CONST_DECL)) - return (get_variable_section (decl_op1, false) - != get_variable_section (decl_op0, false)); -@@ -29988,9 +30054,8 @@ arm_can_inline_p (tree caller, tree callee) - if ((caller_fpu->features & callee_fpu->features) != callee_fpu->features) - return false; - -- /* Need same model and regs. */ -- if (callee_fpu->model != caller_fpu->model -- || callee_fpu->regs != callee_fpu->regs) -+ /* Need same FPU regs. */ -+ if (callee_fpu->regs != callee_fpu->regs) - return false; - - /* OK to inline between different modes. -@@ -30333,4 +30398,113 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri, - return; - } - -+ -+/* Construct and return a PARALLEL RTX vector with elements numbering the -+ lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of -+ the vector - from the perspective of the architecture. This does not -+ line up with GCC's perspective on lane numbers, so we end up with -+ different masks depending on our target endian-ness. The diagram -+ below may help. We must draw the distinction when building masks -+ which select one half of the vector. An instruction selecting -+ architectural low-lanes for a big-endian target, must be described using -+ a mask selecting GCC high-lanes. -+ -+ Big-Endian Little-Endian -+ -+GCC 0 1 2 3 3 2 1 0 -+ | x | x | x | x | | x | x | x | x | -+Architecture 3 2 1 0 3 2 1 0 -+ -+Low Mask: { 2, 3 } { 0, 1 } -+High Mask: { 0, 1 } { 2, 3 } -+*/ -+ -+rtx -+arm_simd_vect_par_cnst_half (machine_mode mode, bool high) -+{ -+ int nunits = GET_MODE_NUNITS (mode); -+ rtvec v = rtvec_alloc (nunits / 2); -+ int high_base = nunits / 2; -+ int low_base = 0; -+ int base; -+ rtx t1; -+ int i; -+ -+ if (BYTES_BIG_ENDIAN) -+ base = high ? low_base : high_base; -+ else -+ base = high ? high_base : low_base; -+ -+ for (i = 0; i < nunits / 2; i++) -+ RTVEC_ELT (v, i) = GEN_INT (base + i); -+ -+ t1 = gen_rtx_PARALLEL (mode, v); -+ return t1; -+} -+ -+/* Check OP for validity as a PARALLEL RTX vector with elements -+ numbering the lanes of either the high (HIGH == TRUE) or low lanes, -+ from the perspective of the architecture. See the diagram above -+ arm_simd_vect_par_cnst_half_p for more details. 
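The endian-dependent mask rule in the comment and diagram above can be checked with a few lines of plain C. This is a standalone illustration of the same arithmetic, not compiler code:

#include <stdio.h>

/* Same lane arithmetic as arm_simd_vect_par_cnst_half: which GCC
   lane numbers select the architectural high/low half.  */
static void
print_half_mask (int nunits, int big_endian, int high)
{
  int high_base = nunits / 2, low_base = 0;
  int base = big_endian ? (high ? low_base : high_base)
                        : (high ? high_base : low_base);
  for (int i = 0; i < nunits / 2; i++)
    printf ("%d ", base + i);
  printf ("\n");
}

int
main (void)
{
  print_half_mask (4, 1, 1);   /* big-endian high half:    0 1 */
  print_half_mask (4, 0, 1);   /* little-endian high half: 2 3 */
  return 0;
}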
*/ -+ -+bool -+arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode, -+ bool high) -+{ -+ rtx ideal = arm_simd_vect_par_cnst_half (mode, high); -+ HOST_WIDE_INT count_op = XVECLEN (op, 0); -+ HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0); -+ int i = 0; -+ -+ if (!VECTOR_MODE_P (mode)) -+ return false; -+ -+ if (count_op != count_ideal) -+ return false; -+ -+ for (i = 0; i < count_ideal; i++) -+ { -+ rtx elt_op = XVECEXP (op, 0, i); -+ rtx elt_ideal = XVECEXP (ideal, 0, i); -+ -+ if (!CONST_INT_P (elt_op) -+ || INTVAL (elt_ideal) != INTVAL (elt_op)) -+ return false; -+ } -+ return true; -+} -+ -+/* Can output mi_thunk for all cases except for non-zero vcall_offset -+ in Thumb1. */ -+static bool -+arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset, -+ const_tree) -+{ -+ /* For now, we punt and not handle this for TARGET_THUMB1. */ -+ if (vcall_offset && TARGET_THUMB1) -+ return false; -+ -+ /* Otherwise ok. */ -+ return true; -+} -+ -+/* Generate RTL for a conditional branch with rtx comparison CODE in -+ mode CC_MODE. The destination of the unlikely conditional branch -+ is LABEL_REF. */ -+ -+void -+arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode, -+ rtx label_ref) -+{ -+ rtx x; -+ x = gen_rtx_fmt_ee (code, VOIDmode, -+ gen_rtx_REG (cc_mode, CC_REGNUM), -+ const0_rtx); -+ -+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, -+ gen_rtx_LABEL_REF (VOIDmode, label_ref), -+ pc_rtx); -+ emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); -+} -+ - #include "gt-arm.h" ---- a/src/gcc/config/arm/arm.h -+++ b/src/gcc/config/arm/arm.h -@@ -80,11 +80,6 @@ extern arm_cc arm_current_cc; - extern int arm_target_label; - extern int arm_ccfsm_state; - extern GTY(()) rtx arm_target_insn; --/* The label of the current constant pool. */ --extern rtx pool_vector_label; --/* Set to 1 when a return insn is output, this means that the epilogue -- is not needed. */ --extern int return_used_this_function; - /* Callback to output language specific object attributes. */ - extern void (*arm_lang_output_object_attributes_hook)(void); - -@@ -139,7 +134,6 @@ extern void (*arm_lang_output_object_attributes_hook)(void); - #define TARGET_HARD_FLOAT (arm_float_abi != ARM_FLOAT_ABI_SOFT) - /* Use hardware floating point calling convention. */ - #define TARGET_HARD_FLOAT_ABI (arm_float_abi == ARM_FLOAT_ABI_HARD) --#define TARGET_VFP (TARGET_FPU_MODEL == ARM_FP_MODEL_VFP) - #define TARGET_IWMMXT (arm_arch_iwmmxt) - #define TARGET_IWMMXT2 (arm_arch_iwmmxt2) - #define TARGET_REALLY_IWMMXT (TARGET_IWMMXT && TARGET_32BIT) -@@ -177,50 +171,57 @@ extern void (*arm_lang_output_object_attributes_hook)(void); - to be more careful with TARGET_NEON as noted below. */ - - /* FPU is has the full VFPv3/NEON register file of 32 D registers. */ --#define TARGET_VFPD32 (TARGET_VFP && TARGET_FPU_REGS == VFP_REG_D32) -+#define TARGET_VFPD32 (TARGET_FPU_REGS == VFP_REG_D32) - - /* FPU supports VFPv3 instructions. */ --#define TARGET_VFP3 (TARGET_VFP && TARGET_FPU_REV >= 3) -+#define TARGET_VFP3 (TARGET_FPU_REV >= 3) - - /* FPU supports FPv5 instructions. */ --#define TARGET_VFP5 (TARGET_VFP && TARGET_FPU_REV >= 5) -+#define TARGET_VFP5 (TARGET_FPU_REV >= 5) - - /* FPU only supports VFP single-precision instructions. */ --#define TARGET_VFP_SINGLE (TARGET_VFP && TARGET_FPU_REGS == VFP_REG_SINGLE) -+#define TARGET_VFP_SINGLE (TARGET_FPU_REGS == VFP_REG_SINGLE) - - /* FPU supports VFP double-precision instructions. 
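The TARGET_VFP* simplifications above reflect that there is now a single FPU model, so each capability macro tests only revision and register-file width. The user-visible counterparts are the ACLE predefines; a quick capability probe (bit values per ACLE: __ARM_FP & 0x4 is single precision, & 0x8 is double; which branch fires depends on the -mfpu/-mfloat-abi flags used):

#include <stdio.h>

int
main (void)
{
#if defined (__ARM_FP) && (__ARM_FP & 0x8)
  puts ("double-precision VFP");    /* cf. TARGET_VFP_DOUBLE */
#elif defined (__ARM_FP) && (__ARM_FP & 0x4)
  puts ("single-precision only");   /* cf. TARGET_VFP_SINGLE */
#endif
#ifdef __ARM_NEON
  puts ("Advanced SIMD");           /* cf. TARGET_NEON */
#endif
  return 0;
}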
*/ --#define TARGET_VFP_DOUBLE (TARGET_VFP && TARGET_FPU_REGS != VFP_REG_SINGLE) -+#define TARGET_VFP_DOUBLE (TARGET_FPU_REGS != VFP_REG_SINGLE) - - /* FPU supports half-precision floating-point with NEON element load/store. */ --#define TARGET_NEON_FP16 \ -- (TARGET_VFP \ -- && ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_NEON | FPU_FL_FP16)) -+#define TARGET_NEON_FP16 \ -+ (ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_NEON) \ -+ && ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_FP16)) - - /* FPU supports VFP half-precision floating-point. */ - #define TARGET_FP16 \ -- (TARGET_VFP && ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_FP16)) -+ (ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_FP16)) - - /* FPU supports fused-multiply-add operations. */ --#define TARGET_FMA (TARGET_VFP && TARGET_FPU_REV >= 4) -+#define TARGET_FMA (TARGET_FPU_REV >= 4) - - /* FPU is ARMv8 compatible. */ --#define TARGET_FPU_ARMV8 (TARGET_VFP && TARGET_FPU_REV >= 8) -+#define TARGET_FPU_ARMV8 (TARGET_FPU_REV >= 8) - - /* FPU supports Crypto extensions. */ - #define TARGET_CRYPTO \ -- (TARGET_VFP && ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_CRYPTO)) -+ (ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_CRYPTO)) - - /* FPU supports Neon instructions. The setting of this macro gets - revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT - and TARGET_HARD_FLOAT to ensure that NEON instructions are - available. */ - #define TARGET_NEON \ -- (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP \ -+ (TARGET_32BIT && TARGET_HARD_FLOAT \ - && ARM_FPU_FSET_HAS (TARGET_FPU_FEATURES, FPU_FL_NEON)) - - /* FPU supports ARMv8.1 Adv.SIMD extensions. */ - #define TARGET_NEON_RDMA (TARGET_NEON && arm_arch8_1) - -+/* FPU supports the floating point FP16 instructions for ARMv8.2 and later. */ -+#define TARGET_VFP_FP16INST \ -+ (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 && arm_fp16_inst) -+ -+/* FPU supports the AdvSIMD FP16 instructions for ARMv8.2 and later. */ -+#define TARGET_NEON_FP16INST (TARGET_VFP_FP16INST && TARGET_NEON_RDMA) -+ - /* Q-bit is present. */ - #define TARGET_ARM_QBIT \ - (TARGET_32BIT && arm_arch5e && (arm_arch_notm || arm_arch7)) -@@ -236,7 +237,7 @@ extern void (*arm_lang_output_object_attributes_hook)(void); - - /* Should MOVW/MOVT be used in preference to a constant pool. */ - #define TARGET_USE_MOVT \ -- (arm_arch_thumb2 \ -+ (TARGET_HAVE_MOVT \ - && (arm_disable_literal_pool \ - || (!optimize_size && !current_tune->prefer_constant_pool))) - -@@ -251,14 +252,18 @@ extern void (*arm_lang_output_object_attributes_hook)(void); - #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR) - - /* Nonzero if this chip supports ldrex and strex */ --#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) || arm_arch7) -+#define TARGET_HAVE_LDREX ((arm_arch6 && TARGET_ARM) \ -+ || arm_arch7 \ -+ || (arm_arch8 && !arm_arch_notm)) - - /* Nonzero if this chip supports LPAE. */ - #define TARGET_HAVE_LPAE \ - (arm_arch7 && ARM_FSET_HAS_CPU1 (insn_flags, FL_FOR_ARCH7VE)) - - /* Nonzero if this chip supports ldrex{bh} and strex{bh}. */ --#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) || arm_arch7) -+#define TARGET_HAVE_LDREXBH ((arm_arch6k && TARGET_ARM) \ -+ || arm_arch7 \ -+ || (arm_arch8 && !arm_arch_notm)) - - /* Nonzero if this chip supports ldrexd and strexd. */ - #define TARGET_HAVE_LDREXD (((arm_arch6k && TARGET_ARM) \ -@@ -267,9 +272,20 @@ extern void (*arm_lang_output_object_attributes_hook)(void); - /* Nonzero if this chip supports load-acquire and store-release. 
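The load-acquire/store-release capability noted above (TARGET_HAVE_LDACQ, defined next) lets C11 atomics map directly onto LDA/STL instead of DMB-fenced plain accesses. A standard-atomics sketch:

#include <stdatomic.h>

/* With LDA/STL available these compile to single instructions;
   otherwise they need explicit barriers around plain accesses.  */
int
load_acquire (atomic_int *p)
{
  return atomic_load_explicit (p, memory_order_acquire);   /* lda */
}

void
store_release (atomic_int *p, int v)
{
  atomic_store_explicit (p, v, memory_order_release);      /* stl */
}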
*/ - #define TARGET_HAVE_LDACQ (TARGET_ARM_ARCH >= 8) - -+/* Nonzero if this chip supports LDAEXD and STLEXD. */ -+#define TARGET_HAVE_LDACQEXD (TARGET_ARM_ARCH >= 8 \ -+ && TARGET_32BIT \ -+ && arm_arch_notm) -+ -+/* Nonzero if this chip provides the MOVW and MOVT instructions. */ -+#define TARGET_HAVE_MOVT (arm_arch_thumb2 || arm_arch8) -+ -+/* Nonzero if this chip provides the CBZ and CBNZ instructions. */ -+#define TARGET_HAVE_CBZ (arm_arch_thumb2 || arm_arch8) -+ - /* Nonzero if integer division instructions supported. */ - #define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \ -- || (TARGET_THUMB2 && arm_arch_thumb_hwdiv)) -+ || (TARGET_THUMB && arm_arch_thumb_hwdiv)) - - /* Nonzero if disallow volatile memory access in IT block. */ - #define TARGET_NO_VOLATILE_CE (arm_arch_no_volatile_ce) -@@ -349,7 +365,6 @@ enum vfp_reg_type - extern const struct arm_fpu_desc - { - const char *name; -- enum arm_fp_model model; - int rev; - enum vfp_reg_type regs; - arm_fpu_feature_set features; -@@ -358,7 +373,6 @@ extern const struct arm_fpu_desc - /* Accessors. */ - - #define TARGET_FPU_NAME (all_fpus[arm_fpu_index].name) --#define TARGET_FPU_MODEL (all_fpus[arm_fpu_index].model) - #define TARGET_FPU_REV (all_fpus[arm_fpu_index].rev) - #define TARGET_FPU_REGS (all_fpus[arm_fpu_index].regs) - #define TARGET_FPU_FEATURES (all_fpus[arm_fpu_index].features) -@@ -402,7 +416,9 @@ enum base_architecture - BASE_ARCH_7R = 7, - BASE_ARCH_7M = 7, - BASE_ARCH_7EM = 7, -- BASE_ARCH_8A = 8 -+ BASE_ARCH_8A = 8, -+ BASE_ARCH_8M_BASE = 8, -+ BASE_ARCH_8M_MAIN = 8 - }; - - /* The major revision number of the ARM Architecture implemented by the target. */ -@@ -447,6 +463,13 @@ extern int arm_arch8; - /* Nonzero if this chip supports the ARM Architecture 8.1 extensions. */ - extern int arm_arch8_1; - -+/* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */ -+extern int arm_arch8_2; -+ -+/* Nonzero if this chip supports the FP16 instructions extension of ARM -+ Architecture 8.2. */ -+extern int arm_fp16_inst; -+ - /* Nonzero if this chip can benefit from load scheduling. */ - extern int arm_ld_sched; - -@@ -478,6 +501,9 @@ extern int arm_tune_cortex_a9; - interworking clean. */ - extern int arm_cpp_interwork; - -+/* Nonzero if chip supports Thumb 1. */ -+extern int arm_arch_thumb1; -+ - /* Nonzero if chip supports Thumb 2. */ - extern int arm_arch_thumb2; - -@@ -502,6 +528,9 @@ extern bool arm_disable_literal_pool; - /* Nonzero if chip supports the ARMv8 CRC instructions. */ - extern int arm_arch_crc; - -+/* Nonzero if chip supports the ARMv8-M Security Extensions. */ -+extern int arm_arch_cmse; -+ - #ifndef TARGET_DEFAULT - #define TARGET_DEFAULT (MASK_APCS_FRAME) - #endif -@@ -1191,7 +1220,7 @@ enum reg_class - the data layout happens to be consistent for big-endian, so we explicitly allow - that case. */ - #define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \ -- (TARGET_VFP && TARGET_BIG_END \ -+ (TARGET_BIG_END \ - && !(GET_MODE_SIZE (FROM) == 16 && GET_MODE_SIZE (TO) == 8) \ - && (GET_MODE_SIZE (FROM) > UNITS_PER_WORD \ - || GET_MODE_SIZE (TO) > UNITS_PER_WORD) \ -@@ -1242,8 +1271,7 @@ enum reg_class - NO_REGS is returned. */ - #define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X) \ - /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \ -- ((TARGET_VFP && TARGET_HARD_FLOAT \ -- && IS_VFP_CLASS (CLASS)) \ -+ ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \ - ? coproc_secondary_reload_class (MODE, X, FALSE) \ - : (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) \ - ? 
coproc_secondary_reload_class (MODE, X, TRUE) \ -@@ -1255,8 +1283,7 @@ enum reg_class - /* If we need to load shorts byte-at-a-time, then we need a scratch. */ - #define SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \ - /* Restrict which direct reloads are allowed for VFP/iWMMXt regs. */ \ -- ((TARGET_VFP && TARGET_HARD_FLOAT \ -- && IS_VFP_CLASS (CLASS)) \ -+ ((TARGET_HARD_FLOAT && IS_VFP_CLASS (CLASS)) \ - ? coproc_secondary_reload_class (MODE, X, FALSE) : \ - (TARGET_IWMMXT && (CLASS) == IWMMXT_REGS) ? \ - coproc_secondary_reload_class (MODE, X, TRUE) : \ -@@ -1363,6 +1390,7 @@ enum reg_class - #define ARM_FT_VOLATILE (1 << 4) /* Does not return. */ - #define ARM_FT_NESTED (1 << 5) /* Embedded inside another func. */ - #define ARM_FT_STACKALIGN (1 << 6) /* Called with misaligned stack. */ -+#define ARM_FT_CMSE_ENTRY (1 << 7) /* ARMv8-M non-secure entry function. */ - - /* Some macros to test these flags. */ - #define ARM_FUNC_TYPE(t) (t & ARM_FT_TYPE_MASK) -@@ -1371,6 +1399,7 @@ enum reg_class - #define IS_NAKED(t) (t & ARM_FT_NAKED) - #define IS_NESTED(t) (t & ARM_FT_NESTED) - #define IS_STACKALIGN(t) (t & ARM_FT_STACKALIGN) -+#define IS_CMSE_ENTRY(t) (t & ARM_FT_CMSE_ENTRY) - - - /* Structure used to hold the function stack frame layout. Offsets are -@@ -1516,7 +1545,7 @@ typedef struct - On the ARM, r0-r3 are used to pass args. */ - #define FUNCTION_ARG_REGNO_P(REGNO) \ - (IN_RANGE ((REGNO), 0, 3) \ -- || (TARGET_AAPCS_BASED && TARGET_VFP && TARGET_HARD_FLOAT \ -+ || (TARGET_AAPCS_BASED && TARGET_HARD_FLOAT \ - && IN_RANGE ((REGNO), FIRST_VFP_REGNUM, FIRST_VFP_REGNUM + 15)) \ - || (TARGET_IWMMXT_ABI \ - && IN_RANGE ((REGNO), FIRST_IWMMXT_REGNUM, FIRST_IWMMXT_REGNUM + 9))) -@@ -2187,13 +2216,9 @@ extern int making_const_table; - #define TARGET_ARM_ARCH \ - (arm_base_arch) \ - --#define TARGET_ARM_V6M (!arm_arch_notm && !arm_arch_thumb2) --#define TARGET_ARM_V7M (!arm_arch_notm && arm_arch_thumb2) -- - /* The highest Thumb instruction set version supported by the chip. */ --#define TARGET_ARM_ARCH_ISA_THUMB \ -- (arm_arch_thumb2 ? 2 \ -- : ((TARGET_ARM_ARCH >= 5 || arm_arch4t) ? 1 : 0)) -+#define TARGET_ARM_ARCH_ISA_THUMB \ -+ (arm_arch_thumb2 ? 2 : (arm_arch_thumb1 ? 1 : 0)) - - /* Expands to an upper-case char of the target's architectural - profile. */ -@@ -2245,13 +2270,18 @@ extern const char *arm_rewrite_mcpu (int argc, const char **argv); - " :%{march=*:-march=%*}}" \ - BIG_LITTLE_SPEC - -+extern const char *arm_target_thumb_only (int argc, const char **argv); -+#define TARGET_MODE_SPEC_FUNCTIONS \ -+ { "target_mode_check", arm_target_thumb_only }, -+ - /* -mcpu=native handling only makes sense with compiler running on - an ARM chip. 
*/ - #if defined(__arm__) - extern const char *host_detect_local_cpu (int argc, const char **argv); - # define EXTRA_SPEC_FUNCTIONS \ - { "local_cpu_detect", host_detect_local_cpu }, \ -- BIG_LITTLE_CPU_SPEC_FUNCTIONS -+ BIG_LITTLE_CPU_SPEC_FUNCTIONS \ -+ TARGET_MODE_SPEC_FUNCTIONS - - # define MCPU_MTUNE_NATIVE_SPECS \ - " %{march=native:%<march=native %:local_cpu_detect(arch)}" \ -@@ -2259,10 +2289,21 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); - " %{mtune=native:%<mtune=native %:local_cpu_detect(tune)}" - #else - # define MCPU_MTUNE_NATIVE_SPECS "" --# define EXTRA_SPEC_FUNCTIONS BIG_LITTLE_CPU_SPEC_FUNCTIONS -+# define EXTRA_SPEC_FUNCTIONS \ -+ BIG_LITTLE_CPU_SPEC_FUNCTIONS \ -+ TARGET_MODE_SPEC_FUNCTIONS - #endif - --#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS -+/* Automatically add -mthumb for Thumb-only targets if mode isn't specified -+ via the configuration option --with-mode or via the command line. The -+ function target_mode_check is called to do the check with either: -+ - an array of -march values if any is given; -+ - an array of -mcpu values if any is given; -+ - an empty array. */ -+#define TARGET_MODE_SPECS \ -+ " %{!marm:%{!mthumb:%:target_mode_check(%{march=*:%*;mcpu=*:%*;:})}}" -+ -+#define DRIVER_SELF_SPECS MCPU_MTUNE_NATIVE_SPECS TARGET_MODE_SPECS - #define TARGET_SUPPORTS_WIDE_INT 1 - - /* For switching between functions with different target attributes. */ ---- a/src/gcc/config/arm/arm.md -+++ b/src/gcc/config/arm/arm.md -@@ -118,10 +118,10 @@ - ; This can be "a" for ARM, "t" for either of the Thumbs, "32" for - ; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. "v6" - ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without --; arm_arch6. "v6t2" for Thumb-2 with arm_arch6. This attribute is --; used to compute attribute "enabled", use type "any" to enable an --; alternative in all cases. --(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3" -+; arm_arch6. "v6t2" for Thumb-2 with arm_arch6 and "v8mb" for ARMv8-M -+; Baseline. This attribute is used to compute attribute "enabled", -+; use type "any" to enable an alternative in all cases. 
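The new "v8mb" value (see the attribute definition that follows) enables individual insn alternatives only for ARMv8-M Baseline, i.e. Thumb-1 with arm_arch8; the divsi3/udivsi3 patterns later in this file gain non-predicated SDIV/UDIV alternatives under it. From the source side this is invisible; plain division simply compiles to the hardware instruction:

/* With -march=armv8-m.base the "v8mb" alternatives make hardware
   division available to Thumb-1 code; no source change needed.  */
int
quotient (int a, int b)
{
  return a / b;        /* sdiv r0, r0, r1 */
}

unsigned int
uquotient (unsigned int a, unsigned int b)
{
  return a / b;        /* udiv r0, r0, r1 */
}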
-+(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,v8mb,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon" - (const_string "any")) - - (define_attr "arch_enabled" "no,yes" -@@ -160,6 +160,10 @@ - (match_test "TARGET_32BIT && arm_arch6 && arm_arch_thumb2")) - (const_string "yes") - -+ (and (eq_attr "arch" "v8mb") -+ (match_test "TARGET_THUMB1 && arm_arch8")) -+ (const_string "yes") -+ - (and (eq_attr "arch" "avoid_neon_for_64bits") - (match_test "TARGET_NEON") - (not (match_test "TARGET_PREFER_NEON_64BITS"))) -@@ -177,6 +181,10 @@ - (and (eq_attr "arch" "armv6_or_vfpv3") - (match_test "arm_arch6 || TARGET_VFP3")) - (const_string "yes") -+ -+ (and (eq_attr "arch" "neon") -+ (match_test "TARGET_NEON")) -+ (const_string "yes") - ] - - (const_string "no"))) -@@ -539,6 +547,32 @@ - (set_attr "type" "multiple")] - ) - -+(define_expand "addv<mode>4" -+ [(match_operand:SIDI 0 "register_operand") -+ (match_operand:SIDI 1 "register_operand") -+ (match_operand:SIDI 2 "register_operand") -+ (match_operand 3 "")] -+ "TARGET_32BIT" -+{ -+ emit_insn (gen_add<mode>3_compareV (operands[0], operands[1], operands[2])); -+ arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); -+ -+ DONE; -+}) -+ -+(define_expand "uaddv<mode>4" -+ [(match_operand:SIDI 0 "register_operand") -+ (match_operand:SIDI 1 "register_operand") -+ (match_operand:SIDI 2 "register_operand") -+ (match_operand 3 "")] -+ "TARGET_32BIT" -+{ -+ emit_insn (gen_add<mode>3_compareC (operands[0], operands[1], operands[2])); -+ arm_gen_unlikely_cbranch (NE, CC_Cmode, operands[3]); -+ -+ DONE; -+}) -+ - (define_expand "addsi3" - [(set (match_operand:SI 0 "s_register_operand" "") - (plus:SI (match_operand:SI 1 "s_register_operand" "") -@@ -617,6 +651,165 @@ - ] - ) - -+(define_insn_and_split "adddi3_compareV" -+ [(set (reg:CC_V CC_REGNUM) -+ (ne:CC_V -+ (plus:TI -+ (sign_extend:TI (match_operand:DI 1 "register_operand" "r")) -+ (sign_extend:TI (match_operand:DI 2 "register_operand" "r"))) -+ (sign_extend:TI (plus:DI (match_dup 1) (match_dup 2))))) -+ (set (match_operand:DI 0 "register_operand" "=&r") -+ (plus:DI (match_dup 1) (match_dup 2)))] -+ "TARGET_32BIT" -+ "#" -+ "&& reload_completed" -+ [(parallel [(set (reg:CC_C CC_REGNUM) -+ (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) -+ (match_dup 1))) -+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) -+ (parallel [(set (reg:CC_V CC_REGNUM) -+ (ne:CC_V -+ (plus:DI (plus:DI -+ (sign_extend:DI (match_dup 4)) -+ (sign_extend:DI (match_dup 5))) -+ (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0))) -+ (plus:DI (sign_extend:DI -+ (plus:SI (match_dup 4) (match_dup 5))) -+ (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0))))) -+ (set (match_dup 3) (plus:SI (plus:SI -+ (match_dup 4) (match_dup 5)) -+ (ltu:SI (reg:CC_C CC_REGNUM) -+ (const_int 0))))])] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[5] = gen_highpart (SImode, operands[2]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "conds" "set") -+ (set_attr "length" "8") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn "addsi3_compareV" -+ [(set (reg:CC_V CC_REGNUM) -+ (ne:CC_V -+ (plus:DI -+ (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) -+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))) -+ (sign_extend:DI (plus:SI (match_dup 1) (match_dup 2))))) -+ (set (match_operand:SI 0 
"register_operand" "=r") -+ (plus:SI (match_dup 1) (match_dup 2)))] -+ "TARGET_32BIT" -+ "adds%?\\t%0, %1, %2" -+ [(set_attr "conds" "set") -+ (set_attr "type" "alus_sreg")] -+) -+ -+(define_insn "*addsi3_compareV_upper" -+ [(set (reg:CC_V CC_REGNUM) -+ (ne:CC_V -+ (plus:DI -+ (plus:DI -+ (sign_extend:DI (match_operand:SI 1 "register_operand" "r")) -+ (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))) -+ (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0))) -+ (plus:DI (sign_extend:DI -+ (plus:SI (match_dup 1) (match_dup 2))) -+ (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0))))) -+ (set (match_operand:SI 0 "register_operand" "=r") -+ (plus:SI -+ (plus:SI (match_dup 1) (match_dup 2)) -+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] -+ "TARGET_32BIT" -+ "adcs%?\\t%0, %1, %2" -+ [(set_attr "conds" "set") -+ (set_attr "type" "adcs_reg")] -+) -+ -+(define_insn_and_split "adddi3_compareC" -+ [(set (reg:CC_C CC_REGNUM) -+ (ne:CC_C -+ (plus:TI -+ (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) -+ (zero_extend:TI (match_operand:DI 2 "register_operand" "r"))) -+ (zero_extend:TI (plus:DI (match_dup 1) (match_dup 2))))) -+ (set (match_operand:DI 0 "register_operand" "=&r") -+ (plus:DI (match_dup 1) (match_dup 2)))] -+ "TARGET_32BIT" -+ "#" -+ "&& reload_completed" -+ [(parallel [(set (reg:CC_C CC_REGNUM) -+ (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) -+ (match_dup 1))) -+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]) -+ (parallel [(set (reg:CC_C CC_REGNUM) -+ (ne:CC_C -+ (plus:DI (plus:DI -+ (zero_extend:DI (match_dup 4)) -+ (zero_extend:DI (match_dup 5))) -+ (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0))) -+ (plus:DI (zero_extend:DI -+ (plus:SI (match_dup 4) (match_dup 5))) -+ (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0))))) -+ (set (match_dup 3) (plus:SI -+ (plus:SI (match_dup 4) (match_dup 5)) -+ (ltu:SI (reg:CC_C CC_REGNUM) -+ (const_int 0))))])] -+ " -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[5] = gen_highpart (SImode, operands[2]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ }" -+ [(set_attr "conds" "set") -+ (set_attr "length" "8") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn "*addsi3_compareC_upper" -+ [(set (reg:CC_C CC_REGNUM) -+ (ne:CC_C -+ (plus:DI -+ (plus:DI -+ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) -+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))) -+ (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0))) -+ (plus:DI (zero_extend:DI -+ (plus:SI (match_dup 1) (match_dup 2))) -+ (ltu:DI (reg:CC_C CC_REGNUM) (const_int 0))))) -+ (set (match_operand:SI 0 "register_operand" "=r") -+ (plus:SI -+ (plus:SI (match_dup 1) (match_dup 2)) -+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] -+ "TARGET_32BIT" -+ "adcs%?\\t%0, %1, %2" -+ [(set_attr "conds" "set") -+ (set_attr "type" "adcs_reg")] -+) -+ -+(define_insn "addsi3_compareC" -+ [(set (reg:CC_C CC_REGNUM) -+ (ne:CC_C -+ (plus:DI -+ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")) -+ (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))) -+ (zero_extend:DI -+ (plus:SI (match_dup 1) (match_dup 2))))) -+ (set (match_operand:SI 0 "register_operand" "=r") -+ (plus:SI (match_dup 1) (match_dup 2)))] -+ "TARGET_32BIT" -+ "adds%?\\t%0, %1, %2" -+ [(set_attr "conds" "set") -+ (set_attr "type" "alus_sreg")] -+) -+ - (define_insn "addsi3_compare0" - [(set (reg:CC_NOOV CC_REGNUM) - 
(compare:CC_NOOV -@@ -866,20 +1059,90 @@ - (set_attr "type" "adcs_reg")] - ) - -+(define_expand "subv<mode>4" -+ [(match_operand:SIDI 0 "register_operand") -+ (match_operand:SIDI 1 "register_operand") -+ (match_operand:SIDI 2 "register_operand") -+ (match_operand 3 "")] -+ "TARGET_32BIT" -+{ -+ emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); -+ arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); -+ -+ DONE; -+}) -+ -+(define_expand "usubv<mode>4" -+ [(match_operand:SIDI 0 "register_operand") -+ (match_operand:SIDI 1 "register_operand") -+ (match_operand:SIDI 2 "register_operand") -+ (match_operand 3 "")] -+ "TARGET_32BIT" -+{ -+ emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1], operands[2])); -+ arm_gen_unlikely_cbranch (LTU, CCmode, operands[3]); -+ -+ DONE; -+}) -+ -+(define_insn_and_split "subdi3_compare1" -+ [(set (reg:CC CC_REGNUM) -+ (compare:CC -+ (match_operand:DI 1 "register_operand" "r") -+ (match_operand:DI 2 "register_operand" "r"))) -+ (set (match_operand:DI 0 "register_operand" "=&r") -+ (minus:DI (match_dup 1) (match_dup 2)))] -+ "TARGET_32BIT" -+ "#" -+ "&& reload_completed" -+ [(parallel [(set (reg:CC CC_REGNUM) -+ (compare:CC (match_dup 1) (match_dup 2))) -+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))]) -+ (parallel [(set (reg:CC CC_REGNUM) -+ (compare:CC (match_dup 4) (match_dup 5))) -+ (set (match_dup 3) (minus:SI (minus:SI (match_dup 4) (match_dup 5)) -+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))])] -+ { -+ operands[3] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[4] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ operands[5] = gen_highpart (SImode, operands[2]); -+ operands[2] = gen_lowpart (SImode, operands[2]); -+ } -+ [(set_attr "conds" "set") -+ (set_attr "length" "8") -+ (set_attr "type" "multiple")] -+) -+ -+(define_insn "subsi3_compare1" -+ [(set (reg:CC CC_REGNUM) -+ (compare:CC -+ (match_operand:SI 1 "register_operand" "r") -+ (match_operand:SI 2 "register_operand" "r"))) -+ (set (match_operand:SI 0 "register_operand" "=r") -+ (minus:SI (match_dup 1) (match_dup 2)))] -+ "TARGET_32BIT" -+ "subs%?\\t%0, %1, %2" -+ [(set_attr "conds" "set") -+ (set_attr "type" "alus_sreg")] -+) -+ - (define_insn "*subsi3_carryin" -- [(set (match_operand:SI 0 "s_register_operand" "=r,r") -- (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I") -- (match_operand:SI 2 "s_register_operand" "r,r")) -- (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") -+ (minus:SI (minus:SI (match_operand:SI 1 "reg_or_int_operand" "r,I,Pz") -+ (match_operand:SI 2 "s_register_operand" "r,r,r")) -+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] - "TARGET_32BIT" - "@ - sbc%?\\t%0, %1, %2 -- rsc%?\\t%0, %2, %1" -+ rsc%?\\t%0, %2, %1 -+ sbc%?\\t%0, %2, %2, lsl #1" - [(set_attr "conds" "use") -- (set_attr "arch" "*,a") -+ (set_attr "arch" "*,a,t2") - (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -- (set_attr "type" "adc_reg,adc_imm")] -+ (set_attr "type" "adc_reg,adc_imm,alu_shift_imm")] - ) - - (define_insn "*subsi3_carryin_const" -@@ -1895,7 +2158,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "") - (div:SF (match_operand:SF 1 "s_register_operand" "") - (match_operand:SF 2 "s_register_operand" "")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "") - - (define_expand "divdf3" -@@ -2137,13 +2400,13 
@@ - - for (i = 9; i <= 31; i++) - { -- if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (operands[2])) -+ if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (operands[2])) - { - emit_insn (gen_extzv (operands[0], operands[1], GEN_INT (i), - const0_rtx)); - DONE; - } -- else if ((((HOST_WIDE_INT) 1) << i) - 1 -+ else if ((HOST_WIDE_INT_1 << i) - 1 - == ~INTVAL (operands[2])) - { - rtx shift = GEN_INT (i); -@@ -2442,7 +2705,7 @@ - { - int start_bit = INTVAL (operands[2]); - int width = INTVAL (operands[1]); -- HOST_WIDE_INT mask = (((HOST_WIDE_INT)1) << width) - 1; -+ HOST_WIDE_INT mask = (HOST_WIDE_INT_1 << width) - 1; - rtx target, subtarget; - - if (arm_arch_thumb2) -@@ -3050,7 +3313,14 @@ - (xor:DI (match_operand:DI 1 "s_register_operand" "") - (match_operand:DI 2 "arm_xordi_operand" "")))] - "TARGET_32BIT" -- "" -+ { -+ /* The iWMMXt pattern for xordi3 accepts only register operands but we want -+ to reuse this expander for all TARGET_32BIT targets so just force the -+ constants into a register. Unlike for the anddi3 and iordi3 there are -+ no NEON instructions that take an immediate. */ -+ if (TARGET_IWMMXT && !REG_P (operands[2])) -+ operands[2] = force_reg (DImode, operands[2]); -+ } - ) - - (define_insn_and_split "*xordi3_insn" -@@ -3744,8 +4014,7 @@ - { - rtx scratch1, scratch2; - -- if (CONST_INT_P (operands[2]) -- && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) -+ if (operands[2] == CONST1_RTX (SImode)) - { - emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1])); - DONE; -@@ -3790,7 +4059,7 @@ - "TARGET_EITHER" - " - if (CONST_INT_P (operands[2]) -- && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) -+ && (UINTVAL (operands[2])) > 31) - { - emit_insn (gen_movsi (operands[0], const0_rtx)); - DONE; -@@ -3818,8 +4087,7 @@ - { - rtx scratch1, scratch2; - -- if (CONST_INT_P (operands[2]) -- && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) -+ if (operands[2] == CONST1_RTX (SImode)) - { - emit_insn (gen_arm_ashrdi3_1bit (operands[0], operands[1])); - DONE; -@@ -3864,7 +4132,7 @@ - "TARGET_EITHER" - " - if (CONST_INT_P (operands[2]) -- && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) -+ && UINTVAL (operands[2]) > 31) - operands[2] = GEN_INT (31); - " - ) -@@ -3889,8 +4157,7 @@ - { - rtx scratch1, scratch2; - -- if (CONST_INT_P (operands[2]) -- && (HOST_WIDE_INT) INTVAL (operands[2]) == 1) -+ if (operands[2] == CONST1_RTX (SImode)) - { - emit_insn (gen_arm_lshrdi3_1bit (operands[0], operands[1])); - DONE; -@@ -3935,7 +4202,7 @@ - "TARGET_EITHER" - " - if (CONST_INT_P (operands[2]) -- && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) -+ && (UINTVAL (operands[2])) > 31) - { - emit_insn (gen_movsi (operands[0], const0_rtx)); - DONE; -@@ -3969,7 +4236,7 @@ - if (TARGET_32BIT) - { - if (CONST_INT_P (operands[2]) -- && ((unsigned HOST_WIDE_INT) INTVAL (operands[2])) > 31) -+ && UINTVAL (operands[2]) > 31) - operands[2] = GEN_INT (INTVAL (operands[2]) % 32); - } - else /* TARGET_THUMB1 */ -@@ -4300,9 +4567,11 @@ - (define_insn "*extv_reg" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") -- (match_operand:SI 2 "const_int_M_operand" "M") -- (match_operand:SI 3 "const_int_M_operand" "M")))] -- "arm_arch_thumb2" -+ (match_operand:SI 2 "const_int_operand" "n") -+ (match_operand:SI 3 "const_int_operand" "n")))] -+ "arm_arch_thumb2 -+ && IN_RANGE (INTVAL (operands[3]), 0, 31) -+ && IN_RANGE (INTVAL (operands[2]), 1, 32 - INTVAL (operands[3]))" - "sbfx%?\t%0, %1, %3, %2" - [(set_attr "length" "4") - (set_attr "predicable" 
"yes") -@@ -4313,9 +4582,11 @@ - (define_insn "extzv_t2" - [(set (match_operand:SI 0 "s_register_operand" "=r") - (zero_extract:SI (match_operand:SI 1 "s_register_operand" "r") -- (match_operand:SI 2 "const_int_M_operand" "M") -- (match_operand:SI 3 "const_int_M_operand" "M")))] -- "arm_arch_thumb2" -+ (match_operand:SI 2 "const_int_operand" "n") -+ (match_operand:SI 3 "const_int_operand" "n")))] -+ "arm_arch_thumb2 -+ && IN_RANGE (INTVAL (operands[3]), 0, 31) -+ && IN_RANGE (INTVAL (operands[2]), 1, 32 - INTVAL (operands[3]))" - "ubfx%?\t%0, %1, %3, %2" - [(set_attr "length" "4") - (set_attr "predicable" "yes") -@@ -4326,23 +4597,29 @@ - - ;; Division instructions - (define_insn "divsi3" -- [(set (match_operand:SI 0 "s_register_operand" "=r") -- (div:SI (match_operand:SI 1 "s_register_operand" "r") -- (match_operand:SI 2 "s_register_operand" "r")))] -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r") -+ (div:SI (match_operand:SI 1 "s_register_operand" "r,r") -+ (match_operand:SI 2 "s_register_operand" "r,r")))] - "TARGET_IDIV" -- "sdiv%?\t%0, %1, %2" -- [(set_attr "predicable" "yes") -+ "@ -+ sdiv%?\t%0, %1, %2 -+ sdiv\t%0, %1, %2" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "sdiv")] - ) - - (define_insn "udivsi3" -- [(set (match_operand:SI 0 "s_register_operand" "=r") -- (udiv:SI (match_operand:SI 1 "s_register_operand" "r") -- (match_operand:SI 2 "s_register_operand" "r")))] -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r") -+ (udiv:SI (match_operand:SI 1 "s_register_operand" "r,r") -+ (match_operand:SI 2 "s_register_operand" "r,r")))] - "TARGET_IDIV" -- "udiv%?\t%0, %1, %2" -- [(set_attr "predicable" "yes") -+ "@ -+ udiv%?\t%0, %1, %2 -+ udiv\t%0, %1, %2" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "type" "udiv")] - ) -@@ -4350,6 +4627,63 @@ - - ;; Unary arithmetic insns - -+(define_expand "negvsi3" -+ [(match_operand:SI 0 "register_operand") -+ (match_operand:SI 1 "register_operand") -+ (match_operand 2 "")] -+ "TARGET_32BIT" -+{ -+ emit_insn (gen_subsi3_compare (operands[0], const0_rtx, operands[1])); -+ arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]); -+ -+ DONE; -+}) -+ -+(define_expand "negvdi3" -+ [(match_operand:DI 0 "register_operand") -+ (match_operand:DI 1 "register_operand") -+ (match_operand 2 "")] -+ "TARGET_ARM" -+{ -+ emit_insn (gen_negdi2_compare (operands[0], operands[1])); -+ arm_gen_unlikely_cbranch (NE, CC_Vmode, operands[2]); -+ -+ DONE; -+}) -+ -+ -+(define_insn_and_split "negdi2_compare" -+ [(set (reg:CC CC_REGNUM) -+ (compare:CC -+ (const_int 0) -+ (match_operand:DI 1 "register_operand" "0,r"))) -+ (set (match_operand:DI 0 "register_operand" "=r,&r") -+ (minus:DI (const_int 0) (match_dup 1)))] -+ "TARGET_ARM" -+ "#" -+ "&& reload_completed" -+ [(parallel [(set (reg:CC CC_REGNUM) -+ (compare:CC (const_int 0) (match_dup 1))) -+ (set (match_dup 0) (minus:SI (const_int 0) -+ (match_dup 1)))]) -+ (parallel [(set (reg:CC CC_REGNUM) -+ (compare:CC (const_int 0) (match_dup 3))) -+ (set (match_dup 2) -+ (minus:SI -+ (minus:SI (const_int 0) (match_dup 3)) -+ (ltu:SI (reg:CC_C CC_REGNUM) -+ (const_int 0))))])] -+ { -+ operands[2] = gen_highpart (SImode, operands[0]); -+ operands[0] = gen_lowpart (SImode, operands[0]); -+ operands[3] = gen_highpart (SImode, operands[1]); -+ operands[1] = gen_lowpart (SImode, operands[1]); -+ } -+ [(set_attr "conds" "set") -+ (set_attr "length" "8") -+ (set_attr "type" 
"multiple")] -+) -+ - (define_expand "negdi2" - [(parallel - [(set (match_operand:DI 0 "s_register_operand" "") -@@ -4367,12 +4701,13 @@ - - ;; The constraints here are to prevent a *partial* overlap (where %Q0 == %R1). - ;; The first alternative allows the common case of a *full* overlap. --(define_insn_and_split "*arm_negdi2" -+(define_insn_and_split "*negdi2_insn" - [(set (match_operand:DI 0 "s_register_operand" "=r,&r") - (neg:DI (match_operand:DI 1 "s_register_operand" "0,r"))) - (clobber (reg:CC CC_REGNUM))] -- "TARGET_ARM" -- "#" ; "rsbs\\t%Q0, %Q1, #0\;rsc\\t%R0, %R1, #0" -+ "TARGET_32BIT" -+ "#" ; rsbs %Q0, %Q1, #0; rsc %R0, %R1, #0 (ARM) -+ ; negs %Q0, %Q1 ; sbc %R0, %R1, %R1, lsl #1 (Thumb-2) - "&& reload_completed" - [(parallel [(set (reg:CC CC_REGNUM) - (compare:CC (const_int 0) (match_dup 1))) -@@ -4390,6 +4725,20 @@ - (set_attr "type" "multiple")] - ) - -+(define_insn "*negsi2_carryin_compare" -+ [(set (reg:CC CC_REGNUM) -+ (compare:CC (const_int 0) -+ (match_operand:SI 1 "s_register_operand" "r"))) -+ (set (match_operand:SI 0 "s_register_operand" "=r") -+ (minus:SI (minus:SI (const_int 0) -+ (match_dup 1)) -+ (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] -+ "TARGET_ARM" -+ "rscs\\t%0, %1, #0" -+ [(set_attr "conds" "set") -+ (set_attr "type" "alus_imm")] -+) -+ - (define_expand "negsi2" - [(set (match_operand:SI 0 "s_register_operand" "") - (neg:SI (match_operand:SI 1 "s_register_operand" "")))] -@@ -4412,7 +4761,7 @@ - (define_expand "negsf2" - [(set (match_operand:SF 0 "s_register_operand" "") - (neg:SF (match_operand:SF 1 "s_register_operand" "")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "" - ) - -@@ -4685,7 +5034,7 @@ - (define_expand "sqrtsf2" - [(set (match_operand:SF 0 "s_register_operand" "") - (sqrt:SF (match_operand:SF 1 "s_register_operand" "")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "") - - (define_expand "sqrtdf2" -@@ -4854,7 +5203,7 @@ - "" - ) - --/* DFmode -> HFmode conversions have to go through SFmode. */ -+;; DFmode to HFmode conversions have to go through SFmode. - (define_expand "truncdfhf2" - [(set (match_operand:HF 0 "general_operand" "") - (float_truncate:HF -@@ -5117,7 +5466,7 @@ - (match_operator 5 "subreg_lowpart_operator" - [(match_operand:SI 4 "s_register_operand" "")]))))] - "TARGET_32BIT -- && ((unsigned HOST_WIDE_INT) INTVAL (operands[3]) -+ && (UINTVAL (operands[3]) - == (GET_MODE_MASK (GET_MODE (operands[5])) - & (GET_MODE_MASK (GET_MODE (operands[5])) - << (INTVAL (operands[2])))))" -@@ -5361,7 +5710,7 @@ - "" - ) - --/* HFmode -> DFmode conversions have to go through SFmode. */ -+;; HFmode -> DFmode conversions have to go through SFmode. - (define_expand "extendhfdf2" - [(set (match_operand:DF 0 "general_operand" "") - (float_extend:DF (match_operand:HF 1 "general_operand" "")))] -@@ -5490,7 +5839,7 @@ - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m") - (match_operand:DI 1 "di_operand" "rDa,Db,Dc,mi,q"))] - "TARGET_32BIT -- && !(TARGET_HARD_FLOAT && TARGET_VFP) -+ && !(TARGET_HARD_FLOAT) - && !TARGET_IWMMXT - && ( register_operand (operands[0], DImode) - || register_operand (operands[1], DImode))" -@@ -5699,12 +6048,15 @@ - ;; LO_SUM adds in the high bits. Fortunately these are opaque operations - ;; so this does not matter. 
- (define_insn "*arm_movt" -- [(set (match_operand:SI 0 "nonimmediate_operand" "=r") -- (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0") -- (match_operand:SI 2 "general_operand" "i")))] -- "arm_arch_thumb2 && arm_valid_symbolic_address_p (operands[2])" -- "movt%?\t%0, #:upper16:%c2" -- [(set_attr "predicable" "yes") -+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r") -+ (lo_sum:SI (match_operand:SI 1 "nonimmediate_operand" "0,0") -+ (match_operand:SI 2 "general_operand" "i,i")))] -+ "TARGET_HAVE_MOVT && arm_valid_symbolic_address_p (operands[2])" -+ "@ -+ movt%?\t%0, #:upper16:%c2 -+ movt\t%0, #:upper16:%c2" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "length" "4") - (set_attr "type" "alu_sreg")] -@@ -5713,8 +6065,7 @@ - (define_insn "*arm_movsi_insn" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m") - (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk"))] -- "TARGET_ARM && ! TARGET_IWMMXT -- && !(TARGET_HARD_FLOAT && TARGET_VFP) -+ "TARGET_ARM && !TARGET_IWMMXT && !TARGET_HARD_FLOAT - && ( register_operand (operands[0], SImode) - || register_operand (operands[1], SImode))" - "@ -@@ -5726,6 +6077,7 @@ - str%?\\t%1, %0" - [(set_attr "type" "mov_reg,mov_imm,mvn_imm,mov_imm,load1,store1") - (set_attr "predicable" "yes") -+ (set_attr "arch" "*,*,*,v6t2,*,*") - (set_attr "pool_range" "*,*,*,*,4096,*") - (set_attr "neg_pool_range" "*,*,*,*,4084,*")] - ) -@@ -5762,7 +6114,8 @@ - [(set (match_operand:SI 0 "arm_general_register_operand" "") - (const:SI (plus:SI (match_operand:SI 1 "general_operand" "") - (match_operand:SI 2 "const_int_operand" ""))))] -- "TARGET_THUMB2 -+ "TARGET_THUMB -+ && TARGET_HAVE_MOVT - && arm_disable_literal_pool - && reload_completed - && GET_CODE (operands[1]) == SYMBOL_REF" -@@ -5793,8 +6146,7 @@ - (define_split - [(set (match_operand:SI 0 "arm_general_register_operand" "") - (match_operand:SI 1 "general_operand" ""))] -- "TARGET_32BIT -- && TARGET_USE_MOVT && GET_CODE (operands[1]) == SYMBOL_REF -+ "TARGET_USE_MOVT && GET_CODE (operands[1]) == SYMBOL_REF - && !flag_pic && !target_word_relocations - && !arm_tls_referenced_p (operands[1])" - [(clobber (const_int 0))] -@@ -6362,7 +6714,7 @@ - [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m,r") - (match_operand:HI 1 "general_operand" "rIk,K,n,r,mi"))] - "TARGET_ARM -- && arm_arch4 -+ && arm_arch4 && !TARGET_HARD_FLOAT - && (register_operand (operands[0], HImode) - || register_operand (operands[1], HImode))" - "@ -@@ -6388,7 +6740,7 @@ - (define_insn "*movhi_bytes" - [(set (match_operand:HI 0 "s_register_operand" "=r,r,r") - (match_operand:HI 1 "arm_rhs_operand" "I,rk,K"))] -- "TARGET_ARM" -+ "TARGET_ARM && !TARGET_HARD_FLOAT" - "@ - mov%?\\t%0, %1\\t%@ movhi - mov%?\\t%0, %1\\t%@ movhi -@@ -6396,7 +6748,7 @@ - [(set_attr "predicable" "yes") - (set_attr "type" "mov_imm,mov_reg,mvn_imm")] - ) -- -+ - ;; We use a DImode scratch because we may occasionally need an additional - ;; temporary if the address isn't offsettable -- push_reload doesn't seem - ;; to take any notice of the "o" constraints on reload_memory_operand operand. 
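Earlier in this file the *extv_reg/extzv_t2 patterns traded the const_int_M_operand predicate for explicit IN_RANGE checks on position and width. Typical source that exercises them, assuming the usual little-endian bitfield layout:

/* Signed bitfield reads map to sbfx, unsigned to ubfx, for any
   position 0..31 and width 1..(32 - position).  */
struct packed
{
  unsigned int lo  : 5;
  int          mid : 7;
  unsigned int hi  : 20;
};

int
read_mid (struct packed p)
{
  return p.mid;        /* sbfx rD, rN, #5, #7 */
}

unsigned int
read_hi (struct packed p)
{
  return p.hi;         /* ubfx rD, rN, #12, #20 */
}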
-@@ -6518,7 +6870,7 @@ - strb%?\\t%1, %0" - [(set_attr "type" "mov_reg,mov_reg,mov_imm,mov_imm,mvn_imm,load1,store1,load1,store1") - (set_attr "predicable" "yes") -- (set_attr "predicable_short_it" "yes,yes,yes,no,no,no,no,no,no") -+ (set_attr "predicable_short_it" "yes,yes,no,yes,no,no,no,no,no") - (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any") - (set_attr "length" "2,4,4,2,4,2,2,4,4")] - ) -@@ -6548,7 +6900,7 @@ - (define_insn "*arm32_movhf" - [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") - (match_operand:HF 1 "general_operand" " m,r,r,F"))] -- "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_FP16) -+ "TARGET_32BIT && !TARGET_HARD_FLOAT - && ( s_register_operand (operands[0], HFmode) - || s_register_operand (operands[1], HFmode))" - "* -@@ -6892,7 +7244,7 @@ - [(set (pc) (if_then_else - (match_operator 0 "expandable_comparison_operator" - [(match_operand:SF 1 "s_register_operand" "") -- (match_operand:SF 2 "arm_float_compare_operand" "")]) -+ (match_operand:SF 2 "vfp_compare_operand" "")]) - (label_ref (match_operand 3 "" "")) - (pc)))] - "TARGET_32BIT && TARGET_HARD_FLOAT" -@@ -6904,7 +7256,7 @@ - [(set (pc) (if_then_else - (match_operator 0 "expandable_comparison_operator" - [(match_operand:DF 1 "s_register_operand" "") -- (match_operand:DF 2 "arm_float_compare_operand" "")]) -+ (match_operand:DF 2 "vfp_compare_operand" "")]) - (label_ref (match_operand 3 "" "")) - (pc)))] - "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" -@@ -7366,11 +7718,29 @@ - DONE; - }") - -+(define_expand "cstorehf4" -+ [(set (match_operand:SI 0 "s_register_operand") -+ (match_operator:SI 1 "expandable_comparison_operator" -+ [(match_operand:HF 2 "s_register_operand") -+ (match_operand:HF 3 "vfp_compare_operand")]))] -+ "TARGET_VFP_FP16INST" -+ { -+ if (!arm_validize_comparison (&operands[1], -+ &operands[2], -+ &operands[3])) -+ FAIL; -+ -+ emit_insn (gen_cstore_cc (operands[0], operands[1], -+ operands[2], operands[3])); -+ DONE; -+ } -+) -+ - (define_expand "cstoresf4" - [(set (match_operand:SI 0 "s_register_operand" "") - (match_operator:SI 1 "expandable_comparison_operator" - [(match_operand:SF 2 "s_register_operand" "") -- (match_operand:SF 3 "arm_float_compare_operand" "")]))] -+ (match_operand:SF 3 "vfp_compare_operand" "")]))] - "TARGET_32BIT && TARGET_HARD_FLOAT" - "emit_insn (gen_cstore_cc (operands[0], operands[1], - operands[2], operands[3])); DONE;" -@@ -7380,7 +7750,7 @@ - [(set (match_operand:SI 0 "s_register_operand" "") - (match_operator:SI 1 "expandable_comparison_operator" - [(match_operand:DF 2 "s_register_operand" "") -- (match_operand:DF 3 "arm_float_compare_operand" "")]))] -+ (match_operand:DF 3 "vfp_compare_operand" "")]))] - "TARGET_32BIT && TARGET_HARD_FLOAT && !TARGET_VFP_SINGLE" - "emit_insn (gen_cstore_cc (operands[0], operands[1], - operands[2], operands[3])); DONE;" -@@ -7418,9 +7788,31 @@ - rtx ccreg; - - if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), -- &XEXP (operands[1], 1))) -+ &XEXP (operands[1], 1))) - FAIL; -- -+ -+ code = GET_CODE (operands[1]); -+ ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), -+ XEXP (operands[1], 1), NULL_RTX); -+ operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); -+ }" -+) -+ -+(define_expand "movhfcc" -+ [(set (match_operand:HF 0 "s_register_operand") -+ (if_then_else:HF (match_operand 1 "arm_cond_move_operator") -+ (match_operand:HF 2 "s_register_operand") -+ (match_operand:HF 3 "s_register_operand")))] -+ "TARGET_VFP_FP16INST" -+ " -+ { -+ enum rtx_code code = GET_CODE 
(operands[1]); -+ rtx ccreg; -+ -+ if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), -+ &XEXP (operands[1], 1))) -+ FAIL; -+ - code = GET_CODE (operands[1]); - ccreg = arm_gen_compare_reg (code, XEXP (operands[1], 0), - XEXP (operands[1], 1), NULL_RTX); -@@ -7439,7 +7831,7 @@ - enum rtx_code code = GET_CODE (operands[1]); - rtx ccreg; - -- if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), -+ if (!arm_validize_comparison (&operands[1], &XEXP (operands[1], 0), - &XEXP (operands[1], 1))) - FAIL; - -@@ -7504,6 +7896,37 @@ - (set_attr "type" "fcsel")] - ) - -+(define_insn "*cmovhf" -+ [(set (match_operand:HF 0 "s_register_operand" "=t") -+ (if_then_else:HF (match_operator 1 "arm_vsel_comparison_operator" -+ [(match_operand 2 "cc_register" "") (const_int 0)]) -+ (match_operand:HF 3 "s_register_operand" "t") -+ (match_operand:HF 4 "s_register_operand" "t")))] -+ "TARGET_VFP_FP16INST" -+ "* -+ { -+ enum arm_cond_code code = maybe_get_arm_condition_code (operands[1]); -+ switch (code) -+ { -+ case ARM_GE: -+ case ARM_GT: -+ case ARM_EQ: -+ case ARM_VS: -+ return \"vsel%d1.f16\\t%0, %3, %4\"; -+ case ARM_LT: -+ case ARM_LE: -+ case ARM_NE: -+ case ARM_VC: -+ return \"vsel%D1.f16\\t%0, %4, %3\"; -+ default: -+ gcc_unreachable (); -+ } -+ return \"\"; -+ }" -+ [(set_attr "conds" "use") -+ (set_attr "type" "fcsel")] -+) -+ - (define_insn_and_split "*movsicc_insn" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r") - (if_then_else:SI -@@ -7627,6 +8050,7 @@ - " - { - rtx callee, pat; -+ tree addr = MEM_EXPR (operands[0]); - - /* In an untyped call, we can get NULL for operand 2. */ - if (operands[2] == NULL_RTX) -@@ -7641,8 +8065,17 @@ - : !REG_P (callee)) - XEXP (operands[0], 0) = force_reg (Pmode, callee); - -- pat = gen_call_internal (operands[0], operands[1], operands[2]); -- arm_emit_call_insn (pat, XEXP (operands[0], 0), false); -+ if (detect_cmse_nonsecure_call (addr)) -+ { -+ pat = gen_nonsecure_call_internal (operands[0], operands[1], -+ operands[2]); -+ emit_call_insn (pat); -+ } -+ else -+ { -+ pat = gen_call_internal (operands[0], operands[1], operands[2]); -+ arm_emit_call_insn (pat, XEXP (operands[0], 0), false); -+ } - DONE; - }" - ) -@@ -7653,6 +8086,24 @@ - (use (match_operand 2 "" "")) - (clobber (reg:SI LR_REGNUM))])]) - -+(define_expand "nonsecure_call_internal" -+ [(parallel [(call (unspec:SI [(match_operand 0 "memory_operand" "")] -+ UNSPEC_NONSECURE_MEM) -+ (match_operand 1 "general_operand" "")) -+ (use (match_operand 2 "" "")) -+ (clobber (reg:SI LR_REGNUM)) -+ (clobber (reg:SI 4))])] -+ "use_cmse" -+ " -+ { -+ rtx tmp; -+ tmp = copy_to_suggested_reg (XEXP (operands[0], 0), -+ gen_rtx_REG (SImode, 4), -+ SImode); -+ -+ operands[0] = replace_equiv_address (operands[0], tmp); -+ }") -+ - (define_insn "*call_reg_armv5" - [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r")) - (match_operand 1 "" "")) -@@ -7688,6 +8139,7 @@ - " - { - rtx pat, callee; -+ tree addr = MEM_EXPR (operands[1]); - - /* In an untyped call, we can get NULL for operand 2. 
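The call expanders above check detect_cmse_nonsecure_call and, for calls through cmse_nonsecure_call function pointers, route the call via nonsecure_call_internal, which pins the target address in r4 (clobbered by the pattern) before the state transition. A user-level sketch; requires -mcmse on an ARMv8-M target:

/* Calling from secure into non-secure state; the type attribute is
   the documented trigger for the non-secure call sequence.  */
typedef void __attribute__ ((cmse_nonsecure_call)) (*ns_fn_t) (int);

void
call_nonsecure (ns_fn_t fn)
{
  fn (42);   /* address forced into r4, then a BLXNS sequence */
}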
*/ - if (operands[3] == 0) -@@ -7702,9 +8154,18 @@ - : !REG_P (callee)) - XEXP (operands[1], 0) = force_reg (Pmode, callee); - -- pat = gen_call_value_internal (operands[0], operands[1], -- operands[2], operands[3]); -- arm_emit_call_insn (pat, XEXP (operands[1], 0), false); -+ if (detect_cmse_nonsecure_call (addr)) -+ { -+ pat = gen_nonsecure_call_value_internal (operands[0], operands[1], -+ operands[2], operands[3]); -+ emit_call_insn (pat); -+ } -+ else -+ { -+ pat = gen_call_value_internal (operands[0], operands[1], -+ operands[2], operands[3]); -+ arm_emit_call_insn (pat, XEXP (operands[1], 0), false); -+ } - DONE; - }" - ) -@@ -7716,6 +8177,25 @@ - (use (match_operand 3 "" "")) - (clobber (reg:SI LR_REGNUM))])]) - -+(define_expand "nonsecure_call_value_internal" -+ [(parallel [(set (match_operand 0 "" "") -+ (call (unspec:SI [(match_operand 1 "memory_operand" "")] -+ UNSPEC_NONSECURE_MEM) -+ (match_operand 2 "general_operand" ""))) -+ (use (match_operand 3 "" "")) -+ (clobber (reg:SI LR_REGNUM)) -+ (clobber (reg:SI 4))])] -+ "use_cmse" -+ " -+ { -+ rtx tmp; -+ tmp = copy_to_suggested_reg (XEXP (operands[1], 0), -+ gen_rtx_REG (SImode, 4), -+ SImode); -+ -+ operands[1] = replace_equiv_address (operands[1], tmp); -+ }") -+ - (define_insn "*call_value_reg_armv5" - [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:SI 1 "s_register_operand" "r")) -@@ -8153,8 +8633,8 @@ - ) - - (define_insn "probe_stack" -- [(set (match_operand 0 "memory_operand" "=m") -- (unspec [(const_int 0)] UNSPEC_PROBE_STACK))] -+ [(set (match_operand:SI 0 "memory_operand" "=m") -+ (unspec:SI [(const_int 0)] UNSPEC_PROBE_STACK))] - "TARGET_32BIT" - "str%?\\tr0, %0" - [(set_attr "type" "store1") -@@ -10221,8 +10701,8 @@ - (match_operand 1 "const_int_operand" ""))) - (clobber (match_scratch:SI 2 ""))] - "TARGET_ARM -- && (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) -- == (((unsigned HOST_WIDE_INT) INTVAL (operands[1])) >> 24) << 24)" -+ && ((UINTVAL (operands[1])) -+ == ((UINTVAL (operands[1])) >> 24) << 24)" - [(set (match_dup 2) (zero_extend:SI (match_dup 0))) - (set (reg:CC CC_REGNUM) (compare:CC (match_dup 2) (match_dup 1)))] - " -@@ -10562,7 +11042,11 @@ - } - " - [(set_attr "type" "load4") -- (set_attr "predicable" "yes")] -+ (set_attr "predicable" "yes") -+ (set (attr "length") -+ (symbol_ref "arm_attr_length_pop_multi (operands, -+ /*return_pc=*/false, -+ /*write_back_p=*/true)"))] - ) - - ;; Pop with return (as used in epilogue RTL) -@@ -10591,7 +11075,10 @@ - } - " - [(set_attr "type" "load4") -- (set_attr "predicable" "yes")] -+ (set_attr "predicable" "yes") -+ (set (attr "length") -+ (symbol_ref "arm_attr_length_pop_multi (operands, /*return_pc=*/true, -+ /*write_back_p=*/true)"))] - ) - - (define_insn "*pop_multiple_with_return" -@@ -10611,7 +11098,10 @@ - } - " - [(set_attr "type" "load4") -- (set_attr "predicable" "yes")] -+ (set_attr "predicable" "yes") -+ (set (attr "length") -+ (symbol_ref "arm_attr_length_pop_multi (operands, /*return_pc=*/true, -+ /*write_back_p=*/false)"))] - ) - - ;; Load into PC and return -@@ -10632,7 +11122,7 @@ - (match_operand:SI 2 "const_int_I_operand" "I"))) - (set (match_operand:DF 3 "vfp_hard_register_operand" "") - (mem:DF (match_dup 1)))])] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "* - { - int num_regs = XVECLEN (operands[0], 0); -@@ -10822,19 +11312,22 @@ - (set_attr "predicable_short_it" "no") - (set_attr "type" "clz")]) - --(define_expand "ctzsi2" -- [(set (match_operand:SI 0 "s_register_operand" "") 
-- (ctz:SI (match_operand:SI 1 "s_register_operand" "")))] -+;; Keep this as a CTZ expression until after reload and then split -+;; into RBIT + CLZ. Since RBIT is represented as an UNSPEC it is unlikely -+;; to fold with any other expression. -+ -+(define_insn_and_split "ctzsi2" -+ [(set (match_operand:SI 0 "s_register_operand" "=r") -+ (ctz:SI (match_operand:SI 1 "s_register_operand" "r")))] - "TARGET_32BIT && arm_arch_thumb2" -+ "#" -+ "&& reload_completed" -+ [(const_int 0)] - " -- { -- rtx tmp = gen_reg_rtx (SImode); -- emit_insn (gen_rbitsi2 (tmp, operands[1])); -- emit_insn (gen_clzsi2 (operands[0], tmp)); -- } -- DONE; -- " --) -+ emit_insn (gen_rbitsi2 (operands[0], operands[1])); -+ emit_insn (gen_clzsi2 (operands[0], operands[0])); -+ DONE; -+") - - ;; V5E instructions. - -@@ -10958,13 +11451,16 @@ - ;; We only care about the lower 16 bits of the constant - ;; being inserted into the upper 16 bits of the register. - (define_insn "*arm_movtas_ze" -- [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r") -+ [(set (zero_extract:SI (match_operand:SI 0 "s_register_operand" "+r,r") - (const_int 16) - (const_int 16)) - (match_operand:SI 1 "const_int_operand" ""))] -- "arm_arch_thumb2" -- "movt%?\t%0, %L1" -- [(set_attr "predicable" "yes") -+ "TARGET_HAVE_MOVT" -+ "@ -+ movt%?\t%0, %L1 -+ movt\t%0, %L1" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") - (set_attr "length" "4") - (set_attr "type" "alu_sreg")] ---- a/src/gcc/config/arm/arm.opt -+++ b/src/gcc/config/arm/arm.opt -@@ -61,10 +61,6 @@ Generate a call to abort if a noreturn function returns. - mapcs - Target RejectNegative Mask(APCS_FRAME) Undocumented - --mapcs-float --Target Report Mask(APCS_FLOAT) --Pass FP arguments in FP registers. -- - mapcs-frame - Target Report Mask(APCS_FRAME) - Generate APCS conformant stack frames. -@@ -109,6 +105,10 @@ mfloat-abi= - Target RejectNegative Joined Enum(float_abi_type) Var(arm_float_abi) Init(TARGET_DEFAULT_FLOAT_ABI) - Specify if floating point hardware should be used. - -+mcmse -+Target RejectNegative Var(use_cmse) -+Specify that the compiler should target secure code as per ARMv8-M Security Extensions. -+ - Enum - Name(float_abi_type) Type(enum float_abi_type) - Known floating-point ABIs (for use with the -mfloat-abi= option): -@@ -253,14 +253,6 @@ mrestrict-it - Target Report Var(arm_restrict_it) Init(2) Save - Generate IT blocks appropriate for ARMv8. - --mold-rtx-costs --Target Report Mask(OLD_RTX_COSTS) --Use the old RTX costing tables (transitional). -- --mnew-generic-costs --Target Report Mask(NEW_GENERIC_COSTS) --Use the new generic RTX cost tables if new core-specific cost table not available (transitional). -- - mfix-cortex-m3-ldrd - Target Report Var(fix_cm3_ldrd) Init(2) - Avoid overlapping destination and address registers on LDRD instructions ---- /dev/null -+++ b/src/gcc/config/arm/arm_cmse.h -@@ -0,0 +1,199 @@ -+/* ARMv8-M Secure Extensions intrinsics include file. -+ -+ Copyright (C) 2015-2016 Free Software Foundation, Inc. -+ Contributed by ARM Ltd. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. 
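The ctzsi2 change above keeps the CTZ expression intact until after reload, then splits it into RBIT followed by CLZ, since the UNSPEC-based RBIT would block folding if emitted early. The underlying identity, sketched with inline assembly for the bit-reverse (ARMv6T2 or later; the function name is illustrative):

#include <stdint.h>

/* ctz (x) == clz (rbit (x)); undefined for x == 0, as for the
   builtin.  Illustration of what the post-reload split emits.  */
unsigned int
ctz_sketch (uint32_t x)
{
  uint32_t rev;
  __asm__ ("rbit %0, %1" : "=r" (rev) : "r" (x));
  return __builtin_clz (rev);
}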
See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ <http://www.gnu.org/licenses/>. */ -+ -+ -+#ifndef _GCC_ARM_CMSE_H -+#define _GCC_ARM_CMSE_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#if __ARM_FEATURE_CMSE & 1 -+ -+#include <stddef.h> -+#include <stdint.h> -+ -+#ifdef __ARM_BIG_ENDIAN -+ -+typedef union { -+ struct cmse_address_info { -+#if __ARM_FEATURE_CMSE & 2 -+ unsigned idau_region:8; -+ unsigned idau_region_valid:1; -+ unsigned secure:1; -+ unsigned nonsecure_readwrite_ok:1; -+ unsigned nonsecure_read_ok:1; -+#else -+ unsigned :12; -+#endif -+ unsigned readwrite_ok:1; -+ unsigned read_ok:1; -+#if __ARM_FEATURE_CMSE & 2 -+ unsigned sau_region_valid:1; -+#else -+ unsigned :1; -+#endif -+ unsigned mpu_region_valid:1; -+#if __ARM_FEATURE_CMSE & 2 -+ unsigned sau_region:8; -+#else -+ unsigned :8; -+#endif -+ unsigned mpu_region:8; -+ } flags; -+ unsigned value; -+} cmse_address_info_t; -+ -+#else -+ -+typedef union { -+ struct cmse_address_info { -+ unsigned mpu_region:8; -+#if __ARM_FEATURE_CMSE & 2 -+ unsigned sau_region:8; -+#else -+ unsigned :8; -+#endif -+ unsigned mpu_region_valid:1; -+#if __ARM_FEATURE_CMSE & 2 -+ unsigned sau_region_valid:1; -+#else -+ unsigned :1; -+#endif -+ unsigned read_ok:1; -+ unsigned readwrite_ok:1; -+#if __ARM_FEATURE_CMSE & 2 -+ unsigned nonsecure_read_ok:1; -+ unsigned nonsecure_readwrite_ok:1; -+ unsigned secure:1; -+ unsigned idau_region_valid:1; -+ unsigned idau_region:8; -+#else -+ unsigned :12; -+#endif -+ } flags; -+ unsigned value; -+} cmse_address_info_t; -+ -+#endif /* __ARM_BIG_ENDIAN */ -+ -+#define cmse_TT_fptr(p) (__cmse_TT_fptr ((__cmse_fptr)(p))) -+ -+typedef void (*__cmse_fptr)(void); -+ -+#define __CMSE_TT_ASM(flags) \ -+{ \ -+ cmse_address_info_t __result; \ -+ __asm__ ("tt" # flags " %0,%1" \ -+ : "=r"(__result) \ -+ : "r"(__p) \ -+ : "memory"); \ -+ return __result; \ -+} -+ -+__extension__ static __inline __attribute__ ((__always_inline__)) -+cmse_address_info_t -+__cmse_TT_fptr (__cmse_fptr __p) -+__CMSE_TT_ASM () -+ -+__extension__ static __inline __attribute__ ((__always_inline__)) -+cmse_address_info_t -+cmse_TT (void *__p) -+__CMSE_TT_ASM () -+ -+#define cmse_TTT_fptr(p) (__cmse_TTT_fptr ((__cmse_fptr)(p))) -+ -+__extension__ static __inline __attribute__ ((__always_inline__)) -+cmse_address_info_t -+__cmse_TTT_fptr (__cmse_fptr __p) -+__CMSE_TT_ASM (t) -+ -+__extension__ static __inline __attribute__ ((__always_inline__)) -+cmse_address_info_t -+cmse_TTT (void *__p) -+__CMSE_TT_ASM (t) -+ -+#if __ARM_FEATURE_CMSE & 2 -+ -+#define cmse_TTA_fptr(p) (__cmse_TTA_fptr ((__cmse_fptr)(p))) -+ -+__extension__ static __inline __attribute__ ((__always_inline__)) -+cmse_address_info_t -+__cmse_TTA_fptr (__cmse_fptr __p) -+__CMSE_TT_ASM (a) -+ -+__extension__ static __inline __attribute__ ((__always_inline__)) -+cmse_address_info_t -+cmse_TTA (void *__p) -+__CMSE_TT_ASM (a) -+ -+#define cmse_TTAT_fptr(p) (__cmse_TTAT_fptr ((__cmse_fptr)(p))) -+ -+__extension__ static __inline cmse_address_info_t -+__attribute__ ((__always_inline__)) -+__cmse_TTAT_fptr (__cmse_fptr __p) -+__CMSE_TT_ASM (at) -+ -+__extension__ 
static __inline cmse_address_info_t -+__attribute__ ((__always_inline__)) -+cmse_TTAT (void *__p) -+__CMSE_TT_ASM (at) -+ -+/* FIXME: diagnose use outside cmse_nonsecure_entry functions. */ -+__extension__ static __inline int __attribute__ ((__always_inline__)) -+cmse_nonsecure_caller (void) -+{ -+ return __builtin_arm_cmse_nonsecure_caller (); -+} -+ -+#define CMSE_AU_NONSECURE 2 -+#define CMSE_MPU_NONSECURE 16 -+#define CMSE_NONSECURE 18 -+ -+#define cmse_nsfptr_create(p) ((typeof ((p))) ((intptr_t) (p) & ~1)) -+ -+#define cmse_is_nsfptr(p) (!((intptr_t) (p) & 1)) -+ -+#endif /* __ARM_FEATURE_CMSE & 2 */ -+ -+#define CMSE_MPU_UNPRIV 4 -+#define CMSE_MPU_READWRITE 1 -+#define CMSE_MPU_READ 8 -+ -+__extension__ void * -+cmse_check_address_range (void *, size_t, int); -+ -+#define cmse_check_pointed_object(p, f) \ -+ ((typeof ((p))) cmse_check_address_range ((p), sizeof (*(p)), (f))) -+ -+#endif /* __ARM_FEATURE_CMSE & 1 */ -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _GCC_ARM_CMSE_H */ ---- /dev/null -+++ b/src/gcc/config/arm/arm_fp16.h -@@ -0,0 +1,255 @@ -+/* ARM FP16 intrinsics include file. -+ -+ Copyright (C) 2016 Free Software Foundation, Inc. -+ Contributed by ARM Ltd. -+ -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ Under Section 7 of GPL version 3, you are granted additional -+ permissions described in the GCC Runtime Library Exception, version -+ 3.1, as published by the Free Software Foundation. -+ -+ You should have received a copy of the GNU General Public License and -+ a copy of the GCC Runtime Library Exception along with this program; -+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -+ <http://www.gnu.org/licenses/>. */ -+ -+#ifndef _GCC_ARM_FP16_H -+#define _GCC_ARM_FP16_H 1 -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#include <stdint.h> -+ -+/* Intrinsics for FP16 instructions. 
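
[Note: the new arm_cmse.h above exposes the TT instruction family (cmse_TT, cmse_TTT, cmse_TTA, cmse_TTAT) plus the cmse_check_address_range helper. A short usage sketch, assuming a target where __ARM_FEATURE_CMSE & 2 holds so that CMSE_NONSECURE is available; per the ACLE convention the helper returns NULL when any byte of the range fails the permission check:

#include <arm_cmse.h>

/* Validate a buffer handed in from the non-secure world before
   the secure side touches it.  */
int
use_ns_buffer (void *buf, size_t len)
{
  void *ok = cmse_check_address_range (buf, len,
                                       CMSE_NONSECURE | CMSE_MPU_READWRITE);
  if (ok == NULL)
    return -1;          /* reject: range is not non-secure read/write */
  /* safe to access the buffer here */
  return 0;
}
]
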
*/ -+#pragma GCC push_options -+#pragma GCC target ("fpu=fp-armv8") -+ -+#if defined (__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) -+ -+typedef __fp16 float16_t; -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vabsh_f16 (float16_t __a) -+{ -+ return __builtin_neon_vabshf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vaddh_f16 (float16_t __a, float16_t __b) -+{ -+ return __a + __b; -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvtah_s32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvtahssi (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvtah_u32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvtahusi (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_f16_s32 (int32_t __a) -+{ -+ return __builtin_neon_vcvthshf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_f16_u32 (uint32_t __a) -+{ -+ return __builtin_neon_vcvthuhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_n_f16_s32 (int32_t __a, const int __b) -+{ -+ return __builtin_neon_vcvths_nhf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vcvth_n_f16_u32 (uint32_t __a, const int __b) -+{ -+ return __builtin_neon_vcvthu_nhf ((int32_t)__a, __b); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvth_n_s32_f16 (float16_t __a, const int __b) -+{ -+ return __builtin_neon_vcvths_nsi (__a, __b); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvth_n_u32_f16 (float16_t __a, const int __b) -+{ -+ return (uint32_t)__builtin_neon_vcvthu_nsi (__a, __b); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvth_s32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvthssi (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvth_u32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvthusi (__a); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvtmh_s32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvtmhssi (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvtmh_u32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvtmhusi (__a); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvtnh_s32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvtnhssi (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvtnh_u32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvtnhusi (__a); -+} -+ -+__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+vcvtph_s32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvtphssi (__a); -+} -+ -+__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+vcvtph_u32_f16 (float16_t __a) -+{ -+ return __builtin_neon_vcvtphusi (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vdivh_f16 (float16_t __a, float16_t __b) -+{ -+ return __a / __b; -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vfmah_f16 (float16_t __a, float16_t __b, float16_t __c) -+{ -+ return __builtin_neon_vfmahf (__a, __b, __c); -+} -+ -+__extension__ static __inline 
float16_t __attribute__ ((__always_inline__)) -+vfmsh_f16 (float16_t __a, float16_t __b, float16_t __c) -+{ -+ return __builtin_neon_vfmshf (__a, __b, __c); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vmaxnmh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_neon_vmaxnmhf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vminnmh_f16 (float16_t __a, float16_t __b) -+{ -+ return __builtin_neon_vminnmhf (__a, __b); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vmulh_f16 (float16_t __a, float16_t __b) -+{ -+ return __a * __b; -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vnegh_f16 (float16_t __a) -+{ -+ return - __a; -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndah_f16 (float16_t __a) -+{ -+ return __builtin_neon_vrndahf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndh_f16 (float16_t __a) -+{ -+ return __builtin_neon_vrndhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndih_f16 (float16_t __a) -+{ -+ return __builtin_neon_vrndihf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndmh_f16 (float16_t __a) -+{ -+ return __builtin_neon_vrndmhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndnh_f16 (float16_t __a) -+{ -+ return __builtin_neon_vrndnhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndph_f16 (float16_t __a) -+{ -+ return __builtin_neon_vrndphf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vrndxh_f16 (float16_t __a) -+{ -+ return __builtin_neon_vrndxhf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vsqrth_f16 (float16_t __a) -+{ -+ return __builtin_neon_vsqrthf (__a); -+} -+ -+__extension__ static __inline float16_t __attribute__ ((__always_inline__)) -+vsubh_f16 (float16_t __a, float16_t __b) -+{ -+ return __a - __b; -+} -+ -+#endif /* __ARM_FEATURE_FP16_SCALAR_ARITHMETIC */ -+#pragma GCC pop_options -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif ---- a/src/gcc/config/arm/arm_neon.h -+++ b/src/gcc/config/arm/arm_neon.h -@@ -38,6 +38,7 @@ - extern "C" { - #endif - -+#include <arm_fp16.h> - #include <stdint.h> - - typedef __simd64_int8_t int8x8_t; -@@ -509,528 +510,614 @@ typedef struct poly64x2x4_t - #pragma GCC pop_options - - /* vadd */ --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_s8 (int8x8_t __a, int8x8_t __b) - { - return __a + __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_s16 (int16x4_t __a, int16x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_s32 (int32x2_t __a, int32x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t 
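
[Note: arm_fp16.h above provides scalar __fp16 arithmetic whenever __ARM_FEATURE_FP16_SCALAR_ARITHMETIC is defined (e.g. with a suitable -march=...+fp16 target). A quick sketch using only intrinsics defined in the header; vfmah_f16 (a, b, c) computes a + b * c with a single rounding:

#include <arm_fp16.h>

float16_t
fp16_axpy (float16_t a, float16_t x, float16_t y)
{
  return vfmah_f16 (y, a, x);   /* y + a * x, fused */
}

int32_t
fp16_round (float16_t v)
{
  return vcvtnh_s32_f16 (v);    /* convert, rounding to nearest */
}
]
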
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_f32 (float32x2_t __a, float32x2_t __b) - { --#ifdef __FAST_MATH -+#ifdef __FAST_MATH__ - return __a + __b; - #else - return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b); - #endif - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_s64 (int64x1_t __a, int64x1_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vadd_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a + __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a + __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a + __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_f32 (float32x4_t __a, float32x4_t __b) - { --#ifdef __FAST_MATH -+#ifdef __FAST_MATH__ - return __a + __b; - #else - return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b); - #endif - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a + 
__b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a + __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a + __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_s8 (int8x8_t __a, int8x8_t __b) - { - return (int16x8_t)__builtin_neon_vaddlsv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_s16 (int16x4_t __a, int16x4_t __b) - { - return (int32x4_t)__builtin_neon_vaddlsv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_s32 (int32x2_t __a, int32x2_t __b) - { - return (int64x2_t)__builtin_neon_vaddlsv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint16x8_t)__builtin_neon_vaddluv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint32x4_t)__builtin_neon_vaddluv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddl_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint64x2_t)__builtin_neon_vaddluv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_s8 (int16x8_t __a, int8x8_t __b) - { - return (int16x8_t)__builtin_neon_vaddwsv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_s16 (int32x4_t __a, int16x4_t __b) - { - return (int32x4_t)__builtin_neon_vaddwsv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_s32 (int64x2_t __a, int32x2_t __b) - { - return (int64x2_t)__builtin_neon_vaddwsv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_u8 (uint16x8_t __a, 
uint8x8_t __b) - { - return (uint16x8_t)__builtin_neon_vaddwuv8qi ((int16x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_u16 (uint32x4_t __a, uint16x4_t __b) - { - return (uint32x4_t)__builtin_neon_vaddwuv4hi ((int32x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddw_u32 (uint64x2_t __a, uint32x2_t __b) - { - return (uint64x2_t)__builtin_neon_vaddwuv2si ((int64x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t)__builtin_neon_vhaddsv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t)__builtin_neon_vhaddsv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t)__builtin_neon_vhaddsv2si (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vhadduv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vhadduv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhadd_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vhadduv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t)__builtin_neon_vhaddsv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t)__builtin_neon_vhaddsv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t)__builtin_neon_vhaddsv4si (__a, __b); - } - 
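
[Note: the mechanical change running through arm_neon.h here, 'static __inline' becoming 'extern __inline' with __gnu_inline__ and __artificial__, switches the intrinsics to GNU inline semantics: the definition is used only for inlining and no out-of-line copy is ever emitted, so including the header in many translation units cannot create duplicate symbols, while __artificial__ keeps debuggers from stepping into one-line wrappers. A reduced illustration of the pattern (toy function, not from the header):

__extension__ extern __inline int
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vadd_toy (int __a, int __b)
{
  /* With __gnu_inline__ on 'extern inline', GCC never emits a
     standalone body for this function; debug info attributes the
     inlined code to the caller's source line.  */
  return __a + __b;
}
]
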
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vhadduv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vhadduv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vhadduv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t)__builtin_neon_vrhaddsv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t)__builtin_neon_vrhaddsv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t)__builtin_neon_vrhaddsv2si (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vrhadduv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vrhadduv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vrhadduv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t)__builtin_neon_vrhaddsv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t)__builtin_neon_vrhaddsv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t)__builtin_neon_vrhaddsv4si (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vrhadduv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vrhadduv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vrhadduv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t)__builtin_neon_vqaddsv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t)__builtin_neon_vqaddsv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t)__builtin_neon_vqaddsv2si (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_s64 (int64x1_t __a, int64x1_t __b) - { - return (int64x1_t)__builtin_neon_vqaddsdi (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vqadduv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vqadduv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vqadduv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqadd_u64 (uint64x1_t __a, uint64x1_t __b) - { - return (uint64x1_t)__builtin_neon_vqaddudi ((int64x1_t) __a, (int64x1_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t)__builtin_neon_vqaddsv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t)__builtin_neon_vqaddsv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t)__builtin_neon_vqaddsv4si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_s64 (int64x2_t __a, int64x2_t __b) - { - return (int64x2_t)__builtin_neon_vqaddsv2di (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vqadduv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vqadduv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vqadduv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint64x2_t)__builtin_neon_vqadduv2di ((int64x2_t) __a, (int64x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_s16 (int16x8_t __a, int16x8_t __b) - { - return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_s32 (int32x4_t __a, int32x4_t __b) - { - return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_s64 (int64x2_t 
__a, int64x2_t __b) - { - return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_s16 (int16x8_t __a, int16x8_t __b) - { - return (int8x8_t)__builtin_neon_vraddhnv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_s32 (int32x4_t __a, int32x4_t __b) - { - return (int16x4_t)__builtin_neon_vraddhnv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_s64 (int64x2_t __a, int64x2_t __b) - { - return (int32x2_t)__builtin_neon_vraddhnv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint8x8_t)__builtin_neon_vraddhnv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint16x4_t)__builtin_neon_vraddhnv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint32x2_t)__builtin_neon_vraddhnv2di ((int64x2_t) __a, (int64x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_s8 (int8x8_t __a, int8x8_t __b) - { - return __a * __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_s16 (int16x4_t __a, int16x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline int32x2_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_s32 (int32x2_t __a, int32x2_t __b) - { - return __a * __b; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_f32 (float32x2_t __a, float32x2_t __b) - { --#ifdef __FAST_MATH -+#ifdef __FAST_MATH__ - return __a * __b; - #else - return (float32x2_t) __builtin_neon_vmulfv2sf (__a, __b); -@@ -1038,493 +1125,574 @@ vmul_f32 (float32x2_t __a, float32x2_t __b) - - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a * __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a * __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a * __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_f32 (float32x4_t __a, float32x4_t __b) - { --#ifdef __FAST_MATH -+#ifdef __FAST_MATH__ - return __a * __b; - #else - return (float32x4_t) __builtin_neon_vmulfv4sf (__a, __b); - #endif - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a * __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a * __b; - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern 
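
[Note: observe the one-character fix threaded through vadd_f32, vaddq_f32, vmul_f32 and vmulq_f32: the macro GCC predefines under -ffast-math is __FAST_MATH__, so the old '#ifdef __FAST_MATH' test was never true and the builtin path was taken unconditionally. A tiny check program to see which branch is live; compile it with and without -ffast-math:

#include <stdio.h>

int
main (void)
{
#ifdef __FAST_MATH__
  puts ("__FAST_MATH__ defined: vadd_f32 folds to __a + __b");
#else
  puts ("precise mode: vadd_f32 uses __builtin_neon_vaddv2sf");
#endif
  return 0;
}
]
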
__inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmul_p8 (poly8x8_t __a, poly8x8_t __b) - { - return (poly8x8_t)__builtin_neon_vmulpv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmulq_p8 (poly8x16_t __a, poly8x16_t __b) - { - return (poly8x16_t)__builtin_neon_vmulpv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmulh_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t)__builtin_neon_vqdmulhv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmulh_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t)__builtin_neon_vqdmulhv2si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t)__builtin_neon_vqdmulhv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t)__builtin_neon_vqdmulhv4si (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t)__builtin_neon_vqrdmulhv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t)__builtin_neon_vqrdmulhv2si (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t)__builtin_neon_vqrdmulhv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t)__builtin_neon_vqrdmulhv4si (__a, __b); - } - - #ifdef __ARM_FEATURE_QRDMX --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int16x4_t)__builtin_neon_vqrdmlahv4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmlah_s32 
(int32x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int32x2_t)__builtin_neon_vqrdmlahv2si (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int16x8_t)__builtin_neon_vqrdmlahv8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int32x4_t)__builtin_neon_vqrdmlahv4si (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int16x4_t)__builtin_neon_vqrdmlshv4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int32x2_t)__builtin_neon_vqrdmlshv2si (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int16x8_t)__builtin_neon_vqrdmlshv8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int32x4_t)__builtin_neon_vqrdmlshv4si (__a, __b, __c); - } - #endif - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_s8 (int8x8_t __a, int8x8_t __b) - { - return (int16x8_t)__builtin_neon_vmullsv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_s16 (int16x4_t __a, int16x4_t __b) - { - return (int32x4_t)__builtin_neon_vmullsv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_s32 (int32x2_t __a, int32x2_t __b) - { - return (int64x2_t)__builtin_neon_vmullsv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint16x8_t)__builtin_neon_vmulluv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_u16 (uint16x4_t __a, uint16x4_t __b) - 
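
[Note: the vqrdmlah/vqrdmlsh block above is guarded by __ARM_FEATURE_QRDMX, i.e. the ARMv8.1 rounding-doubling multiply-accumulate extension. A usage sketch, guarded the same way as the header's own definitions and assuming a target that advertises the feature; the semantics are roughly acc + round((2 * b * c) >> 16), saturated, which is the usual Q15 fixed-point MAC step:

#include <arm_neon.h>

#ifdef __ARM_FEATURE_QRDMX
int16x4_t
q15_mac (int16x4_t acc, int16x4_t b, int16x4_t c)
{
  /* Saturating rounding doubling multiply-accumulate (SQRDMLAH).  */
  return vqrdmlah_s16 (acc, b, c);
}
#endif
]
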
{ - return (uint32x4_t)__builtin_neon_vmulluv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint64x2_t)__builtin_neon_vmulluv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_p8 (poly8x8_t __a, poly8x8_t __b) - { - return (poly16x8_t)__builtin_neon_vmullpv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmull_s16 (int16x4_t __a, int16x4_t __b) - { - return (int32x4_t)__builtin_neon_vqdmullv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmull_s32 (int32x2_t __a, int32x2_t __b) - { - return (int64x2_t)__builtin_neon_vqdmullv2si (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) - { - return (int8x8_t)__builtin_neon_vmlav8qi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int16x4_t)__builtin_neon_vmlav4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int32x2_t)__builtin_neon_vmlav2si (__a, __b, __c); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) - { - return (float32x2_t)__builtin_neon_vmlav2sf (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) - { - return (uint8x8_t)__builtin_neon_vmlav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) - { - return (uint16x4_t)__builtin_neon_vmlav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmla_u32 
(uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) - { - return (uint32x2_t)__builtin_neon_vmlav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) - { - return (int8x16_t)__builtin_neon_vmlav16qi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int16x8_t)__builtin_neon_vmlav8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int32x4_t)__builtin_neon_vmlav4si (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - { - return (float32x4_t)__builtin_neon_vmlav4sf (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) - { - return (uint8x16_t)__builtin_neon_vmlav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) - { - return (uint16x8_t)__builtin_neon_vmlav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) - { - return (uint32x4_t)__builtin_neon_vmlav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) - { - return (int16x8_t)__builtin_neon_vmlalsv8qi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int32x4_t)__builtin_neon_vmlalsv4hi (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int64x2_t)__builtin_neon_vmlalsv2si (__a, __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) - { - return (uint16x8_t)__builtin_neon_vmlaluv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) - { - return (uint32x4_t)__builtin_neon_vmlaluv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) - { - return (uint64x2_t)__builtin_neon_vmlaluv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int32x4_t)__builtin_neon_vqdmlalv4hi (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int64x2_t)__builtin_neon_vqdmlalv2si (__a, __b, __c); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) - { - return (int8x8_t)__builtin_neon_vmlsv8qi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int16x4_t)__builtin_neon_vmlsv4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int32x2_t)__builtin_neon_vmlsv2si (__a, __b, __c); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) - { - return (float32x2_t)__builtin_neon_vmlsv2sf (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) - { - return (uint8x8_t)__builtin_neon_vmlsv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) 
- { - return (uint16x4_t)__builtin_neon_vmlsv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) - { - return (uint32x2_t)__builtin_neon_vmlsv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) - { - return (int8x16_t)__builtin_neon_vmlsv16qi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int16x8_t)__builtin_neon_vmlsv8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int32x4_t)__builtin_neon_vmlsv4si (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - { - return (float32x4_t)__builtin_neon_vmlsv4sf (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) - { - return (uint8x16_t)__builtin_neon_vmlsv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) - { - return (uint16x8_t)__builtin_neon_vmlsv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) - { - return (uint32x4_t)__builtin_neon_vmlsv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) - { - return (int16x8_t)__builtin_neon_vmlslsv8qi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int32x4_t)__builtin_neon_vmlslsv4hi (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int64x2_t)__builtin_neon_vmlslsv2si (__a, __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) - { - return (uint16x8_t)__builtin_neon_vmlsluv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) - { - return (uint32x4_t)__builtin_neon_vmlsluv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) - { - return (uint64x2_t)__builtin_neon_vmlsluv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int32x4_t)__builtin_neon_vqdmlslv4hi (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c); -@@ -1532,25 +1700,29 @@ vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=neon-vfpv4") --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) - { - return (float32x2_t)__builtin_neon_vfmav2sf (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - { - return (float32x4_t)__builtin_neon_vfmav4sf (__a, __b, __c); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) - { - return (float32x2_t)__builtin_neon_vfmsv2sf (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - { - return (float32x4_t)__builtin_neon_vfmsv4sf (__a, __b, __c); -@@ -1558,7 +1730,8 @@ vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) - #pragma 
GCC pop_options - - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndn_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vrintnv2sf (__a); -@@ -1566,7 +1739,8 @@ vrndn_f32 (float32x2_t __a) - - #endif - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndnq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vrintnv4sf (__a); -@@ -1574,7 +1748,8 @@ vrndnq_f32 (float32x4_t __a) - - #endif - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrnda_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vrintav2sf (__a); -@@ -1582,7 +1757,8 @@ vrnda_f32 (float32x2_t __a) - - #endif - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndaq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vrintav4sf (__a); -@@ -1590,7 +1766,8 @@ vrndaq_f32 (float32x4_t __a) - - #endif - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndp_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vrintpv2sf (__a); -@@ -1598,7 +1775,8 @@ vrndp_f32 (float32x2_t __a) - - #endif - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndpq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vrintpv4sf (__a); -@@ -1606,7 +1784,8 @@ vrndpq_f32 (float32x4_t __a) - - #endif - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndm_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vrintmv2sf (__a); -@@ -1614,7 +1793,8 @@ vrndm_f32 (float32x2_t __a) - - #endif - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndmq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vrintmv4sf (__a); -@@ -1623,7 +1803,8 @@ vrndmq_f32 (float32x4_t __a) - #endif - - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndx_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vrintxv2sf (__a); -@@ -1632,7 +1813,8 @@ vrndx_f32 (float32x2_t __a) - #endif - - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - 
vrndxq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vrintxv4sf (__a); -@@ -1641,7 +1823,8 @@ vrndxq_f32 (float32x4_t __a) - #endif - - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrnd_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vrintzv2sf (__a); -@@ -1649,7 +1832,8 @@ vrnd_f32 (float32x2_t __a) - - #endif - #if __ARM_ARCH >= 8 --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrndq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vrintzv4sf (__a); -@@ -1657,2907 +1841,3436 @@ vrndq_f32 (float32x4_t __a) - - #endif - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_s8 (int8x8_t __a, int8x8_t __b) - { - return __a - __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_s16 (int16x4_t __a, int16x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_s32 (int32x2_t __a, int32x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_f32 (float32x2_t __a, float32x2_t __b) - { --#ifdef __FAST_MATH -+#ifdef __FAST_MATH__ - return __a - __b; - #else - return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b); - #endif - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_s64 (int64x1_t __a, int64x1_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsub_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a - __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) - vsubq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a - __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a - __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_f32 (float32x4_t __a, float32x4_t __b) - { --#ifdef __FAST_MATH -+#ifdef __FAST_MATH__ - return __a - __b; - #else - return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b); - #endif - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a - __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a - __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_s8 (int8x8_t __a, int8x8_t __b) - { - return (int16x8_t)__builtin_neon_vsublsv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_s16 (int16x4_t __a, int16x4_t __b) - { - return (int32x4_t)__builtin_neon_vsublsv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_s32 (int32x2_t __a, int32x2_t __b) - { - return (int64x2_t)__builtin_neon_vsublsv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint16x8_t)__builtin_neon_vsubluv8qi ((int8x8_t) __a, 
(int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint32x4_t)__builtin_neon_vsubluv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubl_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint64x2_t)__builtin_neon_vsubluv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_s8 (int16x8_t __a, int8x8_t __b) - { - return (int16x8_t)__builtin_neon_vsubwsv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_s16 (int32x4_t __a, int16x4_t __b) - { - return (int32x4_t)__builtin_neon_vsubwsv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_s32 (int64x2_t __a, int32x2_t __b) - { - return (int64x2_t)__builtin_neon_vsubwsv2si (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_u8 (uint16x8_t __a, uint8x8_t __b) - { - return (uint16x8_t)__builtin_neon_vsubwuv8qi ((int16x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_u16 (uint32x4_t __a, uint16x4_t __b) - { - return (uint32x4_t)__builtin_neon_vsubwuv4hi ((int32x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubw_u32 (uint64x2_t __a, uint32x2_t __b) - { - return (uint64x2_t)__builtin_neon_vsubwuv2si ((int64x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t)__builtin_neon_vhsubsv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t)__builtin_neon_vhsubsv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t)__builtin_neon_vhsubsv2si (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vhsubuv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vhsubuv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsub_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vhsubuv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t)__builtin_neon_vhsubsv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t)__builtin_neon_vhsubsv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t)__builtin_neon_vhsubsv4si (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vhsubuv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vhsubuv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vhsubq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vhsubuv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x8_t)__builtin_neon_vqsubsv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x4_t)__builtin_neon_vqsubsv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x2_t)__builtin_neon_vqsubsv2si (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_s64 (int64x1_t __a, int64x1_t __b) - { - return (int64x1_t)__builtin_neon_vqsubsdi (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vqsubuv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vqsubuv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vqsubuv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsub_u64 (uint64x1_t __a, uint64x1_t __b) - { - return (uint64x1_t)__builtin_neon_vqsubudi ((int64x1_t) __a, (int64x1_t) __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_s8 (int8x16_t __a, int8x16_t __b) - { - return (int8x16_t)__builtin_neon_vqsubsv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_s16 (int16x8_t __a, int16x8_t __b) - { - return (int16x8_t)__builtin_neon_vqsubsv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_s32 (int32x4_t __a, int32x4_t __b) - { - return (int32x4_t)__builtin_neon_vqsubsv4si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_s64 (int64x2_t __a, int64x2_t __b) - { - return (int64x2_t)__builtin_neon_vqsubsv2di (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vqsubuv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) - { 
- return (uint16x8_t)__builtin_neon_vqsubuv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vqsubuv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint64x2_t)__builtin_neon_vqsubuv2di ((int64x2_t) __a, (int64x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_s16 (int16x8_t __a, int16x8_t __b) - { - return (int8x8_t)__builtin_neon_vsubhnv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_s32 (int32x4_t __a, int32x4_t __b) - { - return (int16x4_t)__builtin_neon_vsubhnv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_s64 (int64x2_t __a, int64x2_t __b) - { - return (int32x2_t)__builtin_neon_vsubhnv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint8x8_t)__builtin_neon_vsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint16x4_t)__builtin_neon_vsubhnv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsubhn_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint32x2_t)__builtin_neon_vsubhnv2di ((int64x2_t) __a, (int64x2_t) __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_s16 (int16x8_t __a, int16x8_t __b) - { - return (int8x8_t)__builtin_neon_vrsubhnv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_s32 (int32x4_t __a, int32x4_t __b) - { - return (int16x4_t)__builtin_neon_vrsubhnv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_s64 (int64x2_t __a, int64x2_t __b) - { - return (int32x2_t)__builtin_neon_vrsubhnv2di (__a, __b); - } - 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint8x8_t)__builtin_neon_vrsubhnv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint16x4_t)__builtin_neon_vrsubhnv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b) - { - return (uint32x2_t)__builtin_neon_vrsubhnv2di ((int64x2_t) __a, (int64x2_t) __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_s8 (int8x8_t __a, int8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vceqv8qi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_s16 (int16x4_t __a, int16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vceqv4hi (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_s32 (int32x2_t __a, int32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vceqv2si (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_f32 (float32x2_t __a, float32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vceqv2sf (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vceqv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vceqv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_p8 (poly8x8_t __a, poly8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vceqv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint8x16_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceqq_s8 (int8x16_t __a, int8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vceqv16qi (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceqq_s16 (int16x8_t __a, int16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vceqv8hi (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceqq_s32 (int32x4_t __a, int32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vceqv4si (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceqq_f32 (float32x4_t __a, float32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vceqv4sf (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceqq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceqq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vceqv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceqq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vceqv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceqq_p8 (poly8x16_t __a, poly8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vceqv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcge_s8 (int8x8_t __a, int8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vcgev8qi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcge_s16 (int16x4_t __a, int16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vcgev4hi (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcge_s32 (int32x2_t __a, int32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgev2si (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) - vcge_f32 (float32x2_t __a, float32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgev2sf (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcge_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcge_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcge_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgeq_s8 (int8x16_t __a, int8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vcgev16qi (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgeq_s16 (int16x8_t __a, int16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vcgev8hi (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgeq_s32 (int32x4_t __a, int32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgev4si (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgeq_f32 (float32x4_t __a, float32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgev4sf (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcle_s8 (int8x8_t __a, int8x8_t __b) - { - return 
(uint8x8_t)__builtin_neon_vcgev8qi (__b, __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcle_s16 (int16x4_t __a, int16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vcgev4hi (__b, __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcle_s32 (int32x2_t __a, int32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgev2si (__b, __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcle_f32 (float32x2_t __a, float32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgev2sf (__b, __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcle_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __b, (int8x8_t) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcle_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __b, (int16x4_t) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcle_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __b, (int32x2_t) __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcleq_s8 (int8x16_t __a, int8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vcgev16qi (__b, __a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcleq_s16 (int16x8_t __a, int16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vcgev8hi (__b, __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcleq_s32 (int32x4_t __a, int32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgev4si (__b, __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcleq_f32 (float32x4_t __a, float32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgev4sf (__b, __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcleq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __b, (int8x16_t) __a); - } - --__extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcleq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __b, (int16x8_t) __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcleq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __b, (int32x4_t) __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgt_s8 (int8x8_t __a, int8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vcgtv8qi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgt_s16 (int16x4_t __a, int16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vcgtv4hi (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgt_s32 (int32x2_t __a, int32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgtv2si (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgt_f32 (float32x2_t __a, float32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgtv2sf (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgt_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgt_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgt_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgtq_s8 (int8x16_t __a, int8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vcgtv16qi (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgtq_s16 (int16x8_t __a, int16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vcgtv8hi (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) - vcgtq_s32 (int32x4_t __a, int32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgtv4si (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgtq_f32 (float32x4_t __a, float32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgtv4sf (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __a, (int8x16_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __a, (int16x8_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __a, (int32x4_t) __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclt_s8 (int8x8_t __a, int8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vcgtv8qi (__b, __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclt_s16 (int16x4_t __a, int16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vcgtv4hi (__b, __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclt_s32 (int32x2_t __a, int32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgtv2si (__b, __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclt_f32 (float32x2_t __a, float32x2_t __b) - { - return (uint32x2_t)__builtin_neon_vcgtv2sf (__b, __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclt_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __b, (int8x8_t) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclt_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __b, (int16x4_t) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclt_u32 (uint32x2_t __a, uint32x2_t __b) - { - return 
(uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __b, (int32x2_t) __a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcltq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vcgtv16qi (__b, __a);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcltq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vcgtv8hi (__b, __a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcltq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vcgtv4si (__b, __a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcltq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vcgtv4sf (__b, __a);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __b, (int8x16_t) __a);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __b, (int16x8_t) __a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __b, (int32x4_t) __a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcage_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vcagev2sf (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcageq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vcagev4sf (__a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcale_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vcagev2sf (__b, __a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcaleq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vcagev4sf (__b, __a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcagt_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vcagtv2sf (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcagtq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vcagtv4sf (__a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcalt_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vcagtv2sf (__b, __a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcaltq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vcagtv4sf (__b, __a);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtst_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vtstv8qi (__a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtst_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vtstv4hi (__a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtst_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vtstv2si (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtst_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtst_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtst_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vtstv2si ((int32x2_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtst_p8 (poly8x8_t __a, poly8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vtstv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vtst_p16 (poly16x4_t __a, poly16x4_t __b)
-+{
-+  return (uint16x4_t)__builtin_neon_vtstv4hi ((int16x4_t) __a, (int16x4_t) __b);
-+}
-+
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtstq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtstq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtstq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vtstv4si (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtstq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtstq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtstq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtstq_p8 (poly8x16_t __a, poly8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vtstq_p16 (poly16x8_t __a, poly16x8_t __b)
-+{
-+  return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b);
-+}
-+
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabd_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vabdsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabd_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vabdsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabd_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vabdsv2si (__a, __b);
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabd_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (float32x2_t)__builtin_neon_vabdfv2sf (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabd_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vabduv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabd_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vabduv4hi ((int16x4_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabd_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vabduv2si ((int32x2_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (int8x16_t)__builtin_neon_vabdsv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (int16x8_t)__builtin_neon_vabdsv8hi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (int32x4_t)__builtin_neon_vabdsv4si (__a, __b);
- }
-
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (float32x4_t)__builtin_neon_vabdfv4sf (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vabduv16qi ((int8x16_t) __a, (int8x16_t) __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vabduv8hi ((int16x8_t) __a, (int16x8_t) __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vabduv4si ((int32x4_t) __a, (int32x4_t) __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdl_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int16x8_t)__builtin_neon_vabdlsv8qi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdl_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int32x4_t)__builtin_neon_vabdlsv4hi (__a, __b);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdl_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int64x2_t)__builtin_neon_vabdlsv2si (__a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdl_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vabdluv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdl_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vabdluv4hi ((int16x4_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabdl_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return (uint64x2_t)__builtin_neon_vabdluv2si ((int32x2_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vaba_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
- {
-   return (int8x8_t)__builtin_neon_vabasv8qi (__a, __b, __c);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vaba_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c)
- {
-   return (int16x4_t)__builtin_neon_vabasv4hi (__a, __b, __c);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vaba_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c)
- {
-   return (int32x2_t)__builtin_neon_vabasv2si (__a, __b, __c);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vaba_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
- {
-   return (uint8x8_t)__builtin_neon_vabauv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vaba_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c)
- {
-   return (uint16x4_t)__builtin_neon_vabauv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vaba_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c)
- {
-   return (uint32x2_t)__builtin_neon_vabauv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
- {
-   return (int8x16_t)__builtin_neon_vabasv16qi (__a, __b, __c);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
- {
-   return (int16x8_t)__builtin_neon_vabasv8hi (__a, __b, __c);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
- {
-   return (int32x4_t)__builtin_neon_vabasv4si (__a, __b, __c);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
- {
-   return (uint8x16_t)__builtin_neon_vabauv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
- {
-   return (uint16x8_t)__builtin_neon_vabauv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
- {
-   return (uint32x4_t)__builtin_neon_vabauv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c)
- {
-   return (int16x8_t)__builtin_neon_vabalsv8qi (__a, __b, __c);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c)
- {
-   return (int32x4_t)__builtin_neon_vabalsv4hi (__a, __b, __c);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
- {
-   return (int64x2_t)__builtin_neon_vabalsv2si (__a, __b, __c);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c)
- {
-   return (uint16x8_t)__builtin_neon_vabaluv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c)
- {
-   return (uint32x4_t)__builtin_neon_vabaluv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vabal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c)
- {
-   return (uint64x2_t)__builtin_neon_vabaluv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmax_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vmaxsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmax_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vmaxsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmax_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vmaxsv2si (__a, __b);
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmax_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (float32x2_t)__builtin_neon_vmaxfv2sf (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmax_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vmaxuv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmax_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vmaxuv4hi ((int16x4_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmax_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vmaxuv2si ((int32x2_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmaxq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (int8x16_t)__builtin_neon_vmaxsv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmaxq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (int16x8_t)__builtin_neon_vmaxsv8hi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmaxq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (int32x4_t)__builtin_neon_vmaxsv4si (__a, __b);
- }
-
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmaxq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (float32x4_t)__builtin_neon_vmaxfv4sf (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+#pragma GCC push_options
-+#pragma GCC target ("fpu=neon-fp-armv8")
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnm_f32 (float32x2_t a, float32x2_t b)
-+{
-+  return (float32x2_t)__builtin_neon_vmaxnmv2sf (a, b);
-+}
-+
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vmaxnmq_f32 (float32x4_t a, float32x4_t b)
-+{
-+  return (float32x4_t)__builtin_neon_vmaxnmv4sf (a, b);
-+}
-+
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vminnm_f32 (float32x2_t a, float32x2_t b)
-+{
-+  return (float32x2_t)__builtin_neon_vminnmv2sf (a, b);
-+}
-+
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-+vminnmq_f32 (float32x4_t a, float32x4_t b)
-+{
-+  return (float32x4_t)__builtin_neon_vminnmv4sf (a, b);
-+}
-+#pragma GCC pop_options
-+
-+
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmaxq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vmaxuv16qi ((int8x16_t) __a, (int8x16_t) __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmaxq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vmaxuv8hi ((int16x8_t) __a, (int16x8_t) __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmaxq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vmaxuv4si ((int32x4_t) __a, (int32x4_t) __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmin_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vminsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmin_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vminsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmin_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vminsv2si (__a, __b);
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmin_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (float32x2_t)__builtin_neon_vminfv2sf (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmin_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vminuv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmin_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vminuv4hi ((int16x4_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmin_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vminuv2si ((int32x2_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vminq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (int8x16_t)__builtin_neon_vminsv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vminq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (int16x8_t)__builtin_neon_vminsv8hi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vminq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (int32x4_t)__builtin_neon_vminsv4si (__a, __b);
- }
-
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vminq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (float32x4_t)__builtin_neon_vminfv4sf (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vminq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vminuv16qi ((int8x16_t) __a, (int8x16_t) __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vminq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vminuv8hi ((int16x8_t) __a, (int16x8_t) __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vminq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vminuv4si ((int32x4_t) __a, (int32x4_t) __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadd_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vpaddv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadd_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vpaddv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadd_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vpaddv2si (__a, __b);
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadd_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (float32x2_t)__builtin_neon_vpaddv2sf (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadd_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vpaddv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadd_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vpaddv4hi ((int16x4_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadd_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vpaddv2si ((int32x2_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddl_s8 (int8x8_t __a)
- {
-   return (int16x4_t)__builtin_neon_vpaddlsv8qi (__a);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddl_s16 (int16x4_t __a)
- {
-   return (int32x2_t)__builtin_neon_vpaddlsv4hi (__a);
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddl_s32 (int32x2_t __a)
- {
-   return (int64x1_t)__builtin_neon_vpaddlsv2si (__a);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddl_u8 (uint8x8_t __a)
- {
-   return (uint16x4_t)__builtin_neon_vpaddluv8qi ((int8x8_t) __a);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddl_u16 (uint16x4_t __a)
- {
-   return (uint32x2_t)__builtin_neon_vpaddluv4hi ((int16x4_t) __a);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddl_u32 (uint32x2_t __a)
- {
-   return (uint64x1_t)__builtin_neon_vpaddluv2si ((int32x2_t) __a);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddlq_s8 (int8x16_t __a)
- {
-   return (int16x8_t)__builtin_neon_vpaddlsv16qi (__a);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddlq_s16 (int16x8_t __a)
- {
-   return (int32x4_t)__builtin_neon_vpaddlsv8hi (__a);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddlq_s32 (int32x4_t __a)
- {
-   return (int64x2_t)__builtin_neon_vpaddlsv4si (__a);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddlq_u8 (uint8x16_t __a)
- {
-   return (uint16x8_t)__builtin_neon_vpaddluv16qi ((int8x16_t) __a);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddlq_u16 (uint16x8_t __a)
- {
-   return (uint32x4_t)__builtin_neon_vpaddluv8hi ((int16x8_t) __a);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpaddlq_u32 (uint32x4_t __a)
- {
-   return (uint64x2_t)__builtin_neon_vpaddluv4si ((int32x4_t) __a);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadal_s8 (int16x4_t __a, int8x8_t __b)
- {
-   return (int16x4_t)__builtin_neon_vpadalsv8qi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadal_s16 (int32x2_t __a, int16x4_t __b)
- {
-   return (int32x2_t)__builtin_neon_vpadalsv4hi (__a, __b);
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadal_s32 (int64x1_t __a, int32x2_t __b)
- {
-   return (int64x1_t)__builtin_neon_vpadalsv2si (__a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadal_u8 (uint16x4_t __a, uint8x8_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vpadaluv8qi ((int16x4_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadal_u16 (uint32x2_t __a, uint16x4_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vpadaluv4hi ((int32x2_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadal_u32 (uint64x1_t __a, uint32x2_t __b)
- {
-   return (uint64x1_t)__builtin_neon_vpadaluv2si ((int64x1_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadalq_s8 (int16x8_t __a, int8x16_t __b)
- {
-   return (int16x8_t)__builtin_neon_vpadalsv16qi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadalq_s16 (int32x4_t __a, int16x8_t __b)
- {
-   return (int32x4_t)__builtin_neon_vpadalsv8hi (__a, __b);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadalq_s32 (int64x2_t __a, int32x4_t __b)
- {
-   return (int64x2_t)__builtin_neon_vpadalsv4si (__a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadalq_u8 (uint16x8_t __a, uint8x16_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vpadaluv16qi ((int16x8_t) __a, (int8x16_t) __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadalq_u16 (uint32x4_t __a, uint16x8_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vpadaluv8hi ((int32x4_t) __a, (int16x8_t) __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpadalq_u32 (uint64x2_t __a, uint32x4_t __b)
- {
-   return (uint64x2_t)__builtin_neon_vpadaluv4si ((int64x2_t) __a, (int32x4_t) __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmax_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vpmaxsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmax_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vpmaxsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmax_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vpmaxsv2si (__a, __b);
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmax_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (float32x2_t)__builtin_neon_vpmaxfv2sf (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmax_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vpmaxuv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmax_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vpmaxuv4hi ((int16x4_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmax_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vpmaxuv2si ((int32x2_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmin_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vpminsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmin_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vpminsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmin_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vpminsv2si (__a, __b);
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmin_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (float32x2_t)__builtin_neon_vpminfv2sf (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmin_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vpminuv8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmin_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vpminuv4hi ((int16x4_t) __a, (int16x4_t) __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vpmin_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vpminuv2si ((int32x2_t) __a, (int32x2_t) __b);
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrecps_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (float32x2_t)__builtin_neon_vrecpsv2sf (__a, __b);
- }
-
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (float32x4_t)__builtin_neon_vrecpsv4sf (__a, __b);
- }
-
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrsqrts_f32 (float32x2_t __a, float32x2_t __b)
- {
-   return (float32x2_t)__builtin_neon_vrsqrtsv2sf (__a, __b);
- }
-
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrsqrtsq_f32 (float32x4_t __a, float32x4_t __b)
- {
-   return (float32x4_t)__builtin_neon_vrsqrtsv4sf (__a, __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshl_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vshlsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshl_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vshlsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshl_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vshlsv2si (__a, __b);
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshl_s64 (int64x1_t __a, int64x1_t __b)
- {
-   return (int64x1_t)__builtin_neon_vshlsdi (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshl_u8 (uint8x8_t __a, int8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vshluv8qi ((int8x8_t) __a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshl_u16 (uint16x4_t __a, int16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vshluv4hi ((int16x4_t) __a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshl_u32 (uint32x2_t __a, int32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vshluv2si ((int32x2_t) __a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshl_u64 (uint64x1_t __a, int64x1_t __b)
- {
-   return (uint64x1_t)__builtin_neon_vshludi ((int64x1_t) __a, __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshlq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (int8x16_t)__builtin_neon_vshlsv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshlq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (int16x8_t)__builtin_neon_vshlsv8hi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshlq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (int32x4_t)__builtin_neon_vshlsv4si (__a, __b);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshlq_s64 (int64x2_t __a, int64x2_t __b)
- {
-   return (int64x2_t)__builtin_neon_vshlsv2di (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshlq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vshluv16qi ((int8x16_t) __a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshlq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vshluv8hi ((int16x8_t) __a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshlq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vshluv4si ((int32x4_t) __a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshlq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-   return (uint64x2_t)__builtin_neon_vshluv2di ((int64x2_t) __a, __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshl_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vrshlsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshl_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vrshlsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshl_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vrshlsv2si (__a, __b);
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshl_s64 (int64x1_t __a, int64x1_t __b)
- {
-   return (int64x1_t)__builtin_neon_vrshlsdi (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshl_u8 (uint8x8_t __a, int8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vrshluv8qi ((int8x8_t) __a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshl_u16 (uint16x4_t __a, int16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vrshluv4hi ((int16x4_t) __a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshl_u32 (uint32x2_t __a, int32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vrshluv2si ((int32x2_t) __a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshl_u64 (uint64x1_t __a, int64x1_t __b)
- {
-   return (uint64x1_t)__builtin_neon_vrshludi ((int64x1_t) __a, __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshlq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (int8x16_t)__builtin_neon_vrshlsv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshlq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (int16x8_t)__builtin_neon_vrshlsv8hi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshlq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (int32x4_t)__builtin_neon_vrshlsv4si (__a, __b);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshlq_s64 (int64x2_t __a, int64x2_t __b)
- {
-   return (int64x2_t)__builtin_neon_vrshlsv2di (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshlq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vrshluv16qi ((int8x16_t) __a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshlq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vrshluv8hi ((int16x8_t) __a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshlq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vrshluv4si ((int32x4_t) __a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshlq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-   return (uint64x2_t)__builtin_neon_vrshluv2di ((int64x2_t) __a, __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshl_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vqshlsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshl_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vqshlsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshl_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vqshlsv2si (__a, __b);
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshl_s64 (int64x1_t __a, int64x1_t __b)
- {
-   return (int64x1_t)__builtin_neon_vqshlsdi (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshl_u8 (uint8x8_t __a, int8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vqshluv8qi ((int8x8_t) __a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshl_u16 (uint16x4_t __a, int16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vqshluv4hi ((int16x4_t) __a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshl_u32 (uint32x2_t __a, int32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vqshluv2si ((int32x2_t) __a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshl_u64 (uint64x1_t __a, int64x1_t __b)
- {
-   return (uint64x1_t)__builtin_neon_vqshludi ((int64x1_t) __a, __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshlq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (int8x16_t)__builtin_neon_vqshlsv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshlq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (int16x8_t)__builtin_neon_vqshlsv8hi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshlq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (int32x4_t)__builtin_neon_vqshlsv4si (__a, __b);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshlq_s64 (int64x2_t __a, int64x2_t __b)
- {
-   return (int64x2_t)__builtin_neon_vqshlsv2di (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshlq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vqshluv16qi ((int8x16_t) __a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshlq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vqshluv8hi ((int16x8_t) __a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshlq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vqshluv4si ((int32x4_t) __a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqshlq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-   return (uint64x2_t)__builtin_neon_vqshluv2di ((int64x2_t) __a, __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshl_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return (int8x8_t)__builtin_neon_vqrshlsv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshl_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return (int16x4_t)__builtin_neon_vqrshlsv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshl_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return (int32x2_t)__builtin_neon_vqrshlsv2si (__a, __b);
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshl_s64 (int64x1_t __a, int64x1_t __b)
- {
-   return (int64x1_t)__builtin_neon_vqrshlsdi (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshl_u8 (uint8x8_t __a, int8x8_t __b)
- {
-   return (uint8x8_t)__builtin_neon_vqrshluv8qi ((int8x8_t) __a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshl_u16 (uint16x4_t __a, int16x4_t __b)
- {
-   return (uint16x4_t)__builtin_neon_vqrshluv4hi ((int16x4_t) __a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshl_u32 (uint32x2_t __a, int32x2_t __b)
- {
-   return (uint32x2_t)__builtin_neon_vqrshluv2si ((int32x2_t) __a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshl_u64 (uint64x1_t __a, int64x1_t __b)
- {
-   return (uint64x1_t)__builtin_neon_vqrshludi ((int64x1_t) __a, __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshlq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return (int8x16_t)__builtin_neon_vqrshlsv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshlq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return (int16x8_t)__builtin_neon_vqrshlsv8hi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshlq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return (int32x4_t)__builtin_neon_vqrshlsv4si (__a, __b);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshlq_s64 (int64x2_t __a, int64x2_t __b)
- {
-   return (int64x2_t)__builtin_neon_vqrshlsv2di (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshlq_u8 (uint8x16_t __a, int8x16_t __b)
- {
-   return (uint8x16_t)__builtin_neon_vqrshluv16qi ((int8x16_t) __a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshlq_u16 (uint16x8_t __a, int16x8_t __b)
- {
-   return (uint16x8_t)__builtin_neon_vqrshluv8hi ((int16x8_t) __a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshlq_u32 (uint32x4_t __a, int32x4_t __b)
- {
-   return (uint32x4_t)__builtin_neon_vqrshluv4si ((int32x4_t) __a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrshlq_u64 (uint64x2_t __a, int64x2_t __b)
- {
-   return (uint64x2_t)__builtin_neon_vqrshluv2di ((int64x2_t) __a, __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshr_n_s8 (int8x8_t __a, const int __b)
- {
-   return (int8x8_t)__builtin_neon_vshrs_nv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshr_n_s16 (int16x4_t __a, const int __b)
- {
-   return (int16x4_t)__builtin_neon_vshrs_nv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshr_n_s32 (int32x2_t __a, const int __b)
- {
-   return (int32x2_t)__builtin_neon_vshrs_nv2si (__a, __b);
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshr_n_s64 (int64x1_t __a, const int __b)
- {
-   return (int64x1_t)__builtin_neon_vshrs_ndi (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshr_n_u8 (uint8x8_t __a, const int __b)
- {
-   return (uint8x8_t)__builtin_neon_vshru_nv8qi ((int8x8_t) __a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshr_n_u16 (uint16x4_t __a, const int __b)
- {
-   return (uint16x4_t)__builtin_neon_vshru_nv4hi ((int16x4_t) __a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshr_n_u32 (uint32x2_t __a, const int __b)
- {
-   return (uint32x2_t)__builtin_neon_vshru_nv2si ((int32x2_t) __a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshr_n_u64 (uint64x1_t __a, const int __b)
- {
-   return (uint64x1_t)__builtin_neon_vshru_ndi ((int64x1_t) __a, __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshrq_n_s8 (int8x16_t __a, const int __b)
- {
-   return (int8x16_t)__builtin_neon_vshrs_nv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshrq_n_s16 (int16x8_t __a, const int __b)
- {
-   return (int16x8_t)__builtin_neon_vshrs_nv8hi (__a, __b);
- }
-
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshrq_n_s32 (int32x4_t __a, const int __b)
- {
-   return (int32x4_t)__builtin_neon_vshrs_nv4si (__a, __b);
- }
-
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshrq_n_s64 (int64x2_t __a, const int __b)
- {
-   return (int64x2_t)__builtin_neon_vshrs_nv2di (__a, __b);
- }
-
--__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshrq_n_u8 (uint8x16_t __a, const int __b)
- {
-   return (uint8x16_t)__builtin_neon_vshru_nv16qi ((int8x16_t) __a, __b);
- }
-
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshrq_n_u16 (uint16x8_t __a, const int __b)
- {
-   return (uint16x8_t)__builtin_neon_vshru_nv8hi ((int16x8_t) __a, __b);
- }
-
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshrq_n_u32 (uint32x4_t __a, const int __b)
- {
-   return (uint32x4_t)__builtin_neon_vshru_nv4si ((int32x4_t) __a, __b);
- }
-
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vshrq_n_u64 (uint64x2_t __a, const int __b)
- {
-   return (uint64x2_t)__builtin_neon_vshru_nv2di ((int64x2_t) __a, __b);
- }
-
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshr_n_s8 (int8x8_t __a, const int __b)
- {
-   return (int8x8_t)__builtin_neon_vrshrs_nv8qi (__a, __b);
- }
-
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshr_n_s16 (int16x4_t __a, const int __b)
- {
-   return (int16x4_t)__builtin_neon_vrshrs_nv4hi (__a, __b);
- }
-
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshr_n_s32 (int32x2_t __a, const int __b)
- {
-   return (int32x2_t)__builtin_neon_vrshrs_nv2si (__a, __b);
- }
-
--__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshr_n_s64 (int64x1_t __a, const int __b)
- {
-   return (int64x1_t)__builtin_neon_vrshrs_ndi (__a, __b);
- }
-
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshr_n_u8 (uint8x8_t __a, const int __b)
- {
-   return (uint8x8_t)__builtin_neon_vrshru_nv8qi ((int8x8_t) __a, __b);
- }
-
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshr_n_u16 (uint16x4_t __a, const int __b)
- {
-   return (uint16x4_t)__builtin_neon_vrshru_nv4hi ((int16x4_t) __a, __b);
- }
-
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshr_n_u32 (uint32x2_t __a, const int __b)
- {
-   return (uint32x2_t)__builtin_neon_vrshru_nv2si ((int32x2_t) __a, __b);
- }
-
--__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshr_n_u64 (uint64x1_t __a, const int __b)
- {
-   return (uint64x1_t)__builtin_neon_vrshru_ndi ((int64x1_t) __a, __b);
- }
-
--__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vrshrq_n_s8 (int8x16_t __a, const int __b)
- {
-   return (int8x16_t)__builtin_neon_vrshrs_nv16qi (__a, __b);
- }
-
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__
((__always_inline__, __gnu_inline__, __artificial__)) - vrshrq_n_s16 (int16x8_t __a, const int __b) - { - return (int16x8_t)__builtin_neon_vrshrs_nv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrq_n_s32 (int32x4_t __a, const int __b) - { - return (int32x4_t)__builtin_neon_vrshrs_nv4si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrq_n_s64 (int64x2_t __a, const int __b) - { - return (int64x2_t)__builtin_neon_vrshrs_nv2di (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrq_n_u8 (uint8x16_t __a, const int __b) - { - return (uint8x16_t)__builtin_neon_vrshru_nv16qi ((int8x16_t) __a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrq_n_u16 (uint16x8_t __a, const int __b) - { - return (uint16x8_t)__builtin_neon_vrshru_nv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrq_n_u32 (uint32x4_t __a, const int __b) - { - return (uint32x4_t)__builtin_neon_vrshru_nv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrq_n_u64 (uint64x2_t __a, const int __b) - { - return (uint64x2_t)__builtin_neon_vrshru_nv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshrn_n_s16 (int16x8_t __a, const int __b) - { - return (int8x8_t)__builtin_neon_vshrn_nv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshrn_n_s32 (int32x4_t __a, const int __b) - { - return (int16x4_t)__builtin_neon_vshrn_nv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshrn_n_s64 (int64x2_t __a, const int __b) - { - return (int32x2_t)__builtin_neon_vshrn_nv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshrn_n_u16 (uint16x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vshrn_nv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshrn_n_u32 (uint32x4_t __a, const int __b) - { - return 
(uint16x4_t)__builtin_neon_vshrn_nv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshrn_n_u64 (uint64x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vshrn_nv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrn_n_s16 (int16x8_t __a, const int __b) - { - return (int8x8_t)__builtin_neon_vrshrn_nv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrn_n_s32 (int32x4_t __a, const int __b) - { - return (int16x4_t)__builtin_neon_vrshrn_nv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrn_n_s64 (int64x2_t __a, const int __b) - { - return (int32x2_t)__builtin_neon_vrshrn_nv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrn_n_u16 (uint16x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vrshrn_nv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrn_n_u32 (uint32x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vrshrn_nv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrshrn_n_u64 (uint64x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vrshrn_nv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrn_n_s16 (int16x8_t __a, const int __b) - { - return (int8x8_t)__builtin_neon_vqshrns_nv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrn_n_s32 (int32x4_t __a, const int __b) - { - return (int16x4_t)__builtin_neon_vqshrns_nv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrn_n_s64 (int64x2_t __a, const int __b) - { - return (int32x2_t)__builtin_neon_vqshrns_nv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrn_n_u16 (uint16x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vqshrnu_nv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint16x4_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrn_n_u32 (uint32x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vqshrnu_nv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrn_n_u64 (uint64x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vqshrnu_nv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrn_n_s16 (int16x8_t __a, const int __b) - { - return (int8x8_t)__builtin_neon_vqrshrns_nv8hi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrn_n_s32 (int32x4_t __a, const int __b) - { - return (int16x4_t)__builtin_neon_vqrshrns_nv4si (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrn_n_s64 (int64x2_t __a, const int __b) - { - return (int32x2_t)__builtin_neon_vqrshrns_nv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrn_n_u16 (uint16x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vqrshrnu_nv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrn_n_u32 (uint32x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vqrshrnu_nv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrn_n_u64 (uint64x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vqrshrnu_nv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrun_n_s16 (int16x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vqshrun_nv8hi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrun_n_s32 (int32x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vqshrun_nv4si (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshrun_n_s64 (int64x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vqshrun_nv2di (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrun_n_s16 (int16x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vqrshrun_nv8hi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrun_n_s32 (int32x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vqrshrun_nv4si (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqrshrun_n_s64 (int64x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vqrshrun_nv2di (__a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshl_n_s8 (int8x8_t __a, const int __b) - { - return (int8x8_t)__builtin_neon_vshl_nv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshl_n_s16 (int16x4_t __a, const int __b) - { - return (int16x4_t)__builtin_neon_vshl_nv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshl_n_s32 (int32x2_t __a, const int __b) - { - return (int32x2_t)__builtin_neon_vshl_nv2si (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshl_n_s64 (int64x1_t __a, const int __b) - { - return (int64x1_t)__builtin_neon_vshl_ndi (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshl_n_u8 (uint8x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vshl_nv8qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshl_n_u16 (uint16x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vshl_nv4hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshl_n_u32 (uint32x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vshl_nv2si ((int32x2_t) __a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshl_n_u64 (uint64x1_t __a, const int __b) - { - return (uint64x1_t)__builtin_neon_vshl_ndi ((int64x1_t) __a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshlq_n_s8 (int8x16_t __a, const int __b) - { - return 
(int8x16_t)__builtin_neon_vshl_nv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshlq_n_s16 (int16x8_t __a, const int __b) - { - return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshlq_n_s32 (int32x4_t __a, const int __b) - { - return (int32x4_t)__builtin_neon_vshl_nv4si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshlq_n_s64 (int64x2_t __a, const int __b) - { - return (int64x2_t)__builtin_neon_vshl_nv2di (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshlq_n_u8 (uint8x16_t __a, const int __b) - { - return (uint8x16_t)__builtin_neon_vshl_nv16qi ((int8x16_t) __a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshlq_n_u16 (uint16x8_t __a, const int __b) - { - return (uint16x8_t)__builtin_neon_vshl_nv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshlq_n_u32 (uint32x4_t __a, const int __b) - { - return (uint32x4_t)__builtin_neon_vshl_nv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshlq_n_u64 (uint64x2_t __a, const int __b) - { - return (uint64x2_t)__builtin_neon_vshl_nv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshl_n_s8 (int8x8_t __a, const int __b) - { - return (int8x8_t)__builtin_neon_vqshl_s_nv8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshl_n_s16 (int16x4_t __a, const int __b) - { - return (int16x4_t)__builtin_neon_vqshl_s_nv4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshl_n_s32 (int32x2_t __a, const int __b) - { - return (int32x2_t)__builtin_neon_vqshl_s_nv2si (__a, __b); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshl_n_s64 (int64x1_t __a, const int __b) - { - return (int64x1_t)__builtin_neon_vqshl_s_ndi (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ 
extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshl_n_u8 (uint8x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vqshl_u_nv8qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshl_n_u16 (uint16x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vqshl_u_nv4hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshl_n_u32 (uint32x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vqshl_u_nv2si ((int32x2_t) __a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshl_n_u64 (uint64x1_t __a, const int __b) - { - return (uint64x1_t)__builtin_neon_vqshl_u_ndi ((int64x1_t) __a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlq_n_s8 (int8x16_t __a, const int __b) - { - return (int8x16_t)__builtin_neon_vqshl_s_nv16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlq_n_s16 (int16x8_t __a, const int __b) - { - return (int16x8_t)__builtin_neon_vqshl_s_nv8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlq_n_s32 (int32x4_t __a, const int __b) - { - return (int32x4_t)__builtin_neon_vqshl_s_nv4si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlq_n_s64 (int64x2_t __a, const int __b) - { - return (int64x2_t)__builtin_neon_vqshl_s_nv2di (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlq_n_u8 (uint8x16_t __a, const int __b) - { - return (uint8x16_t)__builtin_neon_vqshl_u_nv16qi ((int8x16_t) __a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlq_n_u16 (uint16x8_t __a, const int __b) - { - return (uint16x8_t)__builtin_neon_vqshl_u_nv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlq_n_u32 (uint32x4_t __a, const int __b) - { - return (uint32x4_t)__builtin_neon_vqshl_u_nv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) - vqshlq_n_u64 (uint64x2_t __a, const int __b) - { - return (uint64x2_t)__builtin_neon_vqshl_u_nv2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlu_n_s8 (int8x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vqshlu_nv8qi (__a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlu_n_s16 (int16x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vqshlu_nv4hi (__a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlu_n_s32 (int32x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vqshlu_nv2si (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshlu_n_s64 (int64x1_t __a, const int __b) - { - return (uint64x1_t)__builtin_neon_vqshlu_ndi (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshluq_n_s8 (int8x16_t __a, const int __b) - { - return (uint8x16_t)__builtin_neon_vqshlu_nv16qi (__a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshluq_n_s16 (int16x8_t __a, const int __b) - { - return (uint16x8_t)__builtin_neon_vqshlu_nv8hi (__a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshluq_n_s32 (int32x4_t __a, const int __b) - { - return (uint32x4_t)__builtin_neon_vqshlu_nv4si (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqshluq_n_s64 (int64x2_t __a, const int __b) - { - return (uint64x2_t)__builtin_neon_vqshlu_nv2di (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshll_n_s8 (int8x8_t __a, const int __b) - { - return (int16x8_t)__builtin_neon_vshlls_nv8qi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshll_n_s16 (int16x4_t __a, const int __b) - { - return (int32x4_t)__builtin_neon_vshlls_nv4hi (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshll_n_s32 (int32x2_t __a, const int __b) - { - return (int64x2_t)__builtin_neon_vshlls_nv2si (__a, __b); - } - 
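Every hunk above and below applies the same mechanical rewrite to arm_neon.h: each NEON intrinsic changes from a static __inline definition to an extern __inline definition carrying __gnu_inline__ and __artificial__ alongside __always_inline__. A minimal C sketch of the before/after idiom, using a hypothetical my_vadd wrapper rather than one of the real intrinsics (the example is illustrative only and is not part of the patch):

/* Old idiom: static always-inline.  Should the compiler ever emit an
   out-of-line copy, each translation unit would get its own private
   static symbol.  __extension__ silences -pedantic warnings about the
   GNU attribute syntax. */
__extension__ static __inline int __attribute__ ((__always_inline__))
my_vadd_old (int __a, int __b)
{
  return __a + __b;
}

/* New idiom: extern plus gnu_inline selects GNU89 "extern inline"
   semantics, so the body is used only for inlining and no standalone
   definition is ever emitted; artificial marks the wrapper as
   compiler-generated in debug info, so single-stepping stays at the
   caller's source line instead of descending into the one-line body. */
__extension__ extern __inline int
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
my_vadd_new (int __a, int __b)
{
  return __a + __b;
}

This is the same declaration style AArch64's arm_neon.h uses; the net effect is that the intrinsic bodies exist purely for inlining and never become per-translation-unit static symbols.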
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshll_n_u8 (uint8x8_t __a, const int __b) - { - return (uint16x8_t)__builtin_neon_vshllu_nv8qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshll_n_u16 (uint16x4_t __a, const int __b) - { - return (uint32x4_t)__builtin_neon_vshllu_nv4hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vshll_n_u32 (uint32x2_t __a, const int __b) - { - return (uint64x2_t)__builtin_neon_vshllu_nv2si ((int32x2_t) __a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) - { - return (int8x8_t)__builtin_neon_vsras_nv8qi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { - return (int16x4_t)__builtin_neon_vsras_nv4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { - return (int32x2_t)__builtin_neon_vsras_nv2si (__a, __b, __c); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) - { - return (int64x1_t)__builtin_neon_vsras_ndi (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { - return (uint8x8_t)__builtin_neon_vsrau_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { - return (uint16x4_t)__builtin_neon_vsrau_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { - return (uint32x2_t)__builtin_neon_vsrau_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { - 
return (uint64x1_t)__builtin_neon_vsrau_ndi ((int64x1_t) __a, (int64x1_t) __b, __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) - { - return (int8x16_t)__builtin_neon_vsras_nv16qi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) - { - return (int16x8_t)__builtin_neon_vsras_nv8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) - { - return (int32x4_t)__builtin_neon_vsras_nv4si (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) - { - return (int64x2_t)__builtin_neon_vsras_nv2di (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { - return (uint8x16_t)__builtin_neon_vsrau_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { - return (uint16x8_t)__builtin_neon_vsrau_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { - return (uint32x4_t)__builtin_neon_vsrau_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { - return (uint64x2_t)__builtin_neon_vsrau_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsra_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) - { - return (int8x8_t)__builtin_neon_vrsras_nv8qi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsra_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { - return (int16x4_t)__builtin_neon_vrsras_nv4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline 
int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsra_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { - return (int32x2_t)__builtin_neon_vrsras_nv2si (__a, __b, __c); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsra_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) - { - return (int64x1_t)__builtin_neon_vrsras_ndi (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { - return (uint8x8_t)__builtin_neon_vrsrau_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { - return (uint16x4_t)__builtin_neon_vrsrau_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { - return (uint32x2_t)__builtin_neon_vrsrau_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { - return (uint64x1_t)__builtin_neon_vrsrau_ndi ((int64x1_t) __a, (int64x1_t) __b, __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsraq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) - { - return (int8x16_t)__builtin_neon_vrsras_nv16qi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsraq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) - { - return (int16x8_t)__builtin_neon_vrsras_nv8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsraq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) - { - return (int32x4_t)__builtin_neon_vrsras_nv4si (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsraq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) - { - return (int64x2_t)__builtin_neon_vrsras_nv2di (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { - return 
(uint8x16_t)__builtin_neon_vrsrau_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { - return (uint16x8_t)__builtin_neon_vrsrau_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { - return (uint32x4_t)__builtin_neon_vrsrau_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { - return (uint64x2_t)__builtin_neon_vrsrau_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); -@@ -4565,68 +5278,79 @@ vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) - { - return (poly64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) - { - return (int8x8_t)__builtin_neon_vsri_nv8qi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { - return (int16x4_t)__builtin_neon_vsri_nv4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { - return (int32x2_t)__builtin_neon_vsri_nv2si (__a, __b, __c); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_s64 (int64x1_t __a, int64x1_t __b, const int __c) - { - return (int64x1_t)__builtin_neon_vsri_ndi (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { - return (uint8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { - return 
(uint16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { - return (uint32x2_t)__builtin_neon_vsri_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { - return (uint64x1_t)__builtin_neon_vsri_ndi ((int64x1_t) __a, (int64x1_t) __b, __c); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) - { - return (poly8x8_t)__builtin_neon_vsri_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) - { - return (poly16x4_t)__builtin_neon_vsri_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); -@@ -4634,68 +5358,79 @@ vsri_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) - { - return (poly64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) - { - return (int8x16_t)__builtin_neon_vsri_nv16qi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) - { - return (int16x8_t)__builtin_neon_vsri_nv8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) - { - return (int32x4_t)__builtin_neon_vsri_nv4si (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) - { - return (int64x2_t)__builtin_neon_vsri_nv2di (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int 
__c) - { - return (uint8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { - return (uint16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { - return (uint32x4_t)__builtin_neon_vsri_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { - return (uint64x2_t)__builtin_neon_vsri_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) - { - return (poly8x16_t)__builtin_neon_vsri_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) - { - return (poly16x8_t)__builtin_neon_vsri_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); -@@ -4703,68 +5438,79 @@ vsriq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) - { - return (poly64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_s8 (int8x8_t __a, int8x8_t __b, const int __c) - { - return (int8x8_t)__builtin_neon_vsli_nv8qi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { - return (int16x4_t)__builtin_neon_vsli_nv4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { - return (int32x2_t)__builtin_neon_vsli_nv2si (__a, __b, __c); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_s64 (int64x1_t __a, 
int64x1_t __b, const int __c) - { - return (int64x1_t)__builtin_neon_vsli_ndi (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { - return (uint8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { - return (uint16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { - return (uint32x2_t)__builtin_neon_vsli_nv2si ((int32x2_t) __a, (int32x2_t) __b, __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { - return (uint64x1_t)__builtin_neon_vsli_ndi ((int64x1_t) __a, (int64x1_t) __b, __c); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) - { - return (poly8x8_t)__builtin_neon_vsli_nv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) - { - return (poly16x4_t)__builtin_neon_vsli_nv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); -@@ -4772,530 +5518,618 @@ vsli_n_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) - { - return (poly64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_s8 (int8x16_t __a, int8x16_t __b, const int __c) - { - return (int8x16_t)__builtin_neon_vsli_nv16qi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_s16 (int16x8_t __a, int16x8_t __b, const int __c) - { - return (int16x8_t)__builtin_neon_vsli_nv8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) - vsliq_n_s32 (int32x4_t __a, int32x4_t __b, const int __c) - { - return (int32x4_t)__builtin_neon_vsli_nv4si (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_s64 (int64x2_t __a, int64x2_t __b, const int __c) - { - return (int64x2_t)__builtin_neon_vsli_nv2di (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { - return (uint8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { - return (uint16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { - return (uint32x4_t)__builtin_neon_vsli_nv4si ((int32x4_t) __a, (int32x4_t) __b, __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { - return (uint64x2_t)__builtin_neon_vsli_nv2di ((int64x2_t) __a, (int64x2_t) __b, __c); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) - { - return (poly8x16_t)__builtin_neon_vsli_nv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsliq_n_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) - { - return (poly16x8_t)__builtin_neon_vsli_nv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabs_s8 (int8x8_t __a) - { - return (int8x8_t)__builtin_neon_vabsv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabs_s16 (int16x4_t __a) - { - return (int16x4_t)__builtin_neon_vabsv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabs_s32 (int32x2_t __a) - { - return (int32x2_t)__builtin_neon_vabsv2si (__a); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vabs_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vabsv2sf (__a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabsq_s8 (int8x16_t __a) - { - return (int8x16_t)__builtin_neon_vabsv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabsq_s16 (int16x8_t __a) - { - return (int16x8_t)__builtin_neon_vabsv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabsq_s32 (int32x4_t __a) - { - return (int32x4_t)__builtin_neon_vabsv4si (__a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vabsq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vabsv4sf (__a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabs_s8 (int8x8_t __a) - { - return (int8x8_t)__builtin_neon_vqabsv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabs_s16 (int16x4_t __a) - { - return (int16x4_t)__builtin_neon_vqabsv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabs_s32 (int32x2_t __a) - { - return (int32x2_t)__builtin_neon_vqabsv2si (__a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabsq_s8 (int8x16_t __a) - { - return (int8x16_t)__builtin_neon_vqabsv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabsq_s16 (int16x8_t __a) - { - return (int16x8_t)__builtin_neon_vqabsv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqabsq_s32 (int32x4_t __a) - { - return (int32x4_t)__builtin_neon_vqabsv4si (__a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vneg_s8 (int8x8_t __a) - { - return (int8x8_t)__builtin_neon_vnegv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vneg_s16 (int16x4_t __a) - { - return (int16x4_t)__builtin_neon_vnegv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vneg_s32 (int32x2_t __a) - { - return (int32x2_t)__builtin_neon_vnegv2si (__a); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vneg_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vnegv2sf (__a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vnegq_s8 (int8x16_t __a) - { - return (int8x16_t)__builtin_neon_vnegv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vnegq_s16 (int16x8_t __a) - { - return (int16x8_t)__builtin_neon_vnegv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vnegq_s32 (int32x4_t __a) - { - return (int32x4_t)__builtin_neon_vnegv4si (__a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vnegq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vnegv4sf (__a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqneg_s8 (int8x8_t __a) - { - return (int8x8_t)__builtin_neon_vqnegv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqneg_s16 (int16x4_t __a) - { - return (int16x4_t)__builtin_neon_vqnegv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqneg_s32 (int32x2_t __a) - { - return (int32x2_t)__builtin_neon_vqnegv2si (__a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqnegq_s8 (int8x16_t __a) - { - return (int8x16_t)__builtin_neon_vqnegv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqnegq_s16 (int16x8_t __a) - { - return (int16x8_t)__builtin_neon_vqnegv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqnegq_s32 (int32x4_t __a) - { - return (int32x4_t)__builtin_neon_vqnegv4si (__a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvn_s8 (int8x8_t __a) - { - return 
(int8x8_t)__builtin_neon_vmvnv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvn_s16 (int16x4_t __a) - { - return (int16x4_t)__builtin_neon_vmvnv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvn_s32 (int32x2_t __a) - { - return (int32x2_t)__builtin_neon_vmvnv2si (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvn_u8 (uint8x8_t __a) - { - return (uint8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvn_u16 (uint16x4_t __a) - { - return (uint16x4_t)__builtin_neon_vmvnv4hi ((int16x4_t) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvn_u32 (uint32x2_t __a) - { - return (uint32x2_t)__builtin_neon_vmvnv2si ((int32x2_t) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvn_p8 (poly8x8_t __a) - { - return (poly8x8_t)__builtin_neon_vmvnv8qi ((int8x8_t) __a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvnq_s8 (int8x16_t __a) - { - return (int8x16_t)__builtin_neon_vmvnv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvnq_s16 (int16x8_t __a) - { - return (int16x8_t)__builtin_neon_vmvnv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvnq_s32 (int32x4_t __a) - { - return (int32x4_t)__builtin_neon_vmvnv4si (__a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvnq_u8 (uint8x16_t __a) - { - return (uint8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvnq_u16 (uint16x8_t __a) - { - return (uint16x8_t)__builtin_neon_vmvnv8hi ((int16x8_t) __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvnq_u32 (uint32x4_t __a) - { - return (uint32x4_t)__builtin_neon_vmvnv4si ((int32x4_t) __a); - } - --__extension__ static __inline poly8x16_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmvnq_p8 (poly8x16_t __a) - { - return (poly8x16_t)__builtin_neon_vmvnv16qi ((int8x16_t) __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcls_s8 (int8x8_t __a) - { - return (int8x8_t)__builtin_neon_vclsv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcls_s16 (int16x4_t __a) - { - return (int16x4_t)__builtin_neon_vclsv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcls_s32 (int32x2_t __a) - { - return (int32x2_t)__builtin_neon_vclsv2si (__a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclsq_s8 (int8x16_t __a) - { - return (int8x16_t)__builtin_neon_vclsv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclsq_s16 (int16x8_t __a) - { - return (int16x8_t)__builtin_neon_vclsv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclsq_s32 (int32x4_t __a) - { - return (int32x4_t)__builtin_neon_vclsv4si (__a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclz_s8 (int8x8_t __a) - { - return (int8x8_t)__builtin_neon_vclzv8qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclz_s16 (int16x4_t __a) - { - return (int16x4_t)__builtin_neon_vclzv4hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclz_s32 (int32x2_t __a) - { - return (int32x2_t)__builtin_neon_vclzv2si (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclz_u8 (uint8x8_t __a) - { - return (uint8x8_t)__builtin_neon_vclzv8qi ((int8x8_t) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclz_u16 (uint16x4_t __a) - { - return (uint16x4_t)__builtin_neon_vclzv4hi ((int16x4_t) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclz_u32 (uint32x2_t __a) - { - return 
(uint32x2_t)__builtin_neon_vclzv2si ((int32x2_t) __a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclzq_s8 (int8x16_t __a) - { - return (int8x16_t)__builtin_neon_vclzv16qi (__a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclzq_s16 (int16x8_t __a) - { - return (int16x8_t)__builtin_neon_vclzv8hi (__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclzq_s32 (int32x4_t __a) - { - return (int32x4_t)__builtin_neon_vclzv4si (__a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclzq_u8 (uint8x16_t __a) - { - return (uint8x16_t)__builtin_neon_vclzv16qi ((int8x16_t) __a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclzq_u16 (uint16x8_t __a) - { - return (uint16x8_t)__builtin_neon_vclzv8hi ((int16x8_t) __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vclzq_u32 (uint32x4_t __a) - { - return (uint32x4_t)__builtin_neon_vclzv4si ((int32x4_t) __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcnt_s8 (int8x8_t __a) - { - return (int8x8_t)__builtin_neon_vcntv8qi (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcnt_u8 (uint8x8_t __a) - { - return (uint8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcnt_p8 (poly8x8_t __a) - { - return (poly8x8_t)__builtin_neon_vcntv8qi ((int8x8_t) __a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcntq_s8 (int8x16_t __a) - { - return (int8x16_t)__builtin_neon_vcntv16qi (__a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcntq_u8 (uint8x16_t __a) - { - return (uint8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcntq_p8 (poly8x16_t __a) - { - return (poly8x16_t)__builtin_neon_vcntv16qi ((int8x16_t) __a); - } - --__extension__ static __inline float32x2_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrecpe_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vrecpev2sf (__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrecpe_u32 (uint32x2_t __a) - { - return (uint32x2_t)__builtin_neon_vrecpev2si ((int32x2_t) __a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrecpeq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vrecpev4sf (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrecpeq_u32 (uint32x4_t __a) - { - return (uint32x4_t)__builtin_neon_vrecpev4si ((int32x4_t) __a); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsqrte_f32 (float32x2_t __a) - { - return (float32x2_t)__builtin_neon_vrsqrtev2sf (__a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsqrte_u32 (uint32x2_t __a) - { - return (uint32x2_t)__builtin_neon_vrsqrtev2si ((int32x2_t) __a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsqrteq_f32 (float32x4_t __a) - { - return (float32x4_t)__builtin_neon_vrsqrtev4sf (__a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrsqrteq_u32 (uint32x4_t __a) - { - return (uint32x4_t)__builtin_neon_vrsqrtev4si ((int32x4_t) __a); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_s8 (int8x8_t __a, const int __b) - { - return (int8_t)__builtin_neon_vget_lanev8qi (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_s16 (int16x4_t __a, const int __b) - { - return (int16_t)__builtin_neon_vget_lanev4hi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_s32 (int32x2_t __a, const int __b) - { - return (int32_t)__builtin_neon_vget_lanev2si (__a, __b); -@@ -5328,67 +6162,88 @@ vget_lane_s32 (int32x2_t __a, const int __b) - }) - #endif - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_f32 (float32x2_t __a, const int __b) - { - return 
(float32_t)__builtin_neon_vget_lanev2sf (__a, __b); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_u8 (uint8x8_t __a, const int __b) - { - return (uint8_t)__builtin_neon_vget_laneuv8qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_u16 (uint16x4_t __a, const int __b) - { - return (uint16_t)__builtin_neon_vget_laneuv4hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_u32 (uint32x2_t __a, const int __b) - { - return (uint32_t)__builtin_neon_vget_laneuv2si ((int32x2_t) __a, __b); - } - --__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_p8 (poly8x8_t __a, const int __b) - { - return (poly8_t)__builtin_neon_vget_laneuv8qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_p16 (poly16x4_t __a, const int __b) - { - return (poly16_t)__builtin_neon_vget_laneuv4hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_s64 (int64x1_t __a, const int __b) - { - return (int64_t)__builtin_neon_vget_lanedi (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+#pragma GCC push_options -+#pragma GCC target ("fpu=crypto-neon-fp-armv8") -+__extension__ extern __inline poly64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vget_lane_p64 (poly64x1_t __a, const int __b) -+{ -+ return (poly64_t)__builtin_neon_vget_lanedi ((int64x1_t) __a, __b); -+} -+ -+#pragma GCC pop_options -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_lane_u64 (uint64x1_t __a, const int __b) - { - return (uint64_t)__builtin_neon_vget_lanedi ((int64x1_t) __a, __b); - } - --__extension__ static __inline int8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_s8 (int8x16_t __a, const int __b) - { - return (int8_t)__builtin_neon_vget_lanev16qi (__a, __b); - } - --__extension__ static __inline int16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_s16 (int16x8_t __a, const int __b) - { - return (int16_t)__builtin_neon_vget_lanev8hi (__a, __b); - } - --__extension__ static __inline int32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_s32 (int32x4_t __a, const int __b) - { - return (int32_t)__builtin_neon_vget_lanev4si (__a, __b); -@@ -5405,67 +6260,78 @@ vgetq_lane_s32 (int32x4_t 
__a, const int __b) - }) - #endif - --__extension__ static __inline float32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_f32 (float32x4_t __a, const int __b) - { - return (float32_t)__builtin_neon_vget_lanev4sf (__a, __b); - } - --__extension__ static __inline uint8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_u8 (uint8x16_t __a, const int __b) - { - return (uint8_t)__builtin_neon_vget_laneuv16qi ((int8x16_t) __a, __b); - } - --__extension__ static __inline uint16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_u16 (uint16x8_t __a, const int __b) - { - return (uint16_t)__builtin_neon_vget_laneuv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_u32 (uint32x4_t __a, const int __b) - { - return (uint32_t)__builtin_neon_vget_laneuv4si ((int32x4_t) __a, __b); - } - --__extension__ static __inline poly8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_p8 (poly8x16_t __a, const int __b) - { - return (poly8_t)__builtin_neon_vget_laneuv16qi ((int8x16_t) __a, __b); - } - --__extension__ static __inline poly16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_p16 (poly16x8_t __a, const int __b) - { - return (poly16_t)__builtin_neon_vget_laneuv8hi ((int16x8_t) __a, __b); - } - --__extension__ static __inline int64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_s64 (int64x2_t __a, const int __b) - { - return (int64_t)__builtin_neon_vget_lanev2di (__a, __b); - } - --__extension__ static __inline uint64_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vgetq_lane_u64 (uint64x2_t __a, const int __b) - { - return (uint64_t)__builtin_neon_vget_lanev2di ((int64x2_t) __a, __b); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_s8 (int8_t __a, int8x8_t __b, const int __c) - { - return (int8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_s16 (int16_t __a, int16x4_t __b, const int __c) - { - return (int16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) - { - return (int32x2_t)__builtin_neon_vset_lanev2si 
((__builtin_neon_si) __a, __b, __c); -@@ -5483,67 +6349,78 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) - }) - #endif - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_f32 (float32_t __a, float32x2_t __b, const int __c) - { - return (float32x2_t)__builtin_neon_vset_lanev2sf ((__builtin_neon_sf) __a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_u8 (uint8_t __a, uint8x8_t __b, const int __c) - { - return (uint8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_u16 (uint16_t __a, uint16x4_t __b, const int __c) - { - return (uint16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_u32 (uint32_t __a, uint32x2_t __b, const int __c) - { - return (uint32x2_t)__builtin_neon_vset_lanev2si ((__builtin_neon_si) __a, (int32x2_t) __b, __c); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_p8 (poly8_t __a, poly8x8_t __b, const int __c) - { - return (poly8x8_t)__builtin_neon_vset_lanev8qi ((__builtin_neon_qi) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_p16 (poly16_t __a, poly16x4_t __b, const int __c) - { - return (poly16x4_t)__builtin_neon_vset_lanev4hi ((__builtin_neon_hi) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_s64 (int64_t __a, int64x1_t __b, const int __c) - { - return (int64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, __b, __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vset_lane_u64 (uint64_t __a, uint64x1_t __b, const int __c) - { - return (uint64x1_t)__builtin_neon_vset_lanedi ((__builtin_neon_di) __a, (int64x1_t) __b, __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_s8 (int8_t __a, int8x16_t __b, const int __c) - { - return (int8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_s16 
(int16_t __a, int16x8_t __b, const int __c) - { - return (int16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) - { - return (int32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, __b, __c); -@@ -5561,49 +6438,57 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) - }) - #endif - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_f32 (float32_t __a, float32x4_t __b, const int __c) - { - return (float32x4_t)__builtin_neon_vset_lanev4sf ((__builtin_neon_sf) __a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_u8 (uint8_t __a, uint8x16_t __b, const int __c) - { - return (uint8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_u16 (uint16_t __a, uint16x8_t __b, const int __c) - { - return (uint16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_u32 (uint32_t __a, uint32x4_t __b, const int __c) - { - return (uint32x4_t)__builtin_neon_vset_lanev4si ((__builtin_neon_si) __a, (int32x4_t) __b, __c); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_p8 (poly8_t __a, poly8x16_t __b, const int __c) - { - return (poly8x16_t)__builtin_neon_vset_lanev16qi ((__builtin_neon_qi) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_p16 (poly16_t __a, poly16x8_t __b, const int __c) - { - return (poly16x8_t)__builtin_neon_vset_lanev8hi ((__builtin_neon_hi) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_s64 (int64_t __a, int64x2_t __b, const int __c) - { - return (int64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, __b, __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c) - { - return (uint64x2_t)__builtin_neon_vset_lanev2di ((__builtin_neon_di) __a, (int64x2_t) __b, __c); -@@ -5611,136 +6496,158 @@ vsetq_lane_u64 (uint64_t __a, uint64x2_t __b, const int __c) - - 
#pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_p64 (uint64_t __a) - { - return (poly64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_s8 (uint64_t __a) - { - return (int8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_s16 (uint64_t __a) - { - return (int16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_s32 (uint64_t __a) - { - return (int32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_s64 (uint64_t __a) - { - return (int64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_f16 (uint64_t __a) - { - return (float16x4_t) __a; - } - #endif - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_f32 (uint64_t __a) - { - return (float32x2_t)__builtin_neon_vcreatev2sf ((__builtin_neon_di) __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_u8 (uint64_t __a) - { - return (uint8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_u16 (uint64_t __a) - { - return (uint16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_u32 (uint64_t __a) - { - return (uint32x2_t)__builtin_neon_vcreatev2si ((__builtin_neon_di) __a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_u64 (uint64_t __a) - { - return (uint64x1_t)__builtin_neon_vcreatedi ((__builtin_neon_di) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ 
((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_p8 (uint64_t __a) - { - return (poly8x8_t)__builtin_neon_vcreatev8qi ((__builtin_neon_di) __a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcreate_p16 (uint64_t __a) - { - return (poly16x4_t)__builtin_neon_vcreatev4hi ((__builtin_neon_di) __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_s8 (int8_t __a) - { - return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_s16 (int16_t __a) - { - return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_s32 (int32_t __a) - { - return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_f32 (float32_t __a) - { - return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_u8 (uint8_t __a) - { - return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_u16 (uint16_t __a) - { - return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_u32 (uint32_t __a) - { - return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_p8 (poly8_t __a) - { - return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_p16 (poly16_t __a) - { - return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); -@@ -5748,20 +6655,23 @@ vdup_n_p16 (poly16_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) - vdup_n_p64 (poly64_t __a) - { - return (poly64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_s64 (int64_t __a) - { - return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_n_u64 (uint64_t __a) - { - return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); -@@ -5769,260 +6679,303 @@ vdup_n_u64 (uint64_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_p64 (poly64_t __a) - { - return (poly64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_s8 (int8_t __a) - { - return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_s16 (int16_t __a) - { - return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_s32 (int32_t __a) - { - return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_f32 (float32_t __a) - { - return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_u8 (uint8_t __a) - { - return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_u16 (uint16_t __a) - { - return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_u32 (uint32_t __a) - { - return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - 
vdupq_n_p8 (poly8_t __a) - { - return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_p16 (poly16_t __a) - { - return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_s64 (int64_t __a) - { - return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_n_u64 (uint64_t __a) - { - return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_s8 (int8_t __a) - { - return (int8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_s16 (int16_t __a) - { - return (int16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_s32 (int32_t __a) - { - return (int32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_f32 (float32_t __a) - { - return (float32x2_t)__builtin_neon_vdup_nv2sf ((__builtin_neon_sf) __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_u8 (uint8_t __a) - { - return (uint8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_u16 (uint16_t __a) - { - return (uint16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_u32 (uint32_t __a) - { - return (uint32x2_t)__builtin_neon_vdup_nv2si ((__builtin_neon_si) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_p8 (poly8_t __a) - { - return (poly8x8_t)__builtin_neon_vdup_nv8qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_p16 (poly16_t __a) - { - return (poly16x4_t)__builtin_neon_vdup_nv4hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_s64 (int64_t __a) - { - return (int64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmov_n_u64 (uint64_t __a) - { - return (uint64x1_t)__builtin_neon_vdup_ndi ((__builtin_neon_di) __a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_s8 (int8_t __a) - { - return (int8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_s16 (int16_t __a) - { - return (int16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_s32 (int32_t __a) - { - return (int32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_f32 (float32_t __a) - { - return (float32x4_t)__builtin_neon_vdup_nv4sf ((__builtin_neon_sf) __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_u8 (uint8_t __a) - { - return (uint8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_u16 (uint16_t __a) - { - return (uint16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_u32 (uint32_t __a) - { - return (uint32x4_t)__builtin_neon_vdup_nv4si ((__builtin_neon_si) __a); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_p8 (poly8_t __a) - { - return (poly8x16_t)__builtin_neon_vdup_nv16qi ((__builtin_neon_qi) __a); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_p16 (poly16_t __a) - { - return (poly16x8_t)__builtin_neon_vdup_nv8hi ((__builtin_neon_hi) __a); - } - --__extension__ static __inline 
int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_s64 (int64_t __a) - { - return (int64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmovq_n_u64 (uint64_t __a) - { - return (uint64x2_t)__builtin_neon_vdup_nv2di ((__builtin_neon_di) __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_s8 (int8x8_t __a, const int __b) - { - return (int8x8_t)__builtin_neon_vdup_lanev8qi (__a, __b); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_s16 (int16x4_t __a, const int __b) - { - return (int16x4_t)__builtin_neon_vdup_lanev4hi (__a, __b); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_s32 (int32x2_t __a, const int __b) - { - return (int32x2_t)__builtin_neon_vdup_lanev2si (__a, __b); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_f32 (float32x2_t __a, const int __b) - { - return (float32x2_t)__builtin_neon_vdup_lanev2sf (__a, __b); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_u8 (uint8x8_t __a, const int __b) - { - return (uint8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_u16 (uint16x4_t __a, const int __b) - { - return (uint16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_u32 (uint32x2_t __a, const int __b) - { - return (uint32x2_t)__builtin_neon_vdup_lanev2si ((int32x2_t) __a, __b); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_p8 (poly8x8_t __a, const int __b) - { - return (poly8x8_t)__builtin_neon_vdup_lanev8qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_p16 (poly16x4_t __a, const int __b) - { - return (poly16x4_t)__builtin_neon_vdup_lanev4hi ((int16x4_t) __a, __b); -@@ -6030,74 +6983,86 @@ vdup_lane_p16 (poly16x4_t __a, const int __b) - - #pragma GCC push_options - #pragma GCC target 
("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_p64 (poly64x1_t __a, const int __b) - { - return (poly64x1_t)__builtin_neon_vdup_lanedi (__a, __b); - } - - #pragma GCC pop_options --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_s64 (int64x1_t __a, const int __b) - { - return (int64x1_t)__builtin_neon_vdup_lanedi (__a, __b); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdup_lane_u64 (uint64x1_t __a, const int __b) - { - return (uint64x1_t)__builtin_neon_vdup_lanedi ((int64x1_t) __a, __b); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_s8 (int8x8_t __a, const int __b) - { - return (int8x16_t)__builtin_neon_vdup_lanev16qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_s16 (int16x4_t __a, const int __b) - { - return (int16x8_t)__builtin_neon_vdup_lanev8hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_s32 (int32x2_t __a, const int __b) - { - return (int32x4_t)__builtin_neon_vdup_lanev4si (__a, __b); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_f32 (float32x2_t __a, const int __b) - { - return (float32x4_t)__builtin_neon_vdup_lanev4sf (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_u8 (uint8x8_t __a, const int __b) - { - return (uint8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_u16 (uint16x4_t __a, const int __b) - { - return (uint16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_u32 (uint32x2_t __a, const int __b) - { - return (uint32x4_t)__builtin_neon_vdup_lanev4si ((int32x2_t) __a, __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_p8 (poly8x8_t __a, const int __b) - { - return (poly8x16_t)__builtin_neon_vdup_lanev16qi ((int8x8_t) __a, __b); - } - --__extension__ static 
__inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_p16 (poly16x4_t __a, const int __b) - { - return (poly16x8_t)__builtin_neon_vdup_lanev8hi ((int16x4_t) __a, __b); -@@ -6105,20 +7070,23 @@ vdupq_lane_p16 (poly16x4_t __a, const int __b) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_p64 (poly64x1_t __a, const int __b) - { - return (poly64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); - } - - #pragma GCC pop_options --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_s64 (int64x1_t __a, const int __b) - { - return (int64x2_t)__builtin_neon_vdup_lanev2di (__a, __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vdupq_lane_u64 (uint64x1_t __a, const int __b) - { - return (uint64x2_t)__builtin_neon_vdup_lanev2di ((int64x1_t) __a, __b); -@@ -6126,82 +7094,95 @@ vdupq_lane_u64 (uint64x1_t __a, const int __b) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_p64 (poly64x1_t __a, poly64x1_t __b) - { - return (poly64x2_t)__builtin_neon_vcombinedi (__a, __b); - } - - #pragma GCC pop_options --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_s8 (int8x8_t __a, int8x8_t __b) - { - return (int8x16_t)__builtin_neon_vcombinev8qi (__a, __b); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_s16 (int16x4_t __a, int16x4_t __b) - { - return (int16x8_t)__builtin_neon_vcombinev4hi (__a, __b); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_s32 (int32x2_t __a, int32x2_t __b) - { - return (int32x4_t)__builtin_neon_vcombinev2si (__a, __b); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_s64 (int64x1_t __a, int64x1_t __b) - { - return (int64x2_t)__builtin_neon_vcombinedi (__a, __b); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_f16 (float16x4_t __a, float16x4_t __b) - { - return __builtin_neon_vcombinev4hf (__a, __b); - } - #endif - --__extension__ static __inline 
float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_f32 (float32x2_t __a, float32x2_t __b) - { - return (float32x4_t)__builtin_neon_vcombinev2sf (__a, __b); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_u8 (uint8x8_t __a, uint8x8_t __b) - { - return (uint8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_u16 (uint16x4_t __a, uint16x4_t __b) - { - return (uint16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_u32 (uint32x2_t __a, uint32x2_t __b) - { - return (uint32x4_t)__builtin_neon_vcombinev2si ((int32x2_t) __a, (int32x2_t) __b); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_u64 (uint64x1_t __a, uint64x1_t __b) - { - return (uint64x2_t)__builtin_neon_vcombinedi ((int64x1_t) __a, (int64x1_t) __b); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_p8 (poly8x8_t __a, poly8x8_t __b) - { - return (poly8x16_t)__builtin_neon_vcombinev8qi ((int8x8_t) __a, (int8x8_t) __b); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vcombine_p16 (poly16x4_t __a, poly16x4_t __b) - { - return (poly16x8_t)__builtin_neon_vcombinev4hi ((int16x4_t) __a, (int16x4_t) __b); -@@ -6209,144 +7190,167 @@ vcombine_p16 (poly16x4_t __a, poly16x4_t __b) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_p64 (poly64x2_t __a) - { - return (poly64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_s8 (int8x16_t __a) - { - return (int8x8_t)__builtin_neon_vget_highv16qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_s16 (int16x8_t __a) - { - return (int16x4_t)__builtin_neon_vget_highv8hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_s32 (int32x4_t __a) - 
{ - return (int32x2_t)__builtin_neon_vget_highv4si (__a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_s64 (int64x2_t __a) - { - return (int64x1_t)__builtin_neon_vget_highv2di (__a); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_f16 (float16x8_t __a) - { - return __builtin_neon_vget_highv8hf (__a); - } - #endif - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_f32 (float32x4_t __a) - { - return (float32x2_t)__builtin_neon_vget_highv4sf (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_u8 (uint8x16_t __a) - { - return (uint8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_u16 (uint16x8_t __a) - { - return (uint16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_u32 (uint32x4_t __a) - { - return (uint32x2_t)__builtin_neon_vget_highv4si ((int32x4_t) __a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_u64 (uint64x2_t __a) - { - return (uint64x1_t)__builtin_neon_vget_highv2di ((int64x2_t) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_p8 (poly8x16_t __a) - { - return (poly8x8_t)__builtin_neon_vget_highv16qi ((int8x16_t) __a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_high_p16 (poly16x8_t __a) - { - return (poly16x4_t)__builtin_neon_vget_highv8hi ((int16x8_t) __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_s8 (int8x16_t __a) - { - return (int8x8_t)__builtin_neon_vget_lowv16qi (__a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_s16 (int16x8_t __a) - { - return (int16x4_t)__builtin_neon_vget_lowv8hi (__a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_s32 (int32x4_t __a) - { - return (int32x2_t)__builtin_neon_vget_lowv4si (__a); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_f16 (float16x8_t __a) - { - return __builtin_neon_vget_lowv8hf (__a); - } - #endif - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_f32 (float32x4_t __a) - { - return (float32x2_t)__builtin_neon_vget_lowv4sf (__a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_u8 (uint8x16_t __a) - { - return (uint8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_u16 (uint16x8_t __a) - { - return (uint16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_u32 (uint32x4_t __a) - { - return (uint32x2_t)__builtin_neon_vget_lowv4si ((int32x4_t) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_p8 (poly8x16_t __a) - { - return (poly8x8_t)__builtin_neon_vget_lowv16qi ((int8x16_t) __a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_p16 (poly16x8_t __a) - { - return (poly16x4_t)__builtin_neon_vget_lowv8hi ((int16x8_t) __a); -@@ -6354,68 +7358,79 @@ vget_low_p16 (poly16x8_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_p64 (poly64x2_t __a) - { - return (poly64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_s64 (int64x2_t __a) - { - return (int64x1_t)__builtin_neon_vget_lowv2di (__a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vget_low_u64 (uint64x2_t __a) - { - return (uint64x1_t)__builtin_neon_vget_lowv2di ((int64x2_t) __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) 
- vcvt_s32_f32 (float32x2_t __a)
- {
- return (int32x2_t)__builtin_neon_vcvtsv2sf (__a);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_f32_s32 (int32x2_t __a)
- {
- return (float32x2_t)__builtin_neon_vcvtsv2si (__a);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_f32_u32 (uint32x2_t __a)
- {
- return (float32x2_t)__builtin_neon_vcvtuv2si ((int32x2_t) __a);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_u32_f32 (float32x2_t __a)
- {
- return (uint32x2_t)__builtin_neon_vcvtuv2sf (__a);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvtq_s32_f32 (float32x4_t __a)
- {
- return (int32x4_t)__builtin_neon_vcvtsv4sf (__a);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvtq_f32_s32 (int32x4_t __a)
- {
- return (float32x4_t)__builtin_neon_vcvtsv4si (__a);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvtq_f32_u32 (uint32x4_t __a)
- {
- return (float32x4_t)__builtin_neon_vcvtuv4si ((int32x4_t) __a);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvtq_u32_f32 (float32x4_t __a)
- {
- return (uint32x4_t)__builtin_neon_vcvtuv4sf (__a);
-@@ -6424,7 +7439,8 @@ vcvtq_u32_f32 (float32x4_t __a)
- 
- #pragma GCC push_options
- #pragma GCC target ("fpu=neon-fp16")
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
--__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_f16_f32 (float32x4_t __a)
- {
- return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
-@@ -6432,7 +7448,8 @@ vcvt_f16_f32 (float32x4_t __a)
- #endif
- 
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_f32_f16 (float16x4_t __a)
- {
- return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
-@@ -6440,1059 +7457,1232 @@ vcvt_f32_f16 (float16x4_t __a)
- #endif
- #pragma GCC pop_options
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_n_s32_f32 (float32x2_t __a, const int __b)
- {
- return (int32x2_t)__builtin_neon_vcvts_nv2sf (__a, __b);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_n_f32_s32 (int32x2_t __a, const int __b)
- {
- return (float32x2_t)__builtin_neon_vcvts_nv2si (__a, __b);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_n_f32_u32 (uint32x2_t __a, const int __b)
- {
- return (float32x2_t)__builtin_neon_vcvtu_nv2si ((int32x2_t) __a, __b);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvt_n_u32_f32 (float32x2_t __a, const int __b)
- {
- return (uint32x2_t)__builtin_neon_vcvtu_nv2sf (__a, __b);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvtq_n_s32_f32 (float32x4_t __a, const int __b)
- {
- return (int32x4_t)__builtin_neon_vcvts_nv4sf (__a, __b);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvtq_n_f32_s32 (int32x4_t __a, const int __b)
- {
- return (float32x4_t)__builtin_neon_vcvts_nv4si (__a, __b);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvtq_n_f32_u32 (uint32x4_t __a, const int __b)
- {
- return (float32x4_t)__builtin_neon_vcvtu_nv4si ((int32x4_t) __a, __b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vcvtq_n_u32_f32 (float32x4_t __a, const int __b)
- {
- return (uint32x4_t)__builtin_neon_vcvtu_nv4sf (__a, __b);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovn_s16 (int16x8_t __a)
- {
- return (int8x8_t)__builtin_neon_vmovnv8hi (__a);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovn_s32 (int32x4_t __a)
- {
- return (int16x4_t)__builtin_neon_vmovnv4si (__a);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovn_s64 (int64x2_t __a)
- {
- return (int32x2_t)__builtin_neon_vmovnv2di (__a);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovn_u16 (uint16x8_t __a)
- {
- return (uint8x8_t)__builtin_neon_vmovnv8hi ((int16x8_t) __a);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovn_u32 (uint32x4_t __a)
- {
- return (uint16x4_t)__builtin_neon_vmovnv4si ((int32x4_t) __a);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovn_u64 (uint64x2_t __a)
- {
- return (uint32x2_t)__builtin_neon_vmovnv2di ((int64x2_t) __a);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovn_s16 (int16x8_t __a)
- {
- return (int8x8_t)__builtin_neon_vqmovnsv8hi (__a);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovn_s32 (int32x4_t __a)
- {
- return (int16x4_t)__builtin_neon_vqmovnsv4si (__a);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovn_s64 (int64x2_t __a)
- {
- return (int32x2_t)__builtin_neon_vqmovnsv2di (__a);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovn_u16 (uint16x8_t __a)
- {
- return (uint8x8_t)__builtin_neon_vqmovnuv8hi ((int16x8_t) __a);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovn_u32 (uint32x4_t __a)
- {
- return (uint16x4_t)__builtin_neon_vqmovnuv4si ((int32x4_t) __a);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovn_u64 (uint64x2_t __a)
- {
- return (uint32x2_t)__builtin_neon_vqmovnuv2di ((int64x2_t) __a);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovun_s16 (int16x8_t __a)
- {
- return (uint8x8_t)__builtin_neon_vqmovunv8hi (__a);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovun_s32 (int32x4_t __a)
- {
- return (uint16x4_t)__builtin_neon_vqmovunv4si (__a);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqmovun_s64 (int64x2_t __a)
- {
- return (uint32x2_t)__builtin_neon_vqmovunv2di (__a);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovl_s8 (int8x8_t __a)
- {
- return (int16x8_t)__builtin_neon_vmovlsv8qi (__a);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovl_s16 (int16x4_t __a)
- {
- return (int32x4_t)__builtin_neon_vmovlsv4hi (__a);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovl_s32 (int32x2_t __a)
- {
- return (int64x2_t)__builtin_neon_vmovlsv2si (__a);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovl_u8 (uint8x8_t __a)
- {
- return (uint16x8_t)__builtin_neon_vmovluv8qi ((int8x8_t) __a);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovl_u16 (uint16x4_t __a)
- {
- return (uint32x4_t)__builtin_neon_vmovluv4hi ((int16x4_t) __a);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmovl_u32 (uint32x2_t __a)
- {
- return (uint64x2_t)__builtin_neon_vmovluv2si ((int32x2_t) __a);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl1_s8 (int8x8_t __a, int8x8_t __b)
- {
- return (int8x8_t)__builtin_neon_vtbl1v8qi (__a, __b);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl1_u8 (uint8x8_t __a, uint8x8_t __b)
- {
- return (uint8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
- 
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl1_p8 (poly8x8_t __a, uint8x8_t __b)
- {
- return (poly8x8_t)__builtin_neon_vtbl1v8qi ((int8x8_t) __a, (int8x8_t) __b);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl2_s8 (int8x8x2_t __a, int8x8_t __b)
- {
- union { int8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
- return (int8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, __b);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl2_u8 (uint8x8x2_t __a, uint8x8_t __b)
- {
- union { uint8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
- return (uint8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b);
- }
- 
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl2_p8 (poly8x8x2_t __a, uint8x8_t __b)
- {
- union { poly8x8x2_t __i; __builtin_neon_ti __o; } __au = { __a };
- return (poly8x8_t)__builtin_neon_vtbl2v8qi (__au.__o, (int8x8_t) __b);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl3_s8 (int8x8x3_t __a, int8x8_t __b)
- {
- union { int8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
- return (int8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, __b);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl3_u8 (uint8x8x3_t __a, uint8x8_t __b)
- {
- union { uint8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
- return (uint8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b);
- }
- 
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl3_p8 (poly8x8x3_t __a, uint8x8_t __b)
- {
- union { poly8x8x3_t __i; __builtin_neon_ei __o; } __au = { __a };
- return (poly8x8_t)__builtin_neon_vtbl3v8qi (__au.__o, (int8x8_t) __b);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl4_s8 (int8x8x4_t __a, int8x8_t __b)
- {
- union { int8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
- return (int8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, __b);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl4_u8 (uint8x8x4_t __a, uint8x8_t __b)
- {
- union { uint8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
- return (uint8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b);
- }
- 
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbl4_p8 (poly8x8x4_t __a, uint8x8_t __b)
- {
- union { poly8x8x4_t __i; __builtin_neon_oi __o; } __au = { __a };
- return (poly8x8_t)__builtin_neon_vtbl4v8qi (__au.__o, (int8x8_t) __b);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx1_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c)
- {
- return (int8x8_t)__builtin_neon_vtbx1v8qi (__a, __b, __c);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx1_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c)
- {
- return (uint8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
- }
- 
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx1_p8 (poly8x8_t __a, poly8x8_t __b, uint8x8_t __c)
- {
- return (poly8x8_t)__builtin_neon_vtbx1v8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx2_s8 (int8x8_t __a, int8x8x2_t __b, int8x8_t __c)
- {
- union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- return (int8x8_t)__builtin_neon_vtbx2v8qi (__a, __bu.__o, __c);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx2_u8 (uint8x8_t __a, uint8x8x2_t __b, uint8x8_t __c)
- {
- union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- return (uint8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
- }
- 
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx2_p8 (poly8x8_t __a, poly8x8x2_t __b, uint8x8_t __c)
- {
- union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- return (poly8x8_t)__builtin_neon_vtbx2v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx3_s8 (int8x8_t __a, int8x8x3_t __b, int8x8_t __c)
- {
- union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- return (int8x8_t)__builtin_neon_vtbx3v8qi (__a, __bu.__o, __c);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx3_u8 (uint8x8_t __a, uint8x8x3_t __b, uint8x8_t __c)
- {
- union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- return (uint8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
- }
- 
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx3_p8 (poly8x8_t __a, poly8x8x3_t __b, uint8x8_t __c)
- {
- union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- return (poly8x8_t)__builtin_neon_vtbx3v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
- }
- 
--__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx4_s8 (int8x8_t __a, int8x8x4_t __b, int8x8_t __c)
- {
- union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
- return (int8x8_t)__builtin_neon_vtbx4v8qi (__a, __bu.__o, __c);
- }
- 
--__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx4_u8 (uint8x8_t __a, uint8x8x4_t __b, uint8x8_t __c)
- {
- union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
- return (uint8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
- }
- 
--__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vtbx4_p8 (poly8x8_t __a, poly8x8x4_t __b, uint8x8_t __c)
- {
- union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b };
- return (poly8x8_t)__builtin_neon_vtbx4v8qi ((int8x8_t) __a, __bu.__o, (int8x8_t) __c);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
- {
- return (int16x4_t)__builtin_neon_vmul_lanev4hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
- {
- return (int32x2_t)__builtin_neon_vmul_lanev2si (__a, __b, __c);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_lane_f32 (float32x2_t __a, float32x2_t __b, const int __c)
- {
- return (float32x2_t)__builtin_neon_vmul_lanev2sf (__a, __b, __c);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
- {
- return (uint16x4_t)__builtin_neon_vmul_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
- {
- return (uint32x2_t)__builtin_neon_vmul_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
- {
- return (int16x8_t)__builtin_neon_vmul_lanev8hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
- {
- return (int32x4_t)__builtin_neon_vmul_lanev4si (__a, __b, __c);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_lane_f32 (float32x4_t __a, float32x2_t __b, const int __c)
- {
- return (float32x4_t)__builtin_neon_vmul_lanev4sf (__a, __b, __c);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_lane_u16 (uint16x8_t __a, uint16x4_t __b, const int __c)
- {
- return (uint16x8_t)__builtin_neon_vmul_lanev8hi ((int16x8_t) __a, (int16x4_t) __b, __c);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_lane_u32 (uint32x4_t __a, uint32x2_t __b, const int __c)
- {
- return (uint32x4_t)__builtin_neon_vmul_lanev4si ((int32x4_t) __a, (int32x2_t) __b, __c);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
- {
- return (int16x4_t)__builtin_neon_vmla_lanev4hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
- {
- return (int32x2_t)__builtin_neon_vmla_lanev2si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d)
- {
- return (float32x2_t)__builtin_neon_vmla_lanev2sf (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
- {
- return (uint16x4_t)__builtin_neon_vmla_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
- {
- return (uint32x2_t)__builtin_neon_vmla_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlaq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
- {
- return (int16x8_t)__builtin_neon_vmla_lanev8hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlaq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
- {
- return (int32x4_t)__builtin_neon_vmla_lanev4si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlaq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d)
- {
- return (float32x4_t)__builtin_neon_vmla_lanev4sf (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlaq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d)
- {
- return (uint16x8_t)__builtin_neon_vmla_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlaq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d)
- {
- return (uint32x4_t)__builtin_neon_vmla_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
- {
- return (int32x4_t)__builtin_neon_vmlals_lanev4hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
- {
- return (int64x2_t)__builtin_neon_vmlals_lanev2si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlal_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
- {
- return (uint32x4_t)__builtin_neon_vmlalu_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlal_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
- {
- return (uint64x2_t)__builtin_neon_vmlalu_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmlal_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
- {
- return (int32x4_t)__builtin_neon_vqdmlal_lanev4hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmlal_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
- {
- return (int64x2_t)__builtin_neon_vqdmlal_lanev2si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmls_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
- {
- return (int16x4_t)__builtin_neon_vmls_lanev4hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmls_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
- {
- return (int32x2_t)__builtin_neon_vmls_lanev2si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmls_lane_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c, const int __d)
- {
- return (float32x2_t)__builtin_neon_vmls_lanev2sf (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmls_lane_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
- {
- return (uint16x4_t)__builtin_neon_vmls_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmls_lane_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
- {
- return (uint32x2_t)__builtin_neon_vmls_lanev2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
- {
- return (int16x8_t)__builtin_neon_vmls_lanev8hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
- {
- return (int32x4_t)__builtin_neon_vmls_lanev4si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsq_lane_f32 (float32x4_t __a, float32x4_t __b, float32x2_t __c, const int __d)
- {
- return (float32x4_t)__builtin_neon_vmls_lanev4sf (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsq_lane_u16 (uint16x8_t __a, uint16x8_t __b, uint16x4_t __c, const int __d)
- {
- return (uint16x8_t)__builtin_neon_vmls_lanev8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x4_t) __c, __d);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsq_lane_u32 (uint32x4_t __a, uint32x4_t __b, uint32x2_t __c, const int __d)
- {
- return (uint32x4_t)__builtin_neon_vmls_lanev4si ((int32x4_t) __a, (int32x4_t) __b, (int32x2_t) __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
- {
- return (int32x4_t)__builtin_neon_vmlsls_lanev4hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
- {
- return (int64x2_t)__builtin_neon_vmlsls_lanev2si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsl_lane_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c, const int __d)
- {
- return (uint32x4_t)__builtin_neon_vmlslu_lanev4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c, __d);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmlsl_lane_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c, const int __d)
- {
- return (uint64x2_t)__builtin_neon_vmlslu_lanev2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmlsl_lane_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
- {
- return (int32x4_t)__builtin_neon_vqdmlsl_lanev4hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmlsl_lane_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
- {
- return (int64x2_t)__builtin_neon_vqdmlsl_lanev2si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
- {
- return (int32x4_t)__builtin_neon_vmulls_lanev4hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
- {
- return (int64x2_t)__builtin_neon_vmulls_lanev2si (__a, __b, __c);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmull_lane_u16 (uint16x4_t __a, uint16x4_t __b, const int __c)
- {
- return (uint32x4_t)__builtin_neon_vmullu_lanev4hi ((int16x4_t) __a, (int16x4_t) __b, __c);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmull_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __c)
- {
- return (uint64x2_t)__builtin_neon_vmullu_lanev2si ((int32x2_t) __a, (int32x2_t) __b, __c);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmull_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
- {
- return (int32x4_t)__builtin_neon_vqdmull_lanev4hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmull_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
- {
- return (int64x2_t)__builtin_neon_vqdmull_lanev2si (__a, __b, __c);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
- {
- return (int16x8_t)__builtin_neon_vqdmulh_lanev8hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
- {
- return (int32x4_t)__builtin_neon_vqdmulh_lanev4si (__a, __b, __c);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
- {
- return (int16x4_t)__builtin_neon_vqdmulh_lanev4hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
- {
- return (int32x2_t)__builtin_neon_vqdmulh_lanev2si (__a, __b, __c);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
- {
- return (int16x8_t)__builtin_neon_vqrdmulh_lanev8hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
- {
- return (int32x4_t)__builtin_neon_vqrdmulh_lanev4si (__a, __b, __c);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
- {
- return (int16x4_t)__builtin_neon_vqrdmulh_lanev4hi (__a, __b, __c);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
- {
- return (int32x2_t)__builtin_neon_vqrdmulh_lanev2si (__a, __b, __c);
- }
- 
- #ifdef __ARM_FEATURE_QRDMX
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
- {
- return (int16x8_t)__builtin_neon_vqrdmlah_lanev8hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
- {
- return (int32x4_t)__builtin_neon_vqrdmlah_lanev4si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
- {
- return (int16x4_t)__builtin_neon_vqrdmlah_lanev4hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
- {
- return (int32x2_t)__builtin_neon_vqrdmlah_lanev2si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d)
- {
- return (int16x8_t)__builtin_neon_vqrdmlsh_lanev8hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d)
- {
- return (int32x4_t)__builtin_neon_vqrdmlsh_lanev4si (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d)
- {
- return (int16x4_t)__builtin_neon_vqrdmlsh_lanev4hi (__a, __b, __c, __d);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d)
- {
- return (int32x2_t)__builtin_neon_vqrdmlsh_lanev2si (__a, __b, __c, __d);
- }
- #endif
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_n_s16 (int16x4_t __a, int16_t __b)
- {
- return (int16x4_t)__builtin_neon_vmul_nv4hi (__a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_n_s32 (int32x2_t __a, int32_t __b)
- {
- return (int32x2_t)__builtin_neon_vmul_nv2si (__a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_n_f32 (float32x2_t __a, float32_t __b)
- {
- return (float32x2_t)__builtin_neon_vmul_nv2sf (__a, (__builtin_neon_sf) __b);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_n_u16 (uint16x4_t __a, uint16_t __b)
- {
- return (uint16x4_t)__builtin_neon_vmul_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmul_n_u32 (uint32x2_t __a, uint32_t __b)
- {
- return (uint32x2_t)__builtin_neon_vmul_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_n_s16 (int16x8_t __a, int16_t __b)
- {
- return (int16x8_t)__builtin_neon_vmul_nv8hi (__a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_n_s32 (int32x4_t __a, int32_t __b)
- {
- return (int32x4_t)__builtin_neon_vmul_nv4si (__a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_n_f32 (float32x4_t __a, float32_t __b)
- {
- return (float32x4_t)__builtin_neon_vmul_nv4sf (__a, (__builtin_neon_sf) __b);
- }
- 
--__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_n_u16 (uint16x8_t __a, uint16_t __b)
- {
- return (uint16x8_t)__builtin_neon_vmul_nv8hi ((int16x8_t) __a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
- {
- return (uint32x4_t)__builtin_neon_vmul_nv4si ((int32x4_t) __a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmull_n_s16 (int16x4_t __a, int16_t __b)
- {
- return (int32x4_t)__builtin_neon_vmulls_nv4hi (__a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmull_n_s32 (int32x2_t __a, int32_t __b)
- {
- return (int64x2_t)__builtin_neon_vmulls_nv2si (__a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmull_n_u16 (uint16x4_t __a, uint16_t __b)
- {
- return (uint32x4_t)__builtin_neon_vmullu_nv4hi ((int16x4_t) __a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmull_n_u32 (uint32x2_t __a, uint32_t __b)
- {
- return (uint64x2_t)__builtin_neon_vmullu_nv2si ((int32x2_t) __a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmull_n_s16 (int16x4_t __a, int16_t __b)
- {
- return (int32x4_t)__builtin_neon_vqdmull_nv4hi (__a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmull_n_s32 (int32x2_t __a, int32_t __b)
- {
- return (int64x2_t)__builtin_neon_vqdmull_nv2si (__a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmulhq_n_s16 (int16x8_t __a, int16_t __b)
- {
- return (int16x8_t)__builtin_neon_vqdmulh_nv8hi (__a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmulhq_n_s32 (int32x4_t __a, int32_t __b)
- {
- return (int32x4_t)__builtin_neon_vqdmulh_nv4si (__a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmulh_n_s16 (int16x4_t __a, int16_t __b)
- {
- return (int16x4_t)__builtin_neon_vqdmulh_nv4hi (__a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqdmulh_n_s32 (int32x2_t __a, int32_t __b)
- {
- return (int32x2_t)__builtin_neon_vqdmulh_nv2si (__a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmulhq_n_s16 (int16x8_t __a, int16_t __b)
- {
- return (int16x8_t)__builtin_neon_vqrdmulh_nv8hi (__a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmulhq_n_s32 (int32x4_t __a, int32_t __b)
- {
- return (int32x4_t)__builtin_neon_vqrdmulh_nv4si (__a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmulh_n_s16 (int16x4_t __a, int16_t __b)
- {
- return (int16x4_t)__builtin_neon_vqrdmulh_nv4hi (__a, (__builtin_neon_hi) __b);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vqrdmulh_n_s32 (int32x2_t __a, int32_t __b)
- {
- return (int32x2_t)__builtin_neon_vqrdmulh_nv2si (__a, (__builtin_neon_si) __b);
- }
- 
--__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c)
- {
- return (int16x4_t)__builtin_neon_vmla_nv4hi (__a, __b, (__builtin_neon_hi) __c);
- }
- 
--__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c)
- {
- return (int32x2_t)__builtin_neon_vmla_nv2si (__a, __b, (__builtin_neon_si) __c);
- }
- 
--__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c)
- {
- return (float32x2_t)__builtin_neon_vmla_nv2sf (__a, __b, (__builtin_neon_sf) __c);
- }
- 
--__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c)
- {
- return (uint16x4_t)__builtin_neon_vmla_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c);
- }
- 
--__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vmla_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c)
- {
- return (uint32x2_t)__builtin_neon_vmla_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c);
- }
- 
--__extension__ static 
__inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) - { - return (int16x8_t)__builtin_neon_vmla_nv8hi (__a, __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) - { - return (int32x4_t)__builtin_neon_vmla_nv4si (__a, __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) - { - return (float32x4_t)__builtin_neon_vmla_nv4sf (__a, __b, (__builtin_neon_sf) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) - { - return (uint16x8_t)__builtin_neon_vmla_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlaq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) - { - return (uint32x4_t)__builtin_neon_vmla_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) - { - return (int32x4_t)__builtin_neon_vmlals_nv4hi (__a, __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - { - return (int64x2_t)__builtin_neon_vmlals_nv2si (__a, __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) - { - return (uint32x4_t)__builtin_neon_vmlalu_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlal_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) - { - return (uint64x2_t)__builtin_neon_vmlalu_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmlal_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) - { - return (int32x4_t)__builtin_neon_vqdmlal_nv4hi (__a, __b, (__builtin_neon_hi) __c); - } - --__extension__ static 
__inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmlal_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - { - return (int64x2_t)__builtin_neon_vqdmlal_nv2si (__a, __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_n_s16 (int16x4_t __a, int16x4_t __b, int16_t __c) - { - return (int16x4_t)__builtin_neon_vmls_nv4hi (__a, __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_n_s32 (int32x2_t __a, int32x2_t __b, int32_t __c) - { - return (int32x2_t)__builtin_neon_vmls_nv2si (__a, __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_n_f32 (float32x2_t __a, float32x2_t __b, float32_t __c) - { - return (float32x2_t)__builtin_neon_vmls_nv2sf (__a, __b, (__builtin_neon_sf) __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_n_u16 (uint16x4_t __a, uint16x4_t __b, uint16_t __c) - { - return (uint16x4_t)__builtin_neon_vmls_nv4hi ((int16x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmls_n_u32 (uint32x2_t __a, uint32x2_t __b, uint32_t __c) - { - return (uint32x2_t)__builtin_neon_vmls_nv2si ((int32x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_n_s16 (int16x8_t __a, int16x8_t __b, int16_t __c) - { - return (int16x8_t)__builtin_neon_vmls_nv8hi (__a, __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_n_s32 (int32x4_t __a, int32x4_t __b, int32_t __c) - { - return (int32x4_t)__builtin_neon_vmls_nv4si (__a, __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_n_f32 (float32x4_t __a, float32x4_t __b, float32_t __c) - { - return (float32x4_t)__builtin_neon_vmls_nv4sf (__a, __b, (__builtin_neon_sf) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_n_u16 (uint16x8_t __a, uint16x8_t __b, uint16_t __c) - { - return (uint16x8_t)__builtin_neon_vmls_nv8hi ((int16x8_t) __a, (int16x8_t) __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline uint32x4_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsq_n_u32 (uint32x4_t __a, uint32x4_t __b, uint32_t __c) - { - return (uint32x4_t)__builtin_neon_vmls_nv4si ((int32x4_t) __a, (int32x4_t) __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) - { - return (int32x4_t)__builtin_neon_vmlsls_nv4hi (__a, __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - { - return (int64x2_t)__builtin_neon_vmlsls_nv2si (__a, __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_n_u16 (uint32x4_t __a, uint16x4_t __b, uint16_t __c) - { - return (uint32x4_t)__builtin_neon_vmlslu_nv4hi ((int32x4_t) __a, (int16x4_t) __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmlsl_n_u32 (uint64x2_t __a, uint32x2_t __b, uint32_t __c) - { - return (uint64x2_t)__builtin_neon_vmlslu_nv2si ((int64x2_t) __a, (int32x2_t) __b, (__builtin_neon_si) __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmlsl_n_s16 (int32x4_t __a, int16x4_t __b, int16_t __c) - { - return (int32x4_t)__builtin_neon_vqdmlsl_nv4hi (__a, __b, (__builtin_neon_hi) __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - { - return (int64x2_t)__builtin_neon_vqdmlsl_nv2si (__a, __b, (__builtin_neon_si) __c); -@@ -7500,74 +8690,86 @@ vqdmlsl_n_s32 (int64x2_t __a, int32x2_t __b, int32_t __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_p64 (poly64x1_t __a, poly64x1_t __b, const int __c) - { - return (poly64x1_t)__builtin_neon_vextdi (__a, __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_s8 (int8x8_t __a, int8x8_t __b, const int __c) - { - return (int8x8_t)__builtin_neon_vextv8qi (__a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_s16 (int16x4_t __a, int16x4_t __b, const int __c) - { - return 
(int16x4_t)__builtin_neon_vextv4hi (__a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_s32 (int32x2_t __a, int32x2_t __b, const int __c) - { - return (int32x2_t)__builtin_neon_vextv2si (__a, __b, __c); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_s64 (int64x1_t __a, int64x1_t __b, const int __c) - { - return (int64x1_t)__builtin_neon_vextdi (__a, __b, __c); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_f32 (float32x2_t __a, float32x2_t __b, const int __c) - { - return (float32x2_t)__builtin_neon_vextv2sf (__a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) - { - return (uint8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) - { - return (uint16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) - { - return (uint32x2_t)__builtin_neon_vextv2si ((int32x2_t) __a, (int32x2_t) __b, __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) - { - return (uint64x1_t)__builtin_neon_vextdi ((int64x1_t) __a, (int64x1_t) __b, __c); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_p8 (poly8x8_t __a, poly8x8_t __b, const int __c) - { - return (poly8x8_t)__builtin_neon_vextv8qi ((int8x8_t) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) - { - return (poly16x4_t)__builtin_neon_vextv4hi ((int16x4_t) __a, (int16x4_t) __b, __c); -@@ -7575,290 +8777,338 @@ vext_p16 (poly16x4_t __a, poly16x4_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_p64 (poly64x2_t __a, poly64x2_t __b, const int __c) - { - return (poly64x2_t)__builtin_neon_vextv2di ((int64x2_t) 
__a, (int64x2_t) __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_s8 (int8x16_t __a, int8x16_t __b, const int __c) - { - return (int8x16_t)__builtin_neon_vextv16qi (__a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_s16 (int16x8_t __a, int16x8_t __b, const int __c) - { - return (int16x8_t)__builtin_neon_vextv8hi (__a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_s32 (int32x4_t __a, int32x4_t __b, const int __c) - { - return (int32x4_t)__builtin_neon_vextv4si (__a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_s64 (int64x2_t __a, int64x2_t __b, const int __c) - { - return (int64x2_t)__builtin_neon_vextv2di (__a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_f32 (float32x4_t __a, float32x4_t __b, const int __c) - { - return (float32x4_t)__builtin_neon_vextv4sf (__a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) - { - return (uint8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) - { - return (uint16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) - { - return (uint32x4_t)__builtin_neon_vextv4si ((int32x4_t) __a, (int32x4_t) __b, __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) - { - return (uint64x2_t)__builtin_neon_vextv2di ((int64x2_t) __a, (int64x2_t) __b, __c); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vextq_p8 (poly8x16_t __a, poly8x16_t __b, const int __c) - { - return (poly8x16_t)__builtin_neon_vextv16qi ((int8x16_t) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vextq_p16 (poly16x8_t __a, poly16x8_t __b, const int __c) - { - return (poly16x8_t)__builtin_neon_vextv8hi ((int16x8_t) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_s8 (int8x8_t __a) - { - return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_s16 (int16x4_t __a) - { - return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_s32 (int32x2_t __a) - { - return (int32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_f32 (float32x2_t __a) - { - return (float32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_u8 (uint8x8_t __a) - { - return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_u16 (uint16x4_t __a) - { - return (uint16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_u32 (uint32x2_t __a) - { - return (uint32x2_t) __builtin_shuffle (__a, (uint32x2_t) { 1, 0 }); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_p8 (poly8x8_t __a) - { - return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64_p16 (poly16x4_t __a) - { - return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 3, 2, 1, 0 }); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_s8 (int8x16_t __a) - { - return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_s16 (int16x8_t __a) - { - return 
(int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_s32 (int32x4_t __a) - { - return (int32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_f32 (float32x4_t __a) - { - return (float32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_u8 (uint8x16_t __a) - { - return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_u16 (uint16x8_t __a) - { - return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_u32 (uint32x4_t __a) - { - return (uint32x4_t) __builtin_shuffle (__a, (uint32x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_p8 (poly8x16_t __a) - { - return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev64q_p16 (poly16x8_t __a) - { - return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32_s8 (int8x8_t __a) - { - return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32_s16 (int16x4_t __a) - { - return (int16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32_u8 (uint8x8_t __a) - { - return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32_u16 (uint16x4_t __a) - { - return (uint16x4_t) __builtin_shuffle (__a, 
(uint16x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32_p8 (poly8x8_t __a) - { - return (poly8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32_p16 (poly16x4_t __a) - { - return (poly16x4_t) __builtin_shuffle (__a, (uint16x4_t) { 1, 0, 3, 2 }); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32q_s8 (int8x16_t __a) - { - return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32q_s16 (int16x8_t __a) - { - return (int16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32q_u8 (uint8x16_t __a) - { - return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32q_u16 (uint16x8_t __a) - { - return (uint16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32q_p8 (poly8x16_t __a) - { - return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev32q_p16 (poly16x8_t __a) - { - return (poly16x8_t) __builtin_shuffle (__a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev16_s8 (int8x8_t __a) - { - return (int8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev16_u8 (uint8x8_t __a) - { - return (uint8x8_t) __builtin_shuffle (__a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev16_p8 (poly8x8_t __a) - { - return (poly8x8_t) __builtin_shuffle (__a, 
(uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev16q_s8 (int8x16_t __a) - { - return (int8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev16q_u8 (uint8x16_t __a) - { - return (uint8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vrev16q_p8 (poly8x16_t __a) - { - return (poly8x16_t) __builtin_shuffle (__a, (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); -@@ -7866,74 +9116,86 @@ vrev16q_p8 (poly8x16_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_p64 (uint64x1_t __a, poly64x1_t __b, poly64x1_t __c) - { - return (poly64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_s8 (uint8x8_t __a, int8x8_t __b, int8x8_t __c) - { - return (int8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_s16 (uint16x4_t __a, int16x4_t __b, int16x4_t __c) - { - return (int16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_s32 (uint32x2_t __a, int32x2_t __b, int32x2_t __c) - { - return (int32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, __b, __c); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_s64 (uint64x1_t __a, int64x1_t __b, int64x1_t __c) - { - return (int64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, __b, __c); - } - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_f32 (uint32x2_t __a, float32x2_t __b, float32x2_t __c) - { - return (float32x2_t)__builtin_neon_vbslv2sf ((int32x2_t) __a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) - { - return (uint8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, 
(int8x8_t) __b, (int8x8_t) __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) - { - return (uint16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) - { - return (uint32x2_t)__builtin_neon_vbslv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_u64 (uint64x1_t __a, uint64x1_t __b, uint64x1_t __c) - { - return (uint64x1_t)__builtin_neon_vbsldi ((int64x1_t) __a, (int64x1_t) __b, (int64x1_t) __c); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_p8 (uint8x8_t __a, poly8x8_t __b, poly8x8_t __c) - { - return (poly8x8_t)__builtin_neon_vbslv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) - { - return (poly16x4_t)__builtin_neon_vbslv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); -@@ -7941,74 +9203,86 @@ vbsl_p16 (uint16x4_t __a, poly16x4_t __b, poly16x4_t __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_p64 (uint64x2_t __a, poly64x2_t __b, poly64x2_t __c) - { - return (poly64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); - } - - #pragma GCC pop_options --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_s8 (uint8x16_t __a, int8x16_t __b, int8x16_t __c) - { - return (int8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_s16 (uint16x8_t __a, int16x8_t __b, int16x8_t __c) - { - return (int16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_s32 (uint32x4_t __a, int32x4_t __b, int32x4_t __c) - { - return (int32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, __b, __c); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, 
__artificial__)) - vbslq_s64 (uint64x2_t __a, int64x2_t __b, int64x2_t __c) - { - return (int64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, __b, __c); - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_f32 (uint32x4_t __a, float32x4_t __b, float32x4_t __c) - { - return (float32x4_t)__builtin_neon_vbslv4sf ((int32x4_t) __a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) - { - return (uint8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) - { - return (uint16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) - { - return (uint32x4_t)__builtin_neon_vbslv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c) - { - return (uint64x2_t)__builtin_neon_vbslv2di ((int64x2_t) __a, (int64x2_t) __b, (int64x2_t) __c); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_p8 (uint8x16_t __a, poly8x16_t __b, poly8x16_t __c) - { - return (poly8x16_t)__builtin_neon_vbslv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) - { - return (poly16x8_t)__builtin_neon_vbslv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); -@@ -8025,7 +9299,8 @@ vbslq_p16 (uint16x8_t __a, poly16x8_t __b, poly16x8_t __c) - vector, and will itself be loaded in reverse order (again, relative to the - neon intrinsics view, i.e. that would result from a "vld1" instruction). 
*/ - --__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_s8 (int8x8_t __a, int8x8_t __b) - { - int8x8x2_t __rv; -@@ -8043,7 +9318,8 @@ vtrn_s8 (int8x8_t __a, int8x8_t __b) - return __rv; - } - --__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_s16 (int16x4_t __a, int16x4_t __b) - { - int16x4x2_t __rv; -@@ -8057,7 +9333,8 @@ vtrn_s16 (int16x4_t __a, int16x4_t __b) - return __rv; - } - --__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_u8 (uint8x8_t __a, uint8x8_t __b) - { - uint8x8x2_t __rv; -@@ -8075,7 +9352,8 @@ vtrn_u8 (uint8x8_t __a, uint8x8_t __b) - return __rv; - } - --__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_u16 (uint16x4_t __a, uint16x4_t __b) - { - uint16x4x2_t __rv; -@@ -8089,7 +9367,8 @@ vtrn_u16 (uint16x4_t __a, uint16x4_t __b) - return __rv; - } - --__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_p8 (poly8x8_t __a, poly8x8_t __b) - { - poly8x8x2_t __rv; -@@ -8107,7 +9386,8 @@ vtrn_p8 (poly8x8_t __a, poly8x8_t __b) - return __rv; - } - --__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_p16 (poly16x4_t __a, poly16x4_t __b) - { - poly16x4x2_t __rv; -@@ -8121,7 +9401,8 @@ vtrn_p16 (poly16x4_t __a, poly16x4_t __b) - return __rv; - } - --__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_s32 (int32x2_t __a, int32x2_t __b) - { - int32x2x2_t __rv; -@@ -8135,7 +9416,8 @@ vtrn_s32 (int32x2_t __a, int32x2_t __b) - return __rv; - } - --__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_f32 (float32x2_t __a, float32x2_t __b) - { - float32x2x2_t __rv; -@@ -8149,7 +9431,8 @@ vtrn_f32 (float32x2_t __a, float32x2_t __b) - return __rv; - } - --__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrn_u32 (uint32x2_t __a, uint32x2_t __b) - { - uint32x2x2_t __rv; -@@ -8163,7 +9446,8 @@ vtrn_u32 (uint32x2_t __a, uint32x2_t __b) - return __rv; - } - --__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_s8 (int8x16_t __a, int8x16_t __b) - { - int8x16x2_t __rv; -@@ -8181,7 +9465,8 @@ vtrnq_s8 (int8x16_t __a, int8x16_t __b) - return __rv; - } - --__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline int16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_s16 (int16x8_t __a, int16x8_t __b) - { - int16x8x2_t __rv; -@@ -8199,7 +9484,8 @@ vtrnq_s16 (int16x8_t __a, int16x8_t __b) - return __rv; - } - --__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_s32 (int32x4_t __a, int32x4_t __b) - { - int32x4x2_t __rv; -@@ -8213,7 +9499,8 @@ vtrnq_s32 (int32x4_t __a, int32x4_t __b) - return __rv; - } - --__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_f32 (float32x4_t __a, float32x4_t __b) - { - float32x4x2_t __rv; -@@ -8227,7 +9514,8 @@ vtrnq_f32 (float32x4_t __a, float32x4_t __b) - return __rv; - } - --__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) - { - uint8x16x2_t __rv; -@@ -8245,7 +9533,8 @@ vtrnq_u8 (uint8x16_t __a, uint8x16_t __b) - return __rv; - } - --__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) - { - uint16x8x2_t __rv; -@@ -8263,7 +9552,8 @@ vtrnq_u16 (uint16x8_t __a, uint16x8_t __b) - return __rv; - } - --__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) - { - uint32x4x2_t __rv; -@@ -8277,7 +9567,8 @@ vtrnq_u32 (uint32x4_t __a, uint32x4_t __b) - return __rv; - } - --__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) - { - poly8x16x2_t __rv; -@@ -8295,7 +9586,8 @@ vtrnq_p8 (poly8x16_t __a, poly8x16_t __b) - return __rv; - } - --__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) - { - poly16x8x2_t __rv; -@@ -8313,7 +9605,8 @@ vtrnq_p16 (poly16x8_t __a, poly16x8_t __b) - return __rv; - } - --__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzip_s8 (int8x8_t __a, int8x8_t __b) - { - int8x8x2_t __rv; -@@ -8331,7 +9624,8 @@ vzip_s8 (int8x8_t __a, int8x8_t __b) - return __rv; - } - --__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzip_s16 (int16x4_t __a, int16x4_t __b) - { - int16x4x2_t __rv; -@@ -8345,7 +9639,8 @@ vzip_s16 (int16x4_t __a, int16x4_t __b) - return __rv; - } - --__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vzip_u8 (uint8x8_t __a, uint8x8_t __b) - { - uint8x8x2_t __rv; -@@ -8363,7 +9658,8 @@ vzip_u8 (uint8x8_t __a, uint8x8_t __b) - return __rv; - } - --__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzip_u16 (uint16x4_t __a, uint16x4_t __b) - { - uint16x4x2_t __rv; -@@ -8377,7 +9673,8 @@ vzip_u16 (uint16x4_t __a, uint16x4_t __b) - return __rv; - } - --__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzip_p8 (poly8x8_t __a, poly8x8_t __b) - { - poly8x8x2_t __rv; -@@ -8395,7 +9692,8 @@ vzip_p8 (poly8x8_t __a, poly8x8_t __b) - return __rv; - } - --__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzip_p16 (poly16x4_t __a, poly16x4_t __b) - { - poly16x4x2_t __rv; -@@ -8409,7 +9707,8 @@ vzip_p16 (poly16x4_t __a, poly16x4_t __b) - return __rv; - } - --__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzip_s32 (int32x2_t __a, int32x2_t __b) - { - int32x2x2_t __rv; -@@ -8423,7 +9722,8 @@ vzip_s32 (int32x2_t __a, int32x2_t __b) - return __rv; - } - --__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzip_f32 (float32x2_t __a, float32x2_t __b) - { - float32x2x2_t __rv; -@@ -8437,7 +9737,8 @@ vzip_f32 (float32x2_t __a, float32x2_t __b) - return __rv; - } - --__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzip_u32 (uint32x2_t __a, uint32x2_t __b) - { - uint32x2x2_t __rv; -@@ -8451,7 +9752,8 @@ vzip_u32 (uint32x2_t __a, uint32x2_t __b) - return __rv; - } - --__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_s8 (int8x16_t __a, int8x16_t __b) - { - int8x16x2_t __rv; -@@ -8469,7 +9771,8 @@ vzipq_s8 (int8x16_t __a, int8x16_t __b) - return __rv; - } - --__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_s16 (int16x8_t __a, int16x8_t __b) - { - int16x8x2_t __rv; -@@ -8487,7 +9790,8 @@ vzipq_s16 (int16x8_t __a, int16x8_t __b) - return __rv; - } - --__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_s32 (int32x4_t __a, int32x4_t __b) - { - int32x4x2_t __rv; -@@ -8501,7 +9805,8 @@ vzipq_s32 (int32x4_t __a, int32x4_t __b) - return __rv; - } - --__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_f32 
(float32x4_t __a, float32x4_t __b) - { - float32x4x2_t __rv; -@@ -8515,7 +9820,8 @@ vzipq_f32 (float32x4_t __a, float32x4_t __b) - return __rv; - } - --__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_u8 (uint8x16_t __a, uint8x16_t __b) - { - uint8x16x2_t __rv; -@@ -8533,7 +9839,8 @@ vzipq_u8 (uint8x16_t __a, uint8x16_t __b) - return __rv; - } - --__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_u16 (uint16x8_t __a, uint16x8_t __b) - { - uint16x8x2_t __rv; -@@ -8551,7 +9858,8 @@ vzipq_u16 (uint16x8_t __a, uint16x8_t __b) - return __rv; - } - --__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_u32 (uint32x4_t __a, uint32x4_t __b) - { - uint32x4x2_t __rv; -@@ -8565,7 +9873,8 @@ vzipq_u32 (uint32x4_t __a, uint32x4_t __b) - return __rv; - } - --__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_p8 (poly8x16_t __a, poly8x16_t __b) - { - poly8x16x2_t __rv; -@@ -8583,7 +9892,8 @@ vzipq_p8 (poly8x16_t __a, poly8x16_t __b) - return __rv; - } - --__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vzipq_p16 (poly16x8_t __a, poly16x8_t __b) - { - poly16x8x2_t __rv; -@@ -8601,7 +9911,8 @@ vzipq_p16 (poly16x8_t __a, poly16x8_t __b) - return __rv; - } - --__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_s8 (int8x8_t __a, int8x8_t __b) - { - int8x8x2_t __rv; -@@ -8619,7 +9930,8 @@ vuzp_s8 (int8x8_t __a, int8x8_t __b) - return __rv; - } - --__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_s16 (int16x4_t __a, int16x4_t __b) - { - int16x4x2_t __rv; -@@ -8633,7 +9945,8 @@ vuzp_s16 (int16x4_t __a, int16x4_t __b) - return __rv; - } - --__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_s32 (int32x2_t __a, int32x2_t __b) - { - int32x2x2_t __rv; -@@ -8647,7 +9960,8 @@ vuzp_s32 (int32x2_t __a, int32x2_t __b) - return __rv; - } - --__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_f32 (float32x2_t __a, float32x2_t __b) - { - float32x2x2_t __rv; -@@ -8661,7 +9975,8 @@ vuzp_f32 (float32x2_t __a, float32x2_t __b) - return __rv; - } - --__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_u8 (uint8x8_t __a, uint8x8_t __b) - { - uint8x8x2_t __rv; -@@ 
-8679,7 +9994,8 @@ vuzp_u8 (uint8x8_t __a, uint8x8_t __b) - return __rv; - } - --__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_u16 (uint16x4_t __a, uint16x4_t __b) - { - uint16x4x2_t __rv; -@@ -8693,7 +10009,8 @@ vuzp_u16 (uint16x4_t __a, uint16x4_t __b) - return __rv; - } - --__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_u32 (uint32x2_t __a, uint32x2_t __b) - { - uint32x2x2_t __rv; -@@ -8707,7 +10024,8 @@ vuzp_u32 (uint32x2_t __a, uint32x2_t __b) - return __rv; - } - --__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_p8 (poly8x8_t __a, poly8x8_t __b) - { - poly8x8x2_t __rv; -@@ -8725,7 +10043,8 @@ vuzp_p8 (poly8x8_t __a, poly8x8_t __b) - return __rv; - } - --__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzp_p16 (poly16x4_t __a, poly16x4_t __b) - { - poly16x4x2_t __rv; -@@ -8739,7 +10058,8 @@ vuzp_p16 (poly16x4_t __a, poly16x4_t __b) - return __rv; - } - --__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_s8 (int8x16_t __a, int8x16_t __b) - { - int8x16x2_t __rv; -@@ -8757,7 +10077,8 @@ vuzpq_s8 (int8x16_t __a, int8x16_t __b) - return __rv; - } - --__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_s16 (int16x8_t __a, int16x8_t __b) - { - int16x8x2_t __rv; -@@ -8775,7 +10096,8 @@ vuzpq_s16 (int16x8_t __a, int16x8_t __b) - return __rv; - } - --__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_s32 (int32x4_t __a, int32x4_t __b) - { - int32x4x2_t __rv; -@@ -8789,7 +10111,8 @@ vuzpq_s32 (int32x4_t __a, int32x4_t __b) - return __rv; - } - --__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_f32 (float32x4_t __a, float32x4_t __b) - { - float32x4x2_t __rv; -@@ -8803,7 +10126,8 @@ vuzpq_f32 (float32x4_t __a, float32x4_t __b) - return __rv; - } - --__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) - { - uint8x16x2_t __rv; -@@ -8821,7 +10145,8 @@ vuzpq_u8 (uint8x16_t __a, uint8x16_t __b) - return __rv; - } - --__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_u16 (uint16x8_t __a, uint16x8_t __b) - { - uint16x8x2_t __rv; -@@ -8839,7 +10164,8 @@ vuzpq_u16 (uint16x8_t __a, 
uint16x8_t __b) - return __rv; - } - --__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_u32 (uint32x4_t __a, uint32x4_t __b) - { - uint32x4x2_t __rv; -@@ -8853,7 +10179,8 @@ vuzpq_u32 (uint32x4_t __a, uint32x4_t __b) - return __rv; - } - --__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_p8 (poly8x16_t __a, poly8x16_t __b) - { - poly8x16x2_t __rv; -@@ -8871,7 +10198,8 @@ vuzpq_p8 (poly8x16_t __a, poly8x16_t __b) - return __rv; - } - --__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vuzpq_p16 (poly16x8_t __a, poly16x8_t __b) - { - poly16x8x2_t __rv; -@@ -8891,82 +10219,95 @@ vuzpq_p16 (poly16x8_t __a, poly16x8_t __b) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_p64 (const poly64_t * __a) - { - return (poly64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_s8 (const int8_t * __a) - { - return (int8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_s16 (const int16_t * __a) - { - return (int16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_s32 (const int32_t * __a) - { - return (int32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a); - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_s64 (const int64_t * __a) - { - return (int64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_f16 (const float16_t * __a) - { - return __builtin_neon_vld1v4hf (__a); - } - #endif - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_f32 (const float32_t * __a) - { - return (float32x2_t)__builtin_neon_vld1v2sf ((const __builtin_neon_sf *) __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_u8 (const uint8_t * __a) - { - return (uint8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_u16 (const uint16_t * __a) - { - return (uint16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_u32 (const uint32_t * __a) - { - return (uint32x2_t)__builtin_neon_vld1v2si ((const __builtin_neon_si *) __a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_u64 (const uint64_t * __a) - { - return (uint64x1_t)__builtin_neon_vld1di ((const __builtin_neon_di *) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_p8 (const poly8_t * __a) - { - return (poly8x8_t)__builtin_neon_vld1v8qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_p16 (const poly16_t * __a) - { - return (poly16x4_t)__builtin_neon_vld1v4hi ((const __builtin_neon_hi *) __a); -@@ -8974,144 +10315,167 @@ vld1_p16 (const poly16_t * __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_p64 (const poly64_t * __a) - { - return (poly64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_s8 (const int8_t * __a) - { - return (int8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_s16 (const int16_t * __a) - { - return (int16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_s32 (const int32_t * __a) - { - return (int32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a); - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_s64 (const int64_t * __a) - { - return (int64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined 
(__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_f16 (const float16_t * __a) - { - return __builtin_neon_vld1v8hf (__a); - } - #endif - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_f32 (const float32_t * __a) - { - return (float32x4_t)__builtin_neon_vld1v4sf ((const __builtin_neon_sf *) __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_u8 (const uint8_t * __a) - { - return (uint8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_u16 (const uint16_t * __a) - { - return (uint16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_u32 (const uint32_t * __a) - { - return (uint32x4_t)__builtin_neon_vld1v4si ((const __builtin_neon_si *) __a); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_u64 (const uint64_t * __a) - { - return (uint64x2_t)__builtin_neon_vld1v2di ((const __builtin_neon_di *) __a); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_p8 (const poly8_t * __a) - { - return (poly8x16_t)__builtin_neon_vld1v16qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_p16 (const poly16_t * __a) - { - return (poly16x8_t)__builtin_neon_vld1v8hi ((const __builtin_neon_hi *) __a); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_s8 (const int8_t * __a, int8x8_t __b, const int __c) - { - return (int8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, __b, __c); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_s16 (const int16_t * __a, int16x4_t __b, const int __c) - { - return (int16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, __b, __c); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_s32 (const int32_t * __a, int32x2_t __b, const int __c) - { - return 
(int32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, __b, __c); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_f16 (const float16_t * __a, float16x4_t __b, const int __c) - { - return vset_lane_f16 (*__a, __b, __c); - } - #endif - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_f32 (const float32_t * __a, float32x2_t __b, const int __c) - { - return (float32x2_t)__builtin_neon_vld1_lanev2sf ((const __builtin_neon_sf *) __a, __b, __c); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_u8 (const uint8_t * __a, uint8x8_t __b, const int __c) - { - return (uint8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_u16 (const uint16_t * __a, uint16x4_t __b, const int __c) - { - return (uint16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_u32 (const uint32_t * __a, uint32x2_t __b, const int __c) - { - return (uint32x2_t)__builtin_neon_vld1_lanev2si ((const __builtin_neon_si *) __a, (int32x2_t) __b, __c); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_p8 (const poly8_t * __a, poly8x8_t __b, const int __c) - { - return (poly8x8_t)__builtin_neon_vld1_lanev8qi ((const __builtin_neon_qi *) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c) - { - return (poly16x4_t)__builtin_neon_vld1_lanev4hi ((const __builtin_neon_hi *) __a, (int16x4_t) __b, __c); -@@ -9119,82 +10483,95 @@ vld1_lane_p16 (const poly16_t * __a, poly16x4_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_p64 (const poly64_t * __a, poly64x1_t __b, const int __c) - { - return (poly64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_s64 (const int64_t * __a, int64x1_t __b, 
const int __c) - { - return (int64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, __b, __c); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_lane_u64 (const uint64_t * __a, uint64x1_t __b, const int __c) - { - return (uint64x1_t)__builtin_neon_vld1_lanedi ((const __builtin_neon_di *) __a, (int64x1_t) __b, __c); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_s8 (const int8_t * __a, int8x16_t __b, const int __c) - { - return (int8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, __b, __c); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_s16 (const int16_t * __a, int16x8_t __b, const int __c) - { - return (int16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, __b, __c); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_s32 (const int32_t * __a, int32x4_t __b, const int __c) - { - return (int32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, __b, __c); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_f16 (const float16_t * __a, float16x8_t __b, const int __c) - { - return vsetq_lane_f16 (*__a, __b, __c); - } - #endif - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_f32 (const float32_t * __a, float32x4_t __b, const int __c) - { - return (float32x4_t)__builtin_neon_vld1_lanev4sf ((const __builtin_neon_sf *) __a, __b, __c); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_u8 (const uint8_t * __a, uint8x16_t __b, const int __c) - { - return (uint8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_u16 (const uint16_t * __a, uint16x8_t __b, const int __c) - { - return (uint16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_u32 (const uint32_t * __a, uint32x4_t __b, const int __c) - { - return (uint32x4_t)__builtin_neon_vld1_lanev4si ((const __builtin_neon_si *) __a, (int32x4_t) __b, __c); - } - --__extension__ static __inline poly8x16_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_p8 (const poly8_t * __a, poly8x16_t __b, const int __c) - { - return (poly8x16_t)__builtin_neon_vld1_lanev16qi ((const __builtin_neon_qi *) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c) - { - return (poly16x8_t)__builtin_neon_vld1_lanev8hi ((const __builtin_neon_hi *) __a, (int16x8_t) __b, __c); -@@ -9202,45 +10579,52 @@ vld1q_lane_p16 (const poly16_t * __a, poly16x8_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_p64 (const poly64_t * __a, poly64x2_t __b, const int __c) - { - return (poly64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_s64 (const int64_t * __a, int64x2_t __b, const int __c) - { - return (int64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, __b, __c); - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_lane_u64 (const uint64_t * __a, uint64x2_t __b, const int __c) - { - return (uint64x2_t)__builtin_neon_vld1_lanev2di ((const __builtin_neon_di *) __a, (int64x2_t) __b, __c); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_s8 (const int8_t * __a) - { - return (int8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_s16 (const int16_t * __a) - { - return (int16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_s32 (const int32_t * __a) - { - return (int32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_f16 (const float16_t * __a) - { - float16_t __f = *__a; -@@ -9248,37 +10632,43 @@ vld1_dup_f16 (const float16_t * __a) - } - #endif - --__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_f32 (const float32_t * __a) - { - return (float32x2_t)__builtin_neon_vld1_dupv2sf ((const __builtin_neon_sf *) __a); - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_u8 (const uint8_t * __a) - { - return (uint8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_u16 (const uint16_t * __a) - { - return (uint16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_u32 (const uint32_t * __a) - { - return (uint32x2_t)__builtin_neon_vld1_dupv2si ((const __builtin_neon_si *) __a); - } - --__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_p8 (const poly8_t * __a) - { - return (poly8x8_t)__builtin_neon_vld1_dupv8qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_p16 (const poly16_t * __a) - { - return (poly16x4_t)__builtin_neon_vld1_dupv4hi ((const __builtin_neon_hi *) __a); -@@ -9286,45 +10676,52 @@ vld1_dup_p16 (const poly16_t * __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_p64 (const poly64_t * __a) - { - return (poly64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_s64 (const int64_t * __a) - { - return (int64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1_dup_u64 (const uint64_t * __a) - { - return (uint64x1_t)__builtin_neon_vld1_dupdi ((const __builtin_neon_di *) __a); - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_s8 (const int8_t * __a) - { - return (int8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_s16 (const int16_t * __a) - { - return (int16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) 
__a); - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_s32 (const int32_t * __a) - { - return (int32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_f16 (const float16_t * __a) - { - float16_t __f = *__a; -@@ -9332,37 +10729,43 @@ vld1q_dup_f16 (const float16_t * __a) - } - #endif - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_f32 (const float32_t * __a) - { - return (float32x4_t)__builtin_neon_vld1_dupv4sf ((const __builtin_neon_sf *) __a); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_u8 (const uint8_t * __a) - { - return (uint8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_u16 (const uint16_t * __a) - { - return (uint16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_u32 (const uint32_t * __a) - { - return (uint32x4_t)__builtin_neon_vld1_dupv4si ((const __builtin_neon_si *) __a); - } - --__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_p8 (const poly8_t * __a) - { - return (poly8x16_t)__builtin_neon_vld1_dupv16qi ((const __builtin_neon_qi *) __a); - } - --__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_p16 (const poly16_t * __a) - { - return (poly16x8_t)__builtin_neon_vld1_dupv8hi ((const __builtin_neon_hi *) __a); -@@ -9370,20 +10773,23 @@ vld1q_dup_p16 (const poly16_t * __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_p64 (const poly64_t * __a) - { - return (poly64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); - } - - #pragma GCC pop_options --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_s64 (const int64_t * __a) - { - return (int64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); - } - --__extension__ static __inline 
uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld1q_dup_u64 (const uint64_t * __a) - { - return (uint64x2_t)__builtin_neon_vld1_dupv2di ((const __builtin_neon_di *) __a); -@@ -9391,82 +10797,95 @@ vld1q_dup_u64 (const uint64_t * __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_p64 (poly64_t * __a, poly64x1_t __b) - { - __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); - } - - #pragma GCC pop_options --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_s8 (int8_t * __a, int8x8_t __b) - { - __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_s16 (int16_t * __a, int16x4_t __b) - { - __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_s32 (int32_t * __a, int32x2_t __b) - { - __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_s64 (int64_t * __a, int64x1_t __b) - { - __builtin_neon_vst1di ((__builtin_neon_di *) __a, __b); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_f16 (float16_t * __a, float16x4_t __b) - { - __builtin_neon_vst1v4hf (__a, __b); - } - #endif - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_f32 (float32_t * __a, float32x2_t __b) - { - __builtin_neon_vst1v2sf ((__builtin_neon_sf *) __a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_u8 (uint8_t * __a, uint8x8_t __b) - { - __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_u16 (uint16_t * __a, uint16x4_t __b) - { - __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_u32 (uint32_t * __a, uint32x2_t __b) - { - __builtin_neon_vst1v2si ((__builtin_neon_si *) __a, (int32x2_t) __b); - } - --__extension__ static __inline void __attribute__ 
((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_u64 (uint64_t * __a, uint64x1_t __b) - { - __builtin_neon_vst1di ((__builtin_neon_di *) __a, (int64x1_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_p8 (poly8_t * __a, poly8x8_t __b) - { - __builtin_neon_vst1v8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_p16 (poly16_t * __a, poly16x4_t __b) - { - __builtin_neon_vst1v4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b); -@@ -9474,144 +10893,167 @@ vst1_p16 (poly16_t * __a, poly16x4_t __b) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_p64 (poly64_t * __a, poly64x2_t __b) - { - __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); - } - - #pragma GCC pop_options --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_s8 (int8_t * __a, int8x16_t __b) - { - __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_s16 (int16_t * __a, int16x8_t __b) - { - __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_s32 (int32_t * __a, int32x4_t __b) - { - __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_s64 (int64_t * __a, int64x2_t __b) - { - __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, __b); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_f16 (float16_t * __a, float16x8_t __b) - { - __builtin_neon_vst1v8hf (__a, __b); - } - #endif - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_f32 (float32_t * __a, float32x4_t __b) - { - __builtin_neon_vst1v4sf ((__builtin_neon_sf *) __a, __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_u8 (uint8_t * __a, uint8x16_t __b) - { - __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_u16 (uint16_t * __a, uint16x8_t __b) - { - __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_u32 (uint32_t * __a, uint32x4_t __b) - { - __builtin_neon_vst1v4si ((__builtin_neon_si *) __a, (int32x4_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_u64 (uint64_t * __a, uint64x2_t __b) - { - __builtin_neon_vst1v2di ((__builtin_neon_di *) __a, (int64x2_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_p8 (poly8_t * __a, poly8x16_t __b) - { - __builtin_neon_vst1v16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_p16 (poly16_t * __a, poly16x8_t __b) - { - __builtin_neon_vst1v8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_s8 (int8_t * __a, int8x8_t __b, const int __c) - { - __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_s16 (int16_t * __a, int16x4_t __b, const int __c) - { - __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_s32 (int32_t * __a, int32x2_t __b, const int __c) - { - __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, __b, __c); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_f16 (float16_t * __a, float16x4_t __b, const int __c) - { - __builtin_neon_vst1_lanev4hf (__a, __b, __c); - } - #endif - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_f32 (float32_t * __a, float32x2_t __b, const int __c) - { - __builtin_neon_vst1_lanev2sf ((__builtin_neon_sf *) __a, __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_u8 (uint8_t * __a, uint8x8_t __b, const int __c) - { - __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_u16 (uint16_t * __a, uint16x4_t __b, const int __c) - { - __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_u32 (uint32_t * __a, uint32x2_t __b, const int __c) - { - __builtin_neon_vst1_lanev2si ((__builtin_neon_si *) __a, (int32x2_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_p8 (poly8_t * __a, poly8x8_t __b, const int __c) - { - __builtin_neon_vst1_lanev8qi ((__builtin_neon_qi *) __a, (int8x8_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c) - { - __builtin_neon_vst1_lanev4hi ((__builtin_neon_hi *) __a, (int16x4_t) __b, __c); -@@ -9619,82 +11061,95 @@ vst1_lane_p16 (poly16_t * __a, poly16x4_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_p64 (poly64_t * __a, poly64x1_t __b, const int __c) - { - __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_s64 (int64_t * __a, int64x1_t __b, const int __c) - { - __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1_lane_u64 (uint64_t * __a, uint64x1_t __b, const int __c) - { - __builtin_neon_vst1_lanedi ((__builtin_neon_di *) __a, (int64x1_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_s8 (int8_t * __a, int8x16_t __b, const int __c) - { - __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_s16 (int16_t * __a, int16x8_t __b, const int __c) - { - __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_s32 (int32_t * __a, int32x4_t __b, const int __c) - { - __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, __b, __c); - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline void __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_f16 (float16_t * __a, float16x8_t __b, const int __c) - { - __builtin_neon_vst1_lanev8hf (__a, __b, __c); - } - #endif - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_f32 (float32_t * __a, float32x4_t __b, const int __c) - { - __builtin_neon_vst1_lanev4sf ((__builtin_neon_sf *) __a, __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_u8 (uint8_t * __a, uint8x16_t __b, const int __c) - { - __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_u16 (uint16_t * __a, uint16x8_t __b, const int __c) - { - __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_u32 (uint32_t * __a, uint32x4_t __b, const int __c) - { - __builtin_neon_vst1_lanev4si ((__builtin_neon_si *) __a, (int32x4_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_p8 (poly8_t * __a, poly8x16_t __b, const int __c) - { - __builtin_neon_vst1_lanev16qi ((__builtin_neon_qi *) __a, (int8x16_t) __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c) - { - __builtin_neon_vst1_lanev8hi ((__builtin_neon_hi *) __a, (int16x8_t) __b, __c); -@@ -9702,26 +11157,30 @@ vst1q_lane_p16 (poly16_t * __a, poly16x8_t __b, const int __c) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_p64 (poly64_t * __a, poly64x2_t __b, const int __c) - { - __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); - } - - #pragma GCC pop_options --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_s64 (int64_t * __a, int64x2_t __b, const int __c) - { - __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, __b, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst1q_lane_u64 (uint64_t * __a, uint64x2_t __b, const int __c) - { - __builtin_neon_vst1_lanev2di ((__builtin_neon_di *) __a, (int64x2_t) __b, __c); - } - --__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline 
int8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_s8 (const int8_t * __a) - { - union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9729,7 +11188,8 @@ vld2_s8 (const int8_t * __a) - return __rv.__i; - } - --__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_s16 (const int16_t * __a) - { - union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9737,7 +11197,8 @@ vld2_s16 (const int16_t * __a) - return __rv.__i; - } - --__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_s32 (const int32_t * __a) - { - union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9746,7 +11207,8 @@ vld2_s32 (const int32_t * __a) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_f16 (const float16_t * __a) - { - union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9755,7 +11217,8 @@ vld2_f16 (const float16_t * __a) - } - #endif - --__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_f32 (const float32_t * __a) - { - union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9763,7 +11226,8 @@ vld2_f32 (const float32_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_u8 (const uint8_t * __a) - { - union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9771,7 +11235,8 @@ vld2_u8 (const uint8_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_u16 (const uint16_t * __a) - { - union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9779,7 +11244,8 @@ vld2_u16 (const uint16_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_u32 (const uint32_t * __a) - { - union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9787,7 +11253,8 @@ vld2_u32 (const uint32_t * __a) - return __rv.__i; - } - --__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_p8 (const poly8_t * __a) - { - union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv; -@@ -9795,7 +11262,8 @@ vld2_p8 (const poly8_t * __a) - return __rv.__i; - } - --__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld2_p16 (const poly16_t * __a) - { - union { poly16x4x2_t __i; 
__builtin_neon_ti __o; } __rv;
-@@ -9805,7 +11273,8 @@ vld2_p16 (const poly16_t * __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_p64 (const poly64_t * __a)
- {
- union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -9814,7 +11283,8 @@ vld2_p64 (const poly64_t * __a)
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_s64 (const int64_t * __a)
- {
- union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -9822,7 +11292,8 @@ vld2_s64 (const int64_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_u64 (const uint64_t * __a)
- {
- union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -9830,7 +11301,8 @@ vld2_u64 (const uint64_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_s8 (const int8_t * __a)
- {
- union { int8x16x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9838,7 +11310,8 @@ vld2q_s8 (const int8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_s16 (const int16_t * __a)
- {
- union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9846,7 +11319,8 @@ vld2q_s16 (const int16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_s32 (const int32_t * __a)
- {
- union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9855,7 +11329,8 @@ vld2q_s32 (const int32_t * __a)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_f16 (const float16_t * __a)
- {
- union { float16x8x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9864,7 +11339,8 @@ vld2q_f16 (const float16_t * __a)
- }
- #endif
-
---__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_f32 (const float32_t * __a)
- {
- union { float32x4x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9872,7 +11348,8 @@ vld2q_f32 (const float32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_u8 (const uint8_t * __a)
- {
- union { uint8x16x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9880,7 +11357,8 @@ vld2q_u8 (const uint8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_u16 (const uint16_t * __a)
- {
- union { uint16x8x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9888,7 +11366,8 @@ vld2q_u16 (const uint16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_u32 (const uint32_t * __a)
- {
- union { uint32x4x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9896,7 +11375,8 @@ vld2q_u32 (const uint32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_p8 (const poly8_t * __a)
- {
- union { poly8x16x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9904,7 +11384,8 @@ vld2q_p8 (const poly8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_p16 (const poly16_t * __a)
- {
- union { poly16x8x2_t __i; __builtin_neon_oi __o; } __rv;
-@@ -9912,7 +11393,8 @@ vld2q_p16 (const poly16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c)
- {
- union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9921,7 +11403,8 @@ vld2_lane_s8 (const int8_t * __a, int8x8x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c)
- {
- union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9930,7 +11413,8 @@ vld2_lane_s16 (const int16_t * __a, int16x4x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c)
- {
- union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9940,7 +11424,8 @@ vld2_lane_s32 (const int32_t * __a, int32x2x2_t __b, const int __c)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c)
- {
- union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9950,7 +11435,8 @@ vld2_lane_f16 (const float16_t * __a, float16x4x2_t __b, const int __c)
- }
- #endif
-
---__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
- {
- union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9959,7 +11445,8 @@ vld2_lane_f32 (const float32_t * __a, float32x2x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c)
- {
- union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9968,7 +11455,8 @@ vld2_lane_u8 (const uint8_t * __a, uint8x8x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c)
- {
- union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9977,7 +11465,8 @@ vld2_lane_u16 (const uint16_t * __a, uint16x4x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c)
- {
- union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9986,7 +11475,8 @@ vld2_lane_u32 (const uint32_t * __a, uint32x2x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c)
- {
- union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -9995,7 +11485,8 @@ vld2_lane_p8 (const poly8_t * __a, poly8x8x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c)
- {
- union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -10004,7 +11495,8 @@ vld2_lane_p16 (const poly16_t * __a, poly16x4x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c)
- {
- union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10013,7 +11505,8 @@ vld2q_lane_s16 (const int16_t * __a, int16x8x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c)
- {
- union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10023,7 +11516,8 @@ vld2q_lane_s32 (const int32_t * __a, int32x4x2_t __b, const int __c)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c)
- {
- union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10033,7 +11527,8 @@ vld2q_lane_f16 (const float16_t * __a, float16x8x2_t __b, const int __c)
- }
- #endif
-
---__extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
- {
- union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10042,7 +11537,8 @@ vld2q_lane_f32 (const float32_t * __a, float32x4x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c)
- {
- union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10051,7 +11547,8 @@ vld2q_lane_u16 (const uint16_t * __a, uint16x8x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c)
- {
- union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10060,7 +11557,8 @@ vld2q_lane_u32 (const uint32_t * __a, uint32x4x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c)
- {
- union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10069,7 +11567,8 @@ vld2q_lane_p16 (const poly16_t * __a, poly16x8x2_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int8x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_s8 (const int8_t * __a)
- {
- union { int8x8x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10077,7 +11576,8 @@ vld2_dup_s8 (const int8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_s16 (const int16_t * __a)
- {
- union { int16x4x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10085,7 +11585,8 @@ vld2_dup_s16 (const int16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x2x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_s32 (const int32_t * __a)
- {
- union { int32x2x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10094,7 +11595,8 @@ vld2_dup_s32 (const int32_t * __a)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_f16 (const float16_t * __a)
- {
- union { float16x4x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10103,7 +11605,8 @@ vld2_dup_f16 (const float16_t * __a)
- }
- #endif
-
---__extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_f32 (const float32_t * __a)
- {
- union { float32x2x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10111,7 +11614,8 @@ vld2_dup_f32 (const float32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint8x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_u8 (const uint8_t * __a)
- {
- union { uint8x8x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10119,7 +11623,8 @@ vld2_dup_u8 (const uint8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_u16 (const uint16_t * __a)
- {
- union { uint16x4x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10127,7 +11632,8 @@ vld2_dup_u16 (const uint16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x2x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_u32 (const uint32_t * __a)
- {
- union { uint32x2x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10135,7 +11641,8 @@ vld2_dup_u32 (const uint32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly8x8x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_p8 (const poly8_t * __a)
- {
- union { poly8x8x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10143,7 +11650,8 @@ vld2_dup_p8 (const poly8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x4x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_p16 (const poly16_t * __a)
- {
- union { poly16x4x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10153,7 +11661,8 @@ vld2_dup_p16 (const poly16_t * __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly64x1x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_p64 (const poly64_t * __a)
- {
- union { poly64x1x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10162,7 +11671,8 @@ vld2_dup_p64 (const poly64_t * __a)
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int64x1x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_s64 (const int64_t * __a)
- {
- union { int64x1x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10170,7 +11680,8 @@ vld2_dup_s64 (const int64_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint64x1x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld2_dup_u64 (const uint64_t * __a)
- {
- union { uint64x1x2_t __i; __builtin_neon_ti __o; } __rv;
-@@ -10178,21 +11689,24 @@ vld2_dup_u64 (const uint64_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_s8 (int8_t * __a, int8x8x2_t __b)
- {
- union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_s16 (int16_t * __a, int16x4x2_t __b)
- {
- union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_s32 (int32_t * __a, int32x2x2_t __b)
- {
- union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -10200,7 +11714,8 @@ vst2_s32 (int32_t * __a, int32x2x2_t __b)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_f16 (float16_t * __a, float16x4x2_t __b)
- {
- union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -10208,42 +11723,48 @@ vst2_f16 (float16_t * __a, float16x4x2_t __b)
- }
- #endif
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_f32 (float32_t * __a, float32x2x2_t __b)
- {
- union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v2sf ((__builtin_neon_sf *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_u8 (uint8_t * __a, uint8x8x2_t __b)
- {
- union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_u16 (uint16_t * __a, uint16x4x2_t __b)
- {
- union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v4hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_u32 (uint32_t * __a, uint32x2x2_t __b)
- {
- union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v2si ((__builtin_neon_si *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_p8 (poly8_t * __a, poly8x8x2_t __b)
- {
- union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2v8qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_p16 (poly16_t * __a, poly16x4x2_t __b)
- {
- union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -10252,7 +11773,8 @@ vst2_p16 (poly16_t * __a, poly16x4x2_t __b)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_p64 (poly64_t * __a, poly64x1x2_t __b)
- {
- union { poly64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -10260,35 +11782,40 @@ vst2_p64 (poly64_t * __a, poly64x1x2_t __b)
- }
-
- #pragma GCC pop_options
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_s64 (int64_t * __a, int64x1x2_t __b)
- {
- union { int64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_u64 (uint64_t * __a, uint64x1x2_t __b)
- {
- union { uint64x1x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2di ((__builtin_neon_di *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_s8 (int8_t * __a, int8x16x2_t __b)
- {
- union { int8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_s16 (int16_t * __a, int16x8x2_t __b)
- {
- union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_s32 (int32_t * __a, int32x4x2_t __b)
- {
- union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10296,7 +11823,8 @@ vst2q_s32 (int32_t * __a, int32x4x2_t __b)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_f16 (float16_t * __a, float16x8x2_t __b)
- {
- union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10304,63 +11832,72 @@ vst2q_f16 (float16_t * __a, float16x8x2_t __b)
- }
- #endif
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_f32 (float32_t * __a, float32x4x2_t __b)
- {
- union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v4sf ((__builtin_neon_sf *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_u8 (uint8_t * __a, uint8x16x2_t __b)
- {
- union { uint8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_u16 (uint16_t * __a, uint16x8x2_t __b)
- {
- union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_u32 (uint32_t * __a, uint32x4x2_t __b)
- {
- union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v4si ((__builtin_neon_si *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_p8 (poly8_t * __a, poly8x16x2_t __b)
- {
- union { poly8x16x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v16qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_p16 (poly16_t * __a, poly16x8x2_t __b)
- {
- union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2v8hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_s8 (int8_t * __a, int8x8x2_t __b, const int __c)
- {
- union { int8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_s16 (int16_t * __a, int16x4x2_t __b, const int __c)
- {
- union { int16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c)
- {
- union { int32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -10368,7 +11905,8 @@ vst2_lane_s32 (int32_t * __a, int32x2x2_t __b, const int __c)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c)
- {
- union { float16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
-@@ -10376,56 +11914,64 @@ vst2_lane_f16 (float16_t * __a, float16x4x2_t __b, const int __c)
- }
- #endif
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_f32 (float32_t * __a, float32x2x2_t __b, const int __c)
- {
- union { float32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_u8 (uint8_t * __a, uint8x8x2_t __b, const int __c)
- {
- union { uint8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_u16 (uint16_t * __a, uint16x4x2_t __b, const int __c)
- {
- union { uint16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_u32 (uint32_t * __a, uint32x2x2_t __b, const int __c)
- {
- union { uint32x2x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_p8 (poly8_t * __a, poly8x8x2_t __b, const int __c)
- {
- union { poly8x8x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2_lane_p16 (poly16_t * __a, poly16x4x2_t __b, const int __c)
- {
- union { poly16x4x2_t __i; __builtin_neon_ti __o; } __bu = { __b };
- __builtin_neon_vst2_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_lane_s16 (int16_t * __a, int16x8x2_t __b, const int __c)
- {
- union { int16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c)
- {
- union { int32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10433,7 +11979,8 @@ vst2q_lane_s32 (int32_t * __a, int32x4x2_t __b, const int __c)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c)
- {
- union { float16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
-@@ -10441,35 +11988,40 @@ vst2q_lane_f16 (float16_t * __a, float16x8x2_t __b, const int __c)
- }
- #endif
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_lane_f32 (float32_t * __a, float32x4x2_t __b, const int __c)
- {
- union { float32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_lane_u16 (uint16_t * __a, uint16x8x2_t __b, const int __c)
- {
- union { uint16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_lane_u32 (uint32_t * __a, uint32x4x2_t __b, const int __c)
- {
- union { uint32x4x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst2q_lane_p16 (poly16_t * __a, poly16x8x2_t __b, const int __c)
- {
- union { poly16x8x2_t __i; __builtin_neon_oi __o; } __bu = { __b };
- __builtin_neon_vst2_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_s8 (const int8_t * __a)
- {
- union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10477,7 +12029,8 @@ vld3_s8 (const int8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_s16 (const int16_t * __a)
- {
- union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10485,7 +12038,8 @@ vld3_s16 (const int16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_s32 (const int32_t * __a)
- {
- union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10494,7 +12048,8 @@ vld3_s32 (const int32_t * __a)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_f16 (const float16_t * __a)
- {
- union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10503,7 +12058,8 @@ vld3_f16 (const float16_t * __a)
- }
- #endif
-
---__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_f32 (const float32_t * __a)
- {
- union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10511,7 +12067,8 @@ vld3_f32 (const float32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_u8 (const uint8_t * __a)
- {
- union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10519,7 +12076,8 @@ vld3_u8 (const uint8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_u16 (const uint16_t * __a)
- {
- union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10527,7 +12085,8 @@ vld3_u16 (const uint16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_u32 (const uint32_t * __a)
- {
- union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10535,7 +12094,8 @@ vld3_u32 (const uint32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_p8 (const poly8_t * __a)
- {
- union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10543,7 +12103,8 @@ vld3_p8 (const poly8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_p16 (const poly16_t * __a)
- {
- union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10553,7 +12114,8 @@ vld3_p16 (const poly16_t * __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_p64 (const poly64_t * __a)
- {
- union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10562,7 +12124,8 @@ vld3_p64 (const poly64_t * __a)
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_s64 (const int64_t * __a)
- {
- union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10570,7 +12133,8 @@ vld3_s64 (const int64_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_u64 (const uint64_t * __a)
- {
- union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10578,7 +12142,8 @@ vld3_u64 (const uint64_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_s8 (const int8_t * __a)
- {
- union { int8x16x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10586,7 +12151,8 @@ vld3q_s8 (const int8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_s16 (const int16_t * __a)
- {
- union { int16x8x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10594,7 +12160,8 @@ vld3q_s16 (const int16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_s32 (const int32_t * __a)
- {
- union { int32x4x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10603,7 +12170,8 @@ vld3q_s32 (const int32_t * __a)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_f16 (const float16_t * __a)
- {
- union { float16x8x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10612,7 +12180,8 @@ vld3q_f16 (const float16_t * __a)
- }
- #endif
-
---__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_f32 (const float32_t * __a)
- {
- union { float32x4x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10620,7 +12189,8 @@ vld3q_f32 (const float32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_u8 (const uint8_t * __a)
- {
- union { uint8x16x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10628,7 +12198,8 @@ vld3q_u8 (const uint8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_u16 (const uint16_t * __a)
- {
- union { uint16x8x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10636,7 +12207,8 @@ vld3q_u16 (const uint16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_u32 (const uint32_t * __a)
- {
- union { uint32x4x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10644,7 +12216,8 @@ vld3q_u32 (const uint32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_p8 (const poly8_t * __a)
- {
- union { poly8x16x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10652,7 +12225,8 @@ vld3q_p8 (const poly8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_p16 (const poly16_t * __a)
- {
- union { poly16x8x3_t __i; __builtin_neon_ci __o; } __rv;
-@@ -10660,7 +12234,8 @@ vld3q_p16 (const poly16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c)
- {
- union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10669,7 +12244,8 @@ vld3_lane_s8 (const int8_t * __a, int8x8x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c)
- {
- union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10678,7 +12254,8 @@ vld3_lane_s16 (const int16_t * __a, int16x4x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c)
- {
- union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10688,7 +12265,8 @@ vld3_lane_s32 (const int32_t * __a, int32x2x3_t __b, const int __c)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c)
- {
- union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10698,7 +12276,8 @@ vld3_lane_f16 (const float16_t * __a, float16x4x3_t __b, const int __c)
- }
- #endif
-
---__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
- {
- union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10707,7 +12286,8 @@ vld3_lane_f32 (const float32_t * __a, float32x2x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c)
- {
- union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10716,7 +12296,8 @@ vld3_lane_u8 (const uint8_t * __a, uint8x8x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c)
- {
- union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10725,7 +12306,8 @@ vld3_lane_u16 (const uint16_t * __a, uint16x4x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c)
- {
- union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10734,7 +12316,8 @@ vld3_lane_u32 (const uint32_t * __a, uint32x2x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c)
- {
- union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10743,7 +12326,8 @@ vld3_lane_p8 (const poly8_t * __a, poly8x8x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c)
- {
- union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10752,7 +12336,8 @@ vld3_lane_p16 (const poly16_t * __a, poly16x4x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c)
- {
- union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -10761,7 +12346,8 @@ vld3q_lane_s16 (const int16_t * __a, int16x8x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c)
- {
- union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -10771,7 +12357,8 @@ vld3q_lane_s32 (const int32_t * __a, int32x4x3_t __b, const int __c)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c)
- {
- union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -10781,7 +12368,8 @@ vld3q_lane_f16 (const float16_t * __a, float16x8x3_t __b, const int __c)
- }
- #endif
-
---__extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
- {
- union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -10790,7 +12378,8 @@ vld3q_lane_f32 (const float32_t * __a, float32x4x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c)
- {
- union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -10799,7 +12388,8 @@ vld3q_lane_u16 (const uint16_t * __a, uint16x8x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c)
- {
- union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -10808,7 +12398,8 @@ vld3q_lane_u32 (const uint32_t * __a, uint32x4x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c)
- {
- union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -10817,7 +12408,8 @@ vld3q_lane_p16 (const poly16_t * __a, poly16x8x3_t __b, const int __c)
- return __rv.__i;
- }
-
---__extension__ static __inline int8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_s8 (const int8_t * __a)
- {
- union { int8x8x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10825,7 +12417,8 @@ vld3_dup_s8 (const int8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_s16 (const int16_t * __a)
- {
- union { int16x4x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10833,7 +12426,8 @@ vld3_dup_s16 (const int16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_s32 (const int32_t * __a)
- {
- union { int32x2x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10842,7 +12436,8 @@ vld3_dup_s32 (const int32_t * __a)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_f16 (const float16_t * __a)
- {
- union { float16x4x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10851,7 +12446,8 @@ vld3_dup_f16 (const float16_t * __a)
- }
- #endif
-
---__extension__ static __inline float32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_f32 (const float32_t * __a)
- {
- union { float32x2x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10859,7 +12455,8 @@ vld3_dup_f32 (const float32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_u8 (const uint8_t * __a)
- {
- union { uint8x8x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10867,7 +12464,8 @@ vld3_dup_u8 (const uint8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_u16 (const uint16_t * __a)
- {
- union { uint16x4x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10875,7 +12473,8 @@ vld3_dup_u16 (const uint16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x2x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_u32 (const uint32_t * __a)
- {
- union { uint32x2x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10883,7 +12482,8 @@ vld3_dup_u32 (const uint32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly8x8x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_p8 (const poly8_t * __a)
- {
- union { poly8x8x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10891,7 +12491,8 @@ vld3_dup_p8 (const poly8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline poly16x4x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_p16 (const poly16_t * __a)
- {
- union { poly16x4x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10901,7 +12502,8 @@ vld3_dup_p16 (const poly16_t * __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly64x1x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_p64 (const poly64_t * __a)
- {
- union { poly64x1x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10910,7 +12512,8 @@ vld3_dup_p64 (const poly64_t * __a)
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_s64 (const int64_t * __a)
- {
- union { int64x1x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10918,7 +12521,8 @@ vld3_dup_s64 (const int64_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint64x1x3_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1x3_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld3_dup_u64 (const uint64_t * __a)
- {
- union { uint64x1x3_t __i; __builtin_neon_ei __o; } __rv;
-@@ -10926,21 +12530,24 @@ vld3_dup_u64 (const uint64_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_s8 (int8_t * __a, int8x8x3_t __b)
- {
- union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_s16 (int16_t * __a, int16x4x3_t __b)
- {
- union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_s32 (int32_t * __a, int32x2x3_t __b)
- {
- union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10948,7 +12555,8 @@ vst3_s32 (int32_t * __a, int32x2x3_t __b)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_f16 (float16_t * __a, float16x4x3_t __b)
- {
- union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -10956,42 +12564,48 @@ vst3_f16 (float16_t * __a, float16x4x3_t __b)
- }
- #endif
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_f32 (float32_t * __a, float32x2x3_t __b)
- {
- union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v2sf ((__builtin_neon_sf *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_u8 (uint8_t * __a, uint8x8x3_t __b)
- {
- union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_u16 (uint16_t * __a, uint16x4x3_t __b)
- {
- union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v4hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_u32 (uint32_t * __a, uint32x2x3_t __b)
- {
- union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v2si ((__builtin_neon_si *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_p8 (poly8_t * __a, poly8x8x3_t __b)
- {
- union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3v8qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_p16 (poly16_t * __a, poly16x4x3_t __b)
- {
- union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -11000,7 +12614,8 @@ vst3_p16 (poly16_t * __a, poly16x4x3_t __b)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_p64 (poly64_t * __a, poly64x1x3_t __b)
- {
- union { poly64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -11008,35 +12623,40 @@ vst3_p64 (poly64_t * __a, poly64x1x3_t __b)
- }
-
- #pragma GCC pop_options
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_s64 (int64_t * __a, int64x1x3_t __b)
- {
- union { int64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_u64 (uint64_t * __a, uint64x1x3_t __b)
- {
- union { uint64x1x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3di ((__builtin_neon_di *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_s8 (int8_t * __a, int8x16x3_t __b)
- {
- union { int8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_s16 (int16_t * __a, int16x8x3_t __b)
- {
- union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_s32 (int32_t * __a, int32x4x3_t __b)
- {
- union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -11044,7 +12664,8 @@ vst3q_s32 (int32_t * __a, int32x4x3_t __b)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_f16 (float16_t * __a, float16x8x3_t __b)
- {
- union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -11052,63 +12673,72 @@ vst3q_f16 (float16_t * __a, float16x8x3_t __b)
- }
- #endif
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_f32 (float32_t * __a, float32x4x3_t __b)
- {
- union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v4sf ((__builtin_neon_sf *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_u8 (uint8_t * __a, uint8x16x3_t __b)
- {
- union { uint8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_u16 (uint16_t * __a, uint16x8x3_t __b)
- {
- union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_u32 (uint32_t * __a, uint32x4x3_t __b)
- {
- union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v4si ((__builtin_neon_si *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_p8 (poly8_t * __a, poly8x16x3_t __b)
- {
- union { poly8x16x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v16qi ((__builtin_neon_qi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_p16 (poly16_t * __a, poly16x8x3_t __b)
- {
- union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3v8hi ((__builtin_neon_hi *) __a, __bu.__o);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_s8 (int8_t * __a, int8x8x3_t __b, const int __c)
- {
- union { int8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_s16 (int16_t * __a, int16x4x3_t __b, const int __c)
- {
- union { int16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c)
- {
- union { int32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -11116,7 +12746,8 @@ vst3_lane_s32 (int32_t * __a, int32x2x3_t __b, const int __c)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c)
- {
- union { float16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
-@@ -11124,56 +12755,64 @@ vst3_lane_f16 (float16_t * __a, float16x4x3_t __b, const int __c)
- }
- #endif
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_f32 (float32_t * __a, float32x2x3_t __b, const int __c)
- {
- union { float32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_u8 (uint8_t * __a, uint8x8x3_t __b, const int __c)
- {
- union { uint8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_u16 (uint16_t * __a, uint16x4x3_t __b, const int __c)
- {
- union { uint16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_u32 (uint32_t * __a, uint32x2x3_t __b, const int __c)
- {
- union { uint32x2x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_p8 (poly8_t * __a, poly8x8x3_t __b, const int __c)
- {
- union { poly8x8x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3_lane_p16 (poly16_t * __a, poly16x4x3_t __b, const int __c)
- {
- union { poly16x4x3_t __i; __builtin_neon_ei __o; } __bu = { __b };
- __builtin_neon_vst3_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_lane_s16 (int16_t * __a, int16x8x3_t __b, const int __c)
- {
- union { int16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c)
- {
- union { int32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -11181,7 +12820,8 @@ vst3q_lane_s32 (int32_t * __a, int32x4x3_t __b, const int __c)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c)
- {
- union { float16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
-@@ -11189,35 +12829,40 @@ vst3q_lane_f16 (float16_t * __a, float16x8x3_t __b, const int __c)
- }
- #endif
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_lane_f32 (float32_t * __a, float32x4x3_t __b, const int __c)
- {
- union { float32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_lane_u16 (uint16_t * __a, uint16x8x3_t __b, const int __c)
- {
- union { uint16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_lane_u32 (uint32_t * __a, uint32x4x3_t __b, const int __c)
- {
- union { uint32x4x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline void __attribute__ ((__always_inline__))
-+__extension__ extern __inline void
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vst3q_lane_p16 (poly16_t * __a, poly16x8x3_t __b, const int __c)
- {
- union { poly16x8x3_t __i; __builtin_neon_ci __o; } __bu = { __b };
- __builtin_neon_vst3_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c);
- }
-
---__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld4_s8 (const int8_t * __a)
- {
- union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv;
-@@ -11225,7 +12870,8 @@ vld4_s8 (const int8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld4_s16 (const int16_t * __a)
- {
- union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv;
-@@ -11233,7 +12879,8 @@ vld4_s16 (const int16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld4_s32 (const int32_t * __a)
- {
- union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv;
-@@ -11242,7 +12889,8 @@ vld4_s32 (const int32_t * __a)
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld4_f16 (const float16_t * __a)
- {
- union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv;
-@@ -11251,7 +12899,8 @@ vld4_f16 (const float16_t * __a)
- }
- #endif
-
---__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld4_f32 (const float32_t * __a)
- {
- union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv;
-@@ -11259,7 +12908,8 @@ vld4_f32 (const float32_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld4_u8 (const uint8_t * __a)
- {
- union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv;
-@@ -11267,7 +12917,8 @@ vld4_u8 (const uint8_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld4_u16 (const uint16_t * __a)
- {
- union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv;
-@@ -11275,7 +12926,8 @@ vld4_u16 (const uint16_t * __a)
- return __rv.__i;
- }
-
---__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vld4_u32 (const uint32_t * __a)
- {
- union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv;
-@@ -11283,7 +12935,8 @@
vld4_u32 (const uint32_t * __a) - return __rv.__i; - } - --__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_p8 (const poly8_t * __a) - { - union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11291,7 +12944,8 @@ vld4_p8 (const poly8_t * __a) - return __rv.__i; - } - --__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_p16 (const poly16_t * __a) - { - union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11301,7 +12955,8 @@ vld4_p16 (const poly16_t * __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_p64 (const poly64_t * __a) - { - union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11310,7 +12965,8 @@ vld4_p64 (const poly64_t * __a) - } - - #pragma GCC pop_options --__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_s64 (const int64_t * __a) - { - union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11318,7 +12974,8 @@ vld4_s64 (const int64_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_u64 (const uint64_t * __a) - { - union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11326,7 +12983,8 @@ vld4_u64 (const uint64_t * __a) - return __rv.__i; - } - --__extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_s8 (const int8_t * __a) - { - union { int8x16x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11334,7 +12992,8 @@ vld4q_s8 (const int8_t * __a) - return __rv.__i; - } - --__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_s16 (const int16_t * __a) - { - union { int16x8x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11342,7 +13001,8 @@ vld4q_s16 (const int16_t * __a) - return __rv.__i; - } - --__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_s32 (const int32_t * __a) - { - union { int32x4x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11351,7 +13011,8 @@ vld4q_s32 (const int32_t * __a) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_f16 (const float16_t * __a) - { - union { float16x8x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11360,7 +13021,8 @@ vld4q_f16 (const float16_t * __a) - } - #endif - --__extension__ static 
__inline float32x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_f32 (const float32_t * __a) - { - union { float32x4x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11368,7 +13030,8 @@ vld4q_f32 (const float32_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_u8 (const uint8_t * __a) - { - union { uint8x16x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11376,7 +13039,8 @@ vld4q_u8 (const uint8_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_u16 (const uint16_t * __a) - { - union { uint16x8x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11384,7 +13048,8 @@ vld4q_u16 (const uint16_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_u32 (const uint32_t * __a) - { - union { uint32x4x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11392,7 +13057,8 @@ vld4q_u32 (const uint32_t * __a) - return __rv.__i; - } - --__extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_p8 (const poly8_t * __a) - { - union { poly8x16x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11400,7 +13066,8 @@ vld4q_p8 (const poly8_t * __a) - return __rv.__i; - } - --__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_p16 (const poly16_t * __a) - { - union { poly16x8x4_t __i; __builtin_neon_xi __o; } __rv; -@@ -11408,7 +13075,8 @@ vld4q_p16 (const poly16_t * __a) - return __rv.__i; - } - --__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c) - { - union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11417,7 +13085,8 @@ vld4_lane_s8 (const int8_t * __a, int8x8x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c) - { - union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11426,7 +13095,8 @@ vld4_lane_s16 (const int16_t * __a, int16x4x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const int __c) - { - union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11436,7 +13106,8 @@ vld4_lane_s32 (const int32_t * __a, int32x2x4_t __b, const 
int __c) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c) - { - union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11447,7 +13118,8 @@ vld4_lane_f16 (const float16_t * __a, float16x4x4_t __b, const int __c) - } - #endif - --__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) - { - union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11456,7 +13128,8 @@ vld4_lane_f32 (const float32_t * __a, float32x2x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c) - { - union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11465,7 +13138,8 @@ vld4_lane_u8 (const uint8_t * __a, uint8x8x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c) - { - union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11474,7 +13148,8 @@ vld4_lane_u16 (const uint16_t * __a, uint16x4x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c) - { - union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11483,7 +13158,8 @@ vld4_lane_u32 (const uint32_t * __a, uint32x2x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c) - { - union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11492,7 +13168,8 @@ vld4_lane_p8 (const poly8_t * __a, poly8x8x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c) - { - union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11501,7 +13178,8 @@ vld4_lane_p16 (const poly16_t * __a, poly16x4x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_lane_s16 (const int16_t * __a, int16x8x4_t 
__b, const int __c) - { - union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11510,7 +13188,8 @@ vld4q_lane_s16 (const int16_t * __a, int16x8x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c) - { - union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11520,7 +13199,8 @@ vld4q_lane_s32 (const int32_t * __a, int32x4x4_t __b, const int __c) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c) - { - union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11531,7 +13211,8 @@ vld4q_lane_f16 (const float16_t * __a, float16x8x4_t __b, const int __c) - } - #endif - --__extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) - { - union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11540,7 +13221,8 @@ vld4q_lane_f32 (const float32_t * __a, float32x4x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c) - { - union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11549,7 +13231,8 @@ vld4q_lane_u16 (const uint16_t * __a, uint16x8x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c) - { - union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11558,7 +13241,8 @@ vld4q_lane_u32 (const uint32_t * __a, uint32x4x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c) - { - union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11567,7 +13251,8 @@ vld4q_lane_p16 (const poly16_t * __a, poly16x8x4_t __b, const int __c) - return __rv.__i; - } - --__extension__ static __inline int8x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_s8 (const int8_t * __a) - { - union { int8x8x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11575,7 +13260,8 @@ vld4_dup_s8 (const int8_t * __a) - return __rv.__i; - } - --__extension__ static __inline int16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4x4_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_s16 (const int16_t * __a) - { - union { int16x4x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11583,7 +13269,8 @@ vld4_dup_s16 (const int16_t * __a) - return __rv.__i; - } - --__extension__ static __inline int32x2x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_s32 (const int32_t * __a) - { - union { int32x2x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11592,7 +13279,8 @@ vld4_dup_s32 (const int32_t * __a) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline float16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_f16 (const float16_t * __a) - { - union { float16x4x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11601,7 +13289,8 @@ vld4_dup_f16 (const float16_t * __a) - } - #endif - --__extension__ static __inline float32x2x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_f32 (const float32_t * __a) - { - union { float32x2x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11609,7 +13298,8 @@ vld4_dup_f32 (const float32_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint8x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_u8 (const uint8_t * __a) - { - union { uint8x8x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11617,7 +13307,8 @@ vld4_dup_u8 (const uint8_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_u16 (const uint16_t * __a) - { - union { uint16x4x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11625,7 +13316,8 @@ vld4_dup_u16 (const uint16_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint32x2x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_u32 (const uint32_t * __a) - { - union { uint32x2x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11633,7 +13325,8 @@ vld4_dup_u32 (const uint32_t * __a) - return __rv.__i; - } - --__extension__ static __inline poly8x8x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly8x8x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_p8 (const poly8_t * __a) - { - union { poly8x8x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11641,7 +13334,8 @@ vld4_dup_p8 (const poly8_t * __a) - return __rv.__i; - } - --__extension__ static __inline poly16x4x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly16x4x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_p16 (const poly16_t * __a) - { - union { poly16x4x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11651,7 +13345,8 @@ vld4_dup_p16 (const poly16_t * __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x1x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x1x4_t 
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_p64 (const poly64_t * __a) - { - union { poly64x1x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11660,7 +13355,8 @@ vld4_dup_p64 (const poly64_t * __a) - } - - #pragma GCC pop_options --__extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_s64 (const int64_t * __a) - { - union { int64x1x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11668,7 +13364,8 @@ vld4_dup_s64 (const int64_t * __a) - return __rv.__i; - } - --__extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vld4_dup_u64 (const uint64_t * __a) - { - union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; -@@ -11676,21 +13373,24 @@ vld4_dup_u64 (const uint64_t * __a) - return __rv.__i; - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_s8 (int8_t * __a, int8x8x4_t __b) - { - union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_s16 (int16_t * __a, int16x4x4_t __b) - { - union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_s32 (int32_t * __a, int32x2x4_t __b) - { - union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11698,7 +13398,8 @@ vst4_s32 (int32_t * __a, int32x2x4_t __b) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_f16 (float16_t * __a, float16x4x4_t __b) - { - union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11706,42 +13407,48 @@ vst4_f16 (float16_t * __a, float16x4x4_t __b) - } - #endif - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_f32 (float32_t * __a, float32x2x4_t __b) - { - union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v2sf ((__builtin_neon_sf *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_u8 (uint8_t * __a, uint8x8x4_t __b) - { - union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_u16 (uint16_t * __a, 
uint16x4x4_t __b) - { - union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v4hi ((__builtin_neon_hi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_u32 (uint32_t * __a, uint32x2x4_t __b) - { - union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v2si ((__builtin_neon_si *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_p8 (poly8_t * __a, poly8x8x4_t __b) - { - union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4v8qi ((__builtin_neon_qi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_p16 (poly16_t * __a, poly16x4x4_t __b) - { - union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11750,7 +13457,8 @@ vst4_p16 (poly16_t * __a, poly16x4x4_t __b) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_p64 (poly64_t * __a, poly64x1x4_t __b) - { - union { poly64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11758,35 +13466,40 @@ vst4_p64 (poly64_t * __a, poly64x1x4_t __b) - } - - #pragma GCC pop_options --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_s64 (int64_t * __a, int64x1x4_t __b) - { - union { int64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_u64 (uint64_t * __a, uint64x1x4_t __b) - { - union { uint64x1x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4di ((__builtin_neon_di *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_s8 (int8_t * __a, int8x16x4_t __b) - { - union { int8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_s16 (int16_t * __a, int16x8x4_t __b) - { - union { int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_s32 (int32_t * __a, int32x4x4_t __b) - { - union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11794,7 +13507,8 @@ vst4q_s32 
(int32_t * __a, int32x4x4_t __b) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_f16 (float16_t * __a, float16x8x4_t __b) - { - union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11802,63 +13516,72 @@ vst4q_f16 (float16_t * __a, float16x8x4_t __b) - } - #endif - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_f32 (float32_t * __a, float32x4x4_t __b) - { - union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v4sf ((__builtin_neon_sf *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_u8 (uint8_t * __a, uint8x16x4_t __b) - { - union { uint8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_u16 (uint16_t * __a, uint16x8x4_t __b) - { - union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_u32 (uint32_t * __a, uint32x4x4_t __b) - { - union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v4si ((__builtin_neon_si *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_p8 (poly8_t * __a, poly8x16x4_t __b) - { - union { poly8x16x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v16qi ((__builtin_neon_qi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_p16 (poly16_t * __a, poly16x8x4_t __b) - { - union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4v8hi ((__builtin_neon_hi *) __a, __bu.__o); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_s8 (int8_t * __a, int8x8x4_t __b, const int __c) - { - union { int8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_s16 (int16_t * __a, int16x4x4_t __b, const int __c) - { - union { int16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); - } - --__extension__ static 
__inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c) - { - union { int32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11866,7 +13589,8 @@ vst4_lane_s32 (int32_t * __a, int32x2x4_t __b, const int __c) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c) - { - union { float16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; -@@ -11874,56 +13598,64 @@ vst4_lane_f16 (float16_t * __a, float16x4x4_t __b, const int __c) - } - #endif - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_f32 (float32_t * __a, float32x2x4_t __b, const int __c) - { - union { float32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev2sf ((__builtin_neon_sf *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_u8 (uint8_t * __a, uint8x8x4_t __b, const int __c) - { - union { uint8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_u16 (uint16_t * __a, uint16x4x4_t __b, const int __c) - { - union { uint16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_u32 (uint32_t * __a, uint32x2x4_t __b, const int __c) - { - union { uint32x2x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev2si ((__builtin_neon_si *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_p8 (poly8_t * __a, poly8x8x4_t __b, const int __c) - { - union { poly8x8x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev8qi ((__builtin_neon_qi *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4_lane_p16 (poly16_t * __a, poly16x4x4_t __b, const int __c) - { - union { poly16x4x4_t __i; __builtin_neon_oi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev4hi ((__builtin_neon_hi *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_lane_s16 (int16_t * __a, int16x8x4_t __b, const int __c) - { - union { 
int16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c) - { - union { int32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11931,7 +13663,8 @@ vst4q_lane_s32 (int32_t * __a, int32x4x4_t __b, const int __c) - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c) - { - union { float16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; -@@ -11939,529 +13672,616 @@ vst4q_lane_f16 (float16_t * __a, float16x8x4_t __b, const int __c) - } - #endif - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_lane_f32 (float32_t * __a, float32x4x4_t __b, const int __c) - { - union { float32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev4sf ((__builtin_neon_sf *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_lane_u16 (uint16_t * __a, uint16x8x4_t __b, const int __c) - { - union { uint16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_lane_u32 (uint32_t * __a, uint32x4x4_t __b, const int __c) - { - union { uint32x4x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev4si ((__builtin_neon_si *) __a, __bu.__o, __c); - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vst4q_lane_p16 (poly16_t * __a, poly16x8x4_t __b, const int __c) - { - union { poly16x8x4_t __i; __builtin_neon_xi __o; } __bu = { __b }; - __builtin_neon_vst4_lanev8hi ((__builtin_neon_hi *) __a, __bu.__o, __c); - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_s8 (int8x8_t __a, int8x8_t __b) - { - return __a & __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_s16 (int16x4_t __a, int16x4_t __b) - { - return __a & __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_s32 (int32x2_t __a, int32x2_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern 
__inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a & __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_s64 (int64x1_t __a, int64x1_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vand_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a & __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a & __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a & __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a & __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a & __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vandq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a & __b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_s8 (int8x8_t __a, 
int8x8_t __b) - { - return __a | __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_s16 (int16x4_t __a, int16x4_t __b) - { - return __a | __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_s32 (int32x2_t __a, int32x2_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a | __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_s64 (int64x1_t __a, int64x1_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorr_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a | __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a | __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a | __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a | __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_s64 (int64x2_t __a, int64x2_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_u8 (uint8x16_t __a, uint8x16_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_u16 (uint16x8_t __a, uint16x8_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) 
-+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_u32 (uint32x4_t __a, uint32x4_t __b) - { - return __a | __b; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vorrq_u64 (uint64x2_t __a, uint64x2_t __b) - { - return __a | __b; - } - --__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_s8 (int8x8_t __a, int8x8_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_s16 (int16x4_t __a, int16x4_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_s32 (int32x2_t __a, int32x2_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_u8 (uint8x8_t __a, uint8x8_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_u16 (uint16x4_t __a, uint16x4_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_u32 (uint32x2_t __a, uint32x2_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_s64 (int64x1_t __a, int64x1_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veor_u64 (uint64x1_t __a, uint64x1_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_s8 (int8x16_t __a, int8x16_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_s16 (int16x8_t __a, int16x8_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_s32 (int32x4_t __a, int32x4_t __b) - { - return __a ^ __b; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - veorq_s64 
(int64x2_t __a, int64x2_t __b)
- {
-   return __a ^ __b;
- }
-
---__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- veorq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-   return __a ^ __b;
- }
-
---__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- veorq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-   return __a ^ __b;
- }
-
---__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- veorq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-   return __a ^ __b;
- }
-
---__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- veorq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-   return __a ^ __b;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbic_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbic_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbic_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbic_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbic_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbic_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbic_s64 (int64x1_t __a, int64x1_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbic_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbicq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbicq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbicq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbicq_s64 (int64x2_t __a, int64x2_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vbicq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-   return __a & ~__b;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vorn_s8 (int8x8_t __a, int8x8_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vorn_s16 (int16x4_t __a, int16x4_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vorn_s32 (int32x2_t __a, int32x2_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vorn_u8 (uint8x8_t __a, uint8x8_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vorn_u16 (uint16x4_t __a, uint16x4_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vorn_u32 (uint32x2_t __a, uint32x2_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vorn_s64 (int64x1_t __a, int64x1_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vorn_u64 (uint64x1_t __a, uint64x1_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vornq_s8 (int8x16_t __a, int8x16_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vornq_s16 (int16x8_t __a, int16x8_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vornq_s32 (int32x4_t __a, int32x4_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vornq_s64 (int64x2_t __a, int64x2_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vornq_u8 (uint8x16_t __a, uint8x16_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vornq_u16 (uint16x8_t __a, uint16x8_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vornq_u32 (uint32x4_t __a, uint32x4_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vornq_u64 (uint64x2_t __a, uint64x2_t __b)
- {
-   return __a | ~__b;
- }
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_p16 (poly16x4_t __a)
- {
-   return (poly8x8_t) __a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_f16 (float16x4_t __a)
- {
-   return (poly8x8_t) __a;
- }
- #endif
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_f32 (float32x2_t __a)
- {
-   return (poly8x8_t)__a;
-@@ -12469,76 +14289,88 @@ vreinterpret_p8_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_p64 (poly64x1_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_s64 (int64x1_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_u64 (uint64x1_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_s8 (int8x8_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_s16 (int16x4_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_s32 (int32x2_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_u8 (uint8x8_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_u16 (uint16x4_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
---__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p8_u32 (uint32x2_t __a)
- {
-   return (poly8x8_t)__a;
- }
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_p8 (poly8x8_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_f16 (float16x4_t __a)
- {
-   return (poly16x4_t) __a;
- }
- #endif
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_f32 (float32x2_t __a)
- {
-   return (poly16x4_t)__a;
-@@ -12546,63 +14378,73 @@ vreinterpret_p16_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_p64 (poly64x1_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_s64 (int64x1_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_u64 (uint64x1_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_s8 (int8x8_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_s16 (int16x4_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_s32 (int32x2_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_u8 (uint8x8_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_u16 (uint16x4_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
---__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p16_u32 (uint32x2_t __a)
- {
-   return (poly16x4_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_p8 (poly8x8_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12610,7 +14452,8 @@ vreinterpret_f16_p8 (poly8x8_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_p16 (poly16x4_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12618,7 +14461,8 @@ vreinterpret_f16_p16 (poly16x4_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_f32 (float32x2_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12628,7 +14472,8 @@ vreinterpret_f16_f32 (float32x2_t __a)
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_p64 (poly64x1_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12637,7 +14482,8 @@ vreinterpret_f16_p64 (poly64x1_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_s64 (int64x1_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12645,7 +14491,8 @@ vreinterpret_f16_s64 (int64x1_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_u64 (uint64x1_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12653,7 +14500,8 @@ vreinterpret_f16_u64 (uint64x1_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_s8 (int8x8_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12661,7 +14509,8 @@ vreinterpret_f16_s8 (int8x8_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_s16 (int16x4_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12669,7 +14518,8 @@ vreinterpret_f16_s16 (int16x4_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_s32 (int32x2_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12677,7 +14527,8 @@ vreinterpret_f16_s32 (int32x2_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_u8 (uint8x8_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12685,7 +14536,8 @@ vreinterpret_f16_u8 (uint8x8_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_u16 (uint16x4_t __a)
- {
-   return (float16x4_t) __a;
-@@ -12693,27 +14545,31 @@ vreinterpret_f16_u16 (uint16x4_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f16_u32 (uint32x2_t __a)
- {
-   return (float16x4_t) __a;
- }
- #endif
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_p8 (poly8x8_t __a)
- {
-   return (float32x2_t)__a;
- }
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_p16 (poly16x4_t __a)
- {
-   return (float32x2_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_f16 (float16x4_t __a)
- {
-   return (float32x2_t) __a;
-@@ -12722,56 +14578,65 @@ vreinterpret_f32_f16 (float16x4_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_p64 (poly64x1_t __a)
- {
-   return (float32x2_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_s64 (int64x1_t __a)
- {
-   return (float32x2_t)__a;
- }
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_u64 (uint64x1_t __a)
- {
-   return (float32x2_t)__a;
- }
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_s8 (int8x8_t __a)
- {
-   return (float32x2_t)__a;
- }
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_s16 (int16x4_t __a)
- {
-   return (float32x2_t)__a;
- }
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_s32 (int32x2_t __a)
- {
-   return (float32x2_t)__a;
- }
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_u8 (uint8x8_t __a)
- {
-   return (float32x2_t)__a;
- }
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_u16 (uint16x4_t __a)
- {
-   return (float32x2_t)__a;
- }
-
---__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_f32_u32 (uint32x2_t __a)
- {
-   return (float32x2_t)__a;
-@@ -12779,102 +14644,118 @@ vreinterpret_f32_u32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_p8 (poly8x8_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_p16 (poly16x4_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_f16 (float16x4_t __a)
- {
-   return (poly64x1_t) __a;
- }
- #endif
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_f32 (float32x2_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_s64 (int64x1_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_u64 (uint64x1_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_s8 (int8x8_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_s16 (int16x4_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_s32 (int32x2_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_u8 (uint8x8_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_u16 (uint16x4_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
---__extension__ static __inline poly64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_p64_u32 (uint32x2_t __a)
- {
-   return (poly64x1_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_p8 (poly8x8_t __a)
- {
-   return (int64x1_t)__a;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_p16 (poly16x4_t __a)
- {
-   return (int64x1_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_f16 (float16x4_t __a)
- {
-   return (int64x1_t) __a;
- }
- #endif
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_f32 (float32x2_t __a)
- {
-   return (int64x1_t)__a;
-@@ -12882,76 +14763,88 @@ vreinterpret_s64_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_p64 (poly64x1_t __a)
- {
-   return (int64x1_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_u64 (uint64x1_t __a)
- {
-   return (int64x1_t)__a;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_s8 (int8x8_t __a)
- {
-   return (int64x1_t)__a;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_s16 (int16x4_t __a)
- {
-   return (int64x1_t)__a;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_s32 (int32x2_t __a)
- {
-   return (int64x1_t)__a;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_u8 (uint8x8_t __a)
- {
-   return (int64x1_t)__a;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_u16 (uint16x4_t __a)
- {
-   return (int64x1_t)__a;
- }
-
---__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s64_u32 (uint32x2_t __a)
- {
-   return (int64x1_t)__a;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_p8 (poly8x8_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_p16 (poly16x4_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_f16 (float16x4_t __a)
- {
-   return (uint64x1_t) __a;
- }
- #endif
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_f32 (float32x2_t __a)
- {
-   return (uint64x1_t)__a;
-@@ -12959,76 +14852,88 @@ vreinterpret_u64_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_p64 (poly64x1_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_s64 (int64x1_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_s8 (int8x8_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_s16 (int16x4_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_s32 (int32x2_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_u8 (uint8x8_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_u16 (uint16x4_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
---__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint64x1_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u64_u32 (uint32x2_t __a)
- {
-   return (uint64x1_t)__a;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_p8 (poly8x8_t __a)
- {
-   return (int8x8_t)__a;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_p16 (poly16x4_t __a)
- {
-   return (int8x8_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_f16 (float16x4_t __a)
- {
-   return (int8x8_t) __a;
- }
- #endif
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_f32 (float32x2_t __a)
- {
-   return (int8x8_t)__a;
-@@ -13036,76 +14941,88 @@ vreinterpret_s8_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_p64 (poly64x1_t __a)
- {
-   return (int8x8_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_s64 (int64x1_t __a)
- {
-   return (int8x8_t)__a;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_u64 (uint64x1_t __a)
- {
-   return (int8x8_t)__a;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_s16 (int16x4_t __a)
- {
-   return (int8x8_t)__a;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_s32 (int32x2_t __a)
- {
-   return (int8x8_t)__a;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_u8 (uint8x8_t __a)
- {
-   return (int8x8_t)__a;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_u16 (uint16x4_t __a)
- {
-   return (int8x8_t)__a;
- }
-
---__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s8_u32 (uint32x2_t __a)
- {
-   return (int8x8_t)__a;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_p8 (poly8x8_t __a)
- {
-   return (int16x4_t)__a;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_p16 (poly16x4_t __a)
- {
-   return (int16x4_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_f16 (float16x4_t __a)
- {
-   return (int16x4_t) __a;
- }
- #endif
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_f32 (float32x2_t __a)
- {
-   return (int16x4_t)__a;
-@@ -13113,76 +15030,88 @@ vreinterpret_s16_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_p64 (poly64x1_t __a)
- {
-   return (int16x4_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_s64 (int64x1_t __a)
- {
-   return (int16x4_t)__a;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_u64 (uint64x1_t __a)
- {
-   return (int16x4_t)__a;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_s8 (int8x8_t __a)
- {
-   return (int16x4_t)__a;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_s32 (int32x2_t __a)
- {
-   return (int16x4_t)__a;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_u8 (uint8x8_t __a)
- {
-   return (int16x4_t)__a;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_u16 (uint16x4_t __a)
- {
-   return (int16x4_t)__a;
- }
-
---__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s16_u32 (uint32x2_t __a)
- {
-   return (int16x4_t)__a;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_p8 (poly8x8_t __a)
- {
-   return (int32x2_t)__a;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_p16 (poly16x4_t __a)
- {
-   return (int32x2_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_f16 (float16x4_t __a)
- {
-   return (int32x2_t) __a;
- }
- #endif
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_f32 (float32x2_t __a)
- {
-   return (int32x2_t)__a;
-@@ -13190,76 +15119,88 @@ vreinterpret_s32_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_p64 (poly64x1_t __a)
- {
-   return (int32x2_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_s64 (int64x1_t __a)
- {
-   return (int32x2_t)__a;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_u64 (uint64x1_t __a)
- {
-   return (int32x2_t)__a;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_s8 (int8x8_t __a)
- {
-   return (int32x2_t)__a;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_s16 (int16x4_t __a)
- {
-   return (int32x2_t)__a;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_u8 (uint8x8_t __a)
- {
-   return (int32x2_t)__a;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_u16 (uint16x4_t __a)
- {
-   return (int32x2_t)__a;
- }
-
---__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline int32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_s32_u32 (uint32x2_t __a)
- {
-   return (int32x2_t)__a;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_p8 (poly8x8_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_p16 (poly16x4_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_f16 (float16x4_t __a)
- {
-   return (uint8x8_t) __a;
- }
- #endif
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_f32 (float32x2_t __a)
- {
-   return (uint8x8_t)__a;
-@@ -13267,76 +15208,88 @@ vreinterpret_u8_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_p64 (poly64x1_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_s64 (int64x1_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_u64 (uint64x1_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_s8 (int8x8_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_s16 (int16x4_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_s32 (int32x2_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_u16 (uint16x4_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
---__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint8x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u8_u32 (uint32x2_t __a)
- {
-   return (uint8x8_t)__a;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_p8 (poly8x8_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_p16 (poly16x4_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_f16 (float16x4_t __a)
- {
-   return (uint16x4_t) __a;
- }
- #endif
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_f32 (float32x2_t __a)
- {
-   return (uint16x4_t)__a;
-@@ -13344,76 +15297,88 @@ vreinterpret_u16_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_p64 (poly64x1_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_s64 (int64x1_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_u64 (uint64x1_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_s8 (int8x8_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_s16 (int16x4_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_s32 (int32x2_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_u8 (uint8x8_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
---__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint16x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u16_u32 (uint32x2_t __a)
- {
-   return (uint16x4_t)__a;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_p8 (poly8x8_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_p16 (poly16x4_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_f16 (float16x4_t __a)
- {
-   return (uint32x2_t) __a;
- }
- #endif
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_f32 (float32x2_t __a)
- {
-   return (uint32x2_t)__a;
-@@ -13421,70 +15386,81 @@ vreinterpret_u32_f32 (float32x2_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_p64 (poly64x1_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_s64 (int64x1_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_u64 (uint64x1_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_s8 (int8x8_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_s16 (int16x4_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_s32 (int32x2_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_u8 (uint8x8_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
---__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline uint32x2_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpret_u32_u16 (uint16x4_t __a)
- {
-   return (uint32x2_t)__a;
- }
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_p16 (poly16x8_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_f16 (float16x8_t __a)
- {
-   return (poly8x16_t) __a;
- }
- #endif
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_f32 (float32x4_t __a)
- {
-   return (poly8x16_t)__a;
-@@ -13492,83 +15468,96 @@ vreinterpretq_p8_f32 (float32x4_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_p64 (poly64x2_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_p128 (poly128_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_s64 (int64x2_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_u64 (uint64x2_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_s8 (int8x16_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_s16 (int16x8_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_s32 (int32x4_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_u8 (uint8x16_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_u16 (uint16x8_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
---__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly8x16_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p8_u32 (uint32x4_t __a)
- {
-   return (poly8x16_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_p8 (poly8x16_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_f16 (float16x8_t __a)
- {
-   return (poly16x8_t) __a;
- }
- #endif
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_f32 (float32x4_t __a)
- {
-   return (poly16x8_t)__a;
-@@ -13576,69 +15565,80 @@ vreinterpretq_p16_f32 (float32x4_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_p64 (poly64x2_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_p128 (poly128_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_s64 (int64x2_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_u64 (uint64x2_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_s8 (int8x16_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_s16 (int16x8_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_s32 (int32x4_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_u8 (uint8x16_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_u16 (uint16x8_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
---__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline poly16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_p16_u32 (uint32x4_t __a)
- {
-   return (poly16x8_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_p8 (poly8x16_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13646,7 +15646,8 @@ vreinterpretq_f16_p8 (poly8x16_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_p16 (poly16x8_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13654,7 +15655,8 @@ vreinterpretq_f16_p16 (poly16x8_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_f32 (float32x4_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13665,7 +15667,8 @@ vreinterpretq_f16_f32 (float32x4_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_p64 (poly64x2_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13673,7 +15676,8 @@ vreinterpretq_f16_p64 (poly64x2_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_p128 (poly128_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13683,7 +15687,8 @@ vreinterpretq_f16_p128 (poly128_t __a)
-
- #pragma GCC pop_options
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_s64 (int64x2_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13691,7 +15696,8 @@ vreinterpretq_f16_s64 (int64x2_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_u64 (uint64x2_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13699,7 +15705,8 @@ vreinterpretq_f16_u64 (uint64x2_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_s8 (int8x16_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13707,7 +15714,8 @@ vreinterpretq_f16_s8 (int8x16_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_s16 (int16x8_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13715,7 +15723,8 @@ vreinterpretq_f16_s16 (int16x8_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_s32 (int32x4_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13723,7 +15732,8 @@ vreinterpretq_f16_s32 (int32x4_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_u8 (uint8x16_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13731,7 +15741,8 @@ vreinterpretq_f16_u8 (uint8x16_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_u16 (uint16x8_t __a)
- {
-   return (float16x8_t) __a;
-@@ -13739,27 +15750,31 @@ vreinterpretq_f16_u16 (uint16x8_t __a)
- #endif
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float16x8_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float16x8_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f16_u32 (uint32x4_t __a)
- {
-   return (float16x8_t) __a;
- }
- #endif
-
---__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f32_p8 (poly8x16_t __a)
- {
-   return (float32x4_t)__a;
- }
-
---__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f32_p16 (poly16x8_t __a)
- {
-   return (float32x4_t)__a;
- }
-
- #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
---__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f32_f16 (float16x8_t __a)
- {
-   return (float32x4_t) __a;
-@@ -13768,62 +15783,72 @@ vreinterpretq_f32_f16 (float16x8_t __a)
-
- #pragma GCC push_options
- #pragma GCC target ("fpu=crypto-neon-fp-armv8")
---__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f32_p64 (poly64x2_t __a)
- {
-   return (float32x4_t)__a;
- }
-
---__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f32_p128 (poly128_t __a)
- {
-   return (float32x4_t)__a;
- }
-
- #pragma GCC pop_options
---__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t
-+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
- vreinterpretq_f32_s64 (int64x2_t __a)
- {
-   return (float32x4_t)__a;
- }
-
---__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
-+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_u64 (uint64x2_t __a) - { - return (float32x4_t)__a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_s8 (int8x16_t __a) - { - return (float32x4_t)__a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_s16 (int16x8_t __a) - { - return (float32x4_t)__a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_s32 (int32x4_t __a) - { - return (float32x4_t)__a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_u8 (uint8x16_t __a) - { - return (float32x4_t)__a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_u16 (uint16x8_t __a) - { - return (float32x4_t)__a; - } - --__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline float32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_f32_u32 (uint32x4_t __a) - { - return (float32x4_t)__a; -@@ -13831,188 +15856,218 @@ vreinterpretq_f32_u32 (uint32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_p8 (poly8x16_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_p16 (poly16x8_t __a) - { - return (poly64x2_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_f16 (float16x8_t __a) - { - return (poly64x2_t) __a; - } - #endif - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_f32 (float32x4_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_p128 (poly128_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ 
((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_s64 (int64x2_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_u64 (uint64x2_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_s8 (int8x16_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_s16 (int16x8_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_s32 (int32x4_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_u8 (uint8x16_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_u16 (uint16x8_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p64_u32 (uint32x4_t __a) - { - return (poly64x2_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_p8 (poly8x16_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_p16 (poly16x8_t __a) - { - return (poly128_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_f16 (float16x8_t __a) - { - return (poly128_t) __a; - } - #endif - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_f32 (float32x4_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_p64 (poly64x2_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline 
poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_s64 (int64x2_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_u64 (uint64x2_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_s8 (int8x16_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_s16 (int16x8_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_s32 (int32x4_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_u8 (uint8x16_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_u16 (uint16x8_t __a) - { - return (poly128_t)__a; - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_p128_u32 (uint32x4_t __a) - { - return (poly128_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_p8 (poly8x16_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_p16 (poly16x8_t __a) - { - return (int64x2_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_f16 (float16x8_t __a) - { - return (int64x2_t) __a; - } - #endif - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_f32 (float32x4_t __a) - { - return (int64x2_t)__a; -@@ -14020,82 +16075,95 @@ vreinterpretq_s64_f32 (float32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - 
vreinterpretq_s64_p64 (poly64x2_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_p128 (poly128_t __a) - { - return (int64x2_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_u64 (uint64x2_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_s8 (int8x16_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_s16 (int16x8_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_s32 (int32x4_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_u8 (uint8x16_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_u16 (uint16x8_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s64_u32 (uint32x4_t __a) - { - return (int64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_p8 (poly8x16_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_p16 (poly16x8_t __a) - { - return (uint64x2_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_f16 (float16x8_t __a) - { - return (uint64x2_t) __a; - } - #endif - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_f32 (float32x4_t __a) - { - return (uint64x2_t)__a; -@@ -14103,82 +16171,95 @@ vreinterpretq_u64_f32 (float32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static 
__inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_p64 (poly64x2_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_p128 (poly128_t __a) - { - return (uint64x2_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_s64 (int64x2_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_s8 (int8x16_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_s16 (int16x8_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_s32 (int32x4_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_u8 (uint8x16_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_u16 (uint16x8_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u64_u32 (uint32x4_t __a) - { - return (uint64x2_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_p8 (poly8x16_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_p16 (poly16x8_t __a) - { - return (int8x16_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_f16 (float16x8_t __a) - { - return (int8x16_t) __a; - } - #endif - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_f32 (float32x4_t __a) - { - return 
(int8x16_t)__a; -@@ -14186,82 +16267,95 @@ vreinterpretq_s8_f32 (float32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_p64 (poly64x2_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_p128 (poly128_t __a) - { - return (int8x16_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_s64 (int64x2_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_u64 (uint64x2_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_s16 (int16x8_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_s32 (int32x4_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_u8 (uint8x16_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_u16 (uint16x8_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s8_u32 (uint32x4_t __a) - { - return (int8x16_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_p8 (poly8x16_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_p16 (poly16x8_t __a) - { - return (int16x8_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_f16 (float16x8_t __a) - { - return (int16x8_t) __a; - } - #endif - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ 
extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_f32 (float32x4_t __a) - { - return (int16x8_t)__a; -@@ -14269,82 +16363,95 @@ vreinterpretq_s16_f32 (float32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_p64 (poly64x2_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_p128 (poly128_t __a) - { - return (int16x8_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_s64 (int64x2_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_u64 (uint64x2_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_s8 (int8x16_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_s32 (int32x4_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_u8 (uint8x16_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_u16 (uint16x8_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s16_u32 (uint32x4_t __a) - { - return (int16x8_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_p8 (poly8x16_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_p16 (poly16x8_t __a) - { - return (int32x4_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_f16 
(float16x8_t __a) - { - return (int32x4_t)__a; - } - #endif - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_f32 (float32x4_t __a) - { - return (int32x4_t)__a; -@@ -14352,82 +16459,95 @@ vreinterpretq_s32_f32 (float32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_p64 (poly64x2_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_p128 (poly128_t __a) - { - return (int32x4_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_s64 (int64x2_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_u64 (uint64x2_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_s8 (int8x16_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_s16 (int16x8_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_u8 (uint8x16_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_u16 (uint16x8_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline int32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_s32_u32 (uint32x4_t __a) - { - return (int32x4_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_p8 (poly8x16_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_p16 (poly16x8_t __a) - { - return (uint8x16_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline uint8x16_t 
__attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_f16 (float16x8_t __a) - { - return (uint8x16_t) __a; - } - #endif - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_f32 (float32x4_t __a) - { - return (uint8x16_t)__a; -@@ -14435,82 +16555,95 @@ vreinterpretq_u8_f32 (float32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_p64 (poly64x2_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_p128 (poly128_t __a) - { - return (uint8x16_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_s64 (int64x2_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_u64 (uint64x2_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_s8 (int8x16_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_s16 (int16x8_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_s32 (int32x4_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_u16 (uint16x8_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u8_u32 (uint32x4_t __a) - { - return (uint8x16_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_p8 (poly8x16_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - 
vreinterpretq_u16_p16 (poly16x8_t __a) - { - return (uint16x8_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_f16 (float16x8_t __a) - { - return (uint16x8_t) __a; - } - #endif - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_f32 (float32x4_t __a) - { - return (uint16x8_t)__a; -@@ -14518,82 +16651,95 @@ vreinterpretq_u16_f32 (float32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_p64 (poly64x2_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_p128 (poly128_t __a) - { - return (uint16x8_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_s64 (int64x2_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_u64 (uint64x2_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_s8 (int8x16_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_s16 (int16x8_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_s32 (int32x4_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_u8 (uint8x16_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u16_u32 (uint32x4_t __a) - { - return (uint16x8_t)__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_p8 (poly8x16_t __a) - { - return (uint32x4_t)__a; - } 
- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_p16 (poly16x8_t __a) - { - return (uint32x4_t)__a; - } - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_f16 (float16x8_t __a) - { - return (uint32x4_t) __a; - } - #endif - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_f32 (float32x4_t __a) - { - return (uint32x4_t)__a; -@@ -14601,56 +16747,65 @@ vreinterpretq_u32_f32 (float32x4_t __a) - - #pragma GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_p64 (poly64x2_t __a) - { - return (uint32x4_t)__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_p128 (poly128_t __a) - { - return (uint32x4_t)__a; - } - - #pragma GCC pop_options --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_s64 (int64x2_t __a) - { - return (uint32x4_t)__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_u64 (uint64x2_t __a) - { - return (uint32x4_t)__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_s8 (int8x16_t __a) - { - return (uint32x4_t)__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_s16 (int16x8_t __a) - { - return (uint32x4_t)__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_s32 (int32x4_t __a) - { - return (uint32x4_t)__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_u8 (uint8x16_t __a) - { - return (uint32x4_t)__a; - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vreinterpretq_u32_u16 (uint16x8_t __a) - { - return (uint32x4_t)__a; -@@ -14659,7 +16814,8 @@ vreinterpretq_u32_u16 (uint16x8_t __a) - - #pragma 
GCC push_options - #pragma GCC target ("fpu=crypto-neon-fp-armv8") --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vldrq_p128 (poly128_t const * __ptr) - { - #ifdef __ARM_BIG_ENDIAN -@@ -14672,7 +16828,8 @@ vldrq_p128 (poly128_t const * __ptr) - #endif - } - --__extension__ static __inline void __attribute__ ((__always_inline__)) -+__extension__ extern __inline void -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vstrq_p128 (poly128_t * __ptr, poly128_t __val) - { - #ifdef __ARM_BIG_ENDIAN -@@ -14695,7 +16852,8 @@ vstrq_p128 (poly128_t * __ptr, poly128_t __val) - If the result is all zeroes for any half then the whole result is zeroes. - This is what the pairwise min reduction achieves. */ - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vceq_p64 (poly64x1_t __a, poly64x1_t __b) - { - uint32x2_t __t_a = vreinterpret_u32_p64 (__a); -@@ -14710,7 +16868,8 @@ vceq_p64 (poly64x1_t __a, poly64x1_t __b) - a reduction with max since if any two corresponding bits - in the two poly64_t's match, then the whole result must be all ones. */ - --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint64x1_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vtst_p64 (poly64x1_t __a, poly64x1_t __b) - { - uint32x2_t __t_a = vreinterpret_u32_p64 (__a); -@@ -14720,31 +16879,36 @@ vtst_p64 (poly64x1_t __a, poly64x1_t __b) - return vreinterpret_u64_u32 (__m); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaeseq_u8 (uint8x16_t __data, uint8x16_t __key) - { - return __builtin_arm_crypto_aese (__data, __key); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaesdq_u8 (uint8x16_t __data, uint8x16_t __key) - { - return __builtin_arm_crypto_aesd (__data, __key); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaesmcq_u8 (uint8x16_t __data) - { - return __builtin_arm_crypto_aesmc (__data); - } - --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint8x16_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vaesimcq_u8 (uint8x16_t __data) - { - return __builtin_arm_crypto_aesimc (__data); - } - --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha1h_u32 (uint32_t __hash_e) - { - uint32x4_t __t = vdupq_n_u32 (0); -@@ -14753,7 +16917,8 @@ vsha1h_u32 (uint32_t __hash_e) - return vgetq_lane_u32 (__t, 0); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t 
__wk) - { - uint32x4_t __t = vdupq_n_u32 (0); -@@ -14761,7 +16926,8 @@ vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) - return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) - { - uint32x4_t __t = vdupq_n_u32 (0); -@@ -14769,7 +16935,8 @@ vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) - return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) - { - uint32x4_t __t = vdupq_n_u32 (0); -@@ -14777,49 +16944,57 @@ vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) - return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) - { - return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) - { - return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) - { - return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) - { - return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) - { - return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7); - } - --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline uint32x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vsha256su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w8_11, uint32x4_t __w12_15) - { - return __builtin_arm_crypto_sha256su1 (__tw0_3, __w8_11, __w12_15); - } - --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_p64 (poly64_t __a, poly64_t __b) - { - return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b); - } - --__extension__ static 
__inline poly128_t __attribute__ ((__always_inline__)) -+__extension__ extern __inline poly128_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) - vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) - { - poly64_t __t1 = vget_high_p64 (__a); -@@ -14830,6 +17005,984 @@ vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) - - #pragma GCC pop_options - -+ /* Intrinsics for FP16 instructions. */ -+#pragma GCC push_options -+#pragma GCC target ("fpu=neon-fp-armv8") -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabd_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vabdv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabdq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vabdv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabs_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vabsv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vabsq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vabsv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vadd_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vaddv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vaddq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vaddv8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcage_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vcagev4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcageq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vcagev8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagt_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vcagtv4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcagtq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vcagtv8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcale_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vcalev4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaleq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vcalev8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcalt_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vcaltv4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcaltq_f16 (float16x8_t __a, 
float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vcaltv8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceq_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vceqv4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vceqv8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqz_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vceqzv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vceqzq_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vceqzv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcge_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vcgev4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgeq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vcgev8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgez_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vcgezv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgezq_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vcgezv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgt_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vcgtv4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vcgtv8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtz_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vcgtzv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcgtzq_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vcgtzv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcle_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vclev4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcleq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vclev8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclez_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vclezv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclezq_f16 (float16x8_t __a) -+{ -+ return 
(uint16x8_t)__builtin_neon_vclezv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vclt_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return (uint16x4_t)__builtin_neon_vcltv4hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return (uint16x8_t)__builtin_neon_vcltv8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltz_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vcltzv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcltzq_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vcltzv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_f16_s16 (int16x4_t __a) -+{ -+ return (float16x4_t)__builtin_neon_vcvtsv4hi (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_f16_u16 (uint16x4_t __a) -+{ -+ return (float16x4_t)__builtin_neon_vcvtuv4hi ((int16x4_t)__a); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_s16_f16 (float16x4_t __a) -+{ -+ return (int16x4_t)__builtin_neon_vcvtsv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_u16_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vcvtuv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_f16_s16 (int16x8_t __a) -+{ -+ return (float16x8_t)__builtin_neon_vcvtsv8hi (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_f16_u16 (uint16x8_t __a) -+{ -+ return (float16x8_t)__builtin_neon_vcvtuv8hi ((int16x8_t)__a); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_s16_f16 (float16x8_t __a) -+{ -+ return (int16x8_t)__builtin_neon_vcvtsv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_u16_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vcvtuv8hf (__a); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvta_s16_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vcvtasv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvta_u16_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vcvtauv4hf (__a); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtaq_s16_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vcvtasv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtaq_u16_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vcvtauv8hf (__a); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vcvtm_s16_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vcvtmsv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtm_u16_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vcvtmuv4hf (__a); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtmq_s16_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vcvtmsv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtmq_u16_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vcvtmuv8hf (__a); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtn_s16_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vcvtnsv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtn_u16_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vcvtnuv4hf (__a); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtnq_s16_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vcvtnsv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtnq_u16_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vcvtnuv8hf (__a); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtp_s16_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vcvtpsv4hf (__a); -+} -+ -+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtp_u16_f16 (float16x4_t __a) -+{ -+ return (uint16x4_t)__builtin_neon_vcvtpuv4hf (__a); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtpq_s16_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vcvtpsv8hf (__a); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtpq_u16_f16 (float16x8_t __a) -+{ -+ return (uint16x8_t)__builtin_neon_vcvtpuv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_n_f16_s16 (int16x4_t __a, const int __b) -+{ -+ return __builtin_neon_vcvts_nv4hi (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_n_f16_u16 (uint16x4_t __a, const int __b) -+{ -+ return __builtin_neon_vcvtu_nv4hi ((int16x4_t)__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_n_f16_s16 (int16x8_t __a, const int __b) -+{ -+ return __builtin_neon_vcvts_nv8hi (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_n_f16_u16 (uint16x8_t __a, const int __b) -+{ -+ return __builtin_neon_vcvtu_nv8hi ((int16x8_t)__a, __b); -+} -+ -+__extension__ extern __inline int16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_n_s16_f16 (float16x4_t __a, const int __b) -+{ -+ return __builtin_neon_vcvts_nv4hf (__a, __b); -+} -+ 
-+__extension__ extern __inline uint16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvt_n_u16_f16 (float16x4_t __a, const int __b) -+{ -+ return (uint16x4_t)__builtin_neon_vcvtu_nv4hf (__a, __b); -+} -+ -+__extension__ extern __inline int16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_n_s16_f16 (float16x8_t __a, const int __b) -+{ -+ return __builtin_neon_vcvts_nv8hf (__a, __b); -+} -+ -+__extension__ extern __inline uint16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vcvtq_n_u16_f16 (float16x8_t __a, const int __b) -+{ -+ return (uint16x8_t)__builtin_neon_vcvtu_nv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfma_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) -+{ -+ return __builtin_neon_vfmav4hf (__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmaq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -+{ -+ return __builtin_neon_vfmav8hf (__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfms_f16 (float16x4_t __a, float16x4_t __b, float16x4_t __c) -+{ -+ return __builtin_neon_vfmsv4hf (__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vfmsq_f16 (float16x8_t __a, float16x8_t __b, float16x8_t __c) -+{ -+ return __builtin_neon_vfmsv8hf (__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmax_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vmaxfv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vmaxfv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxnm_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vmaxnmv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmaxnmq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vmaxnmv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmin_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vminfv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vminfv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnm_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vminnmv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vminnmq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vminnmv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_f16 (float16x4_t __a, 
float16x4_t __b) -+{ -+ return __builtin_neon_vmulfv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_lane_f16 (float16x4_t __a, float16x4_t __b, const int __c) -+{ -+ return __builtin_neon_vmul_lanev4hf (__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmul_n_f16 (float16x4_t __a, float16_t __b) -+{ -+ return __builtin_neon_vmul_nv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vmulfv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_lane_f16 (float16x8_t __a, float16x4_t __b, const int __c) -+{ -+ return __builtin_neon_vmul_lanev8hf (__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmulq_n_f16 (float16x8_t __a, float16_t __b) -+{ -+ return __builtin_neon_vmul_nv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vneg_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vnegv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vnegq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vnegv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpadd_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vpaddv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpmax_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vpmaxfv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vpmin_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vpminfv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpe_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vrecpev4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpeq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vrecpev8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnd_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vrndv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vrndv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrnda_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vrndav4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndaq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vrndav8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, 
__gnu_inline__, __artificial__)) -+vrndm_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vrndmv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndmq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vrndmv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndn_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vrndnv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndnq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vrndnv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndp_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vrndpv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndpq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vrndpv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndx_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vrndxv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrndxq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vrndxv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrte_f16 (float16x4_t __a) -+{ -+ return __builtin_neon_vrsqrtev4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrteq_f16 (float16x8_t __a) -+{ -+ return __builtin_neon_vrsqrtev8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecps_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vrecpsv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrecpsq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vrecpsv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrts_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vrsqrtsv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrsqrtsq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vrsqrtsv8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsub_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ return __builtin_neon_vsubv4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vsubq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ return __builtin_neon_vsubv8hf (__a, __b); -+} -+ -+#endif /* __ARM_FEATURE_VECTOR_FP16_ARITHMETIC. */ -+#pragma GCC pop_options -+ -+ /* Half-precision data processing intrinsics. 
*/ -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbsl_f16 (uint16x4_t __a, float16x4_t __b, float16x4_t __c) -+{ -+ return __builtin_neon_vbslv4hf ((int16x4_t)__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vbslq_f16 (uint16x8_t __a, float16x8_t __b, float16x8_t __c) -+{ -+ return __builtin_neon_vbslv8hf ((int16x8_t)__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdup_n_f16 (float16_t __a) -+{ -+ return __builtin_neon_vdup_nv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_n_f16 (float16_t __a) -+{ -+ return __builtin_neon_vdup_nv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdup_lane_f16 (float16x4_t __a, const int __b) -+{ -+ return __builtin_neon_vdup_lanev4hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vdupq_lane_f16 (float16x4_t __a, const int __b) -+{ -+ return __builtin_neon_vdup_lanev8hf (__a, __b); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vext_f16 (float16x4_t __a, float16x4_t __b, const int __c) -+{ -+ return __builtin_neon_vextv4hf (__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vextq_f16 (float16x8_t __a, float16x8_t __b, const int __c) -+{ -+ return __builtin_neon_vextv8hf (__a, __b, __c); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmov_n_f16 (float16_t __a) -+{ -+ return __builtin_neon_vdup_nv4hf (__a); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vmovq_n_f16 (float16_t __a) -+{ -+ return __builtin_neon_vdup_nv8hf (__a); -+} -+ -+__extension__ extern __inline float16x4_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64_f16 (float16x4_t __a) -+{ -+ return (float16x4_t)__builtin_shuffle (__a, (uint16x4_t){ 3, 2, 1, 0 }); -+} -+ -+__extension__ extern __inline float16x8_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vrev64q_f16 (float16x8_t __a) -+{ -+ return -+ (float16x8_t)__builtin_shuffle (__a, -+ (uint16x8_t){ 3, 2, 1, 0, 7, 6, 5, 4 }); -+} -+ -+__extension__ extern __inline float16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrn_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ float16x4x2_t __rv; -+#ifdef __ARM_BIG_ENDIAN -+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 1, 7, 3 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 6, 2 }); -+#else -+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 2, 6 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 5, 3, 7 }); -+#endif -+ return __rv; -+} -+ -+__extension__ extern __inline float16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vtrnq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ float16x8x2_t __rv; -+#ifdef 
__ARM_BIG_ENDIAN -+ __rv.val[0] = __builtin_shuffle (__a, __b, -+ (uint16x8_t){ 9, 1, 11, 3, 13, 5, 15, 7 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, -+ (uint16x8_t){ 8, 0, 10, 2, 12, 4, 14, 6 }); -+#else -+ __rv.val[0] = __builtin_shuffle (__a, __b, -+ (uint16x8_t){ 0, 8, 2, 10, 4, 12, 6, 14 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, -+ (uint16x8_t){ 1, 9, 3, 11, 5, 13, 7, 15 }); -+#endif -+ return __rv; -+} -+ -+__extension__ extern __inline float16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzp_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ float16x4x2_t __rv; -+#ifdef __ARM_BIG_ENDIAN -+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 5, 7, 1, 3 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 6, 0, 2 }); -+#else -+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 2, 4, 6 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 1, 3, 5, 7 }); -+#endif -+ return __rv; -+} -+ -+__extension__ extern __inline float16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vuzpq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ float16x8x2_t __rv; -+#ifdef __ARM_BIG_ENDIAN -+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) -+ { 5, 7, 1, 3, 13, 15, 9, 11 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) -+ { 4, 6, 0, 2, 12, 14, 8, 10 }); -+#else -+ __rv.val[0] = __builtin_shuffle (__a, __b, -+ (uint16x8_t){ 0, 2, 4, 6, 8, 10, 12, 14 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, -+ (uint16x8_t){ 1, 3, 5, 7, 9, 11, 13, 15 }); -+#endif -+ return __rv; -+} -+ -+__extension__ extern __inline float16x4x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzip_f16 (float16x4_t __a, float16x4_t __b) -+{ -+ float16x4x2_t __rv; -+#ifdef __ARM_BIG_ENDIAN -+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 6, 2, 7, 3 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 4, 0, 5, 1 }); -+#else -+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x4_t){ 0, 4, 1, 5 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x4_t){ 2, 6, 3, 7 }); -+#endif -+ return __rv; -+} -+ -+__extension__ extern __inline float16x8x2_t -+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) -+vzipq_f16 (float16x8_t __a, float16x8_t __b) -+{ -+ float16x8x2_t __rv; -+#ifdef __ARM_BIG_ENDIAN -+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t) -+ { 10, 2, 11, 3, 8, 0, 9, 1 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t) -+ { 14, 6, 15, 7, 12, 4, 13, 5 }); -+#else -+ __rv.val[0] = __builtin_shuffle (__a, __b, -+ (uint16x8_t){ 0, 8, 1, 9, 2, 10, 3, 11 }); -+ __rv.val[1] = __builtin_shuffle (__a, __b, -+ (uint16x8_t){ 4, 12, 5, 13, 6, 14, 7, 15 }); -+#endif -+ return __rv; -+} -+ -+#endif -+ - #ifdef __cplusplus - } - #endif ---- a/src/gcc/config/arm/arm_neon_builtins.def -+++ b/src/gcc/config/arm/arm_neon_builtins.def -@@ -19,6 +19,7 @@ - <http://www.gnu.org/licenses/>. 
*/ - - VAR2 (BINOP, vadd, v2sf, v4sf) -+VAR2 (BINOP, vadd, v8hf, v4hf) - VAR3 (BINOP, vaddls, v8qi, v4hi, v2si) - VAR3 (BINOP, vaddlu, v8qi, v4hi, v2si) - VAR3 (BINOP, vaddws, v8qi, v4hi, v2si) -@@ -32,12 +33,15 @@ VAR8 (BINOP, vqaddu, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) - VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) - VAR3 (BINOP, vraddhn, v8hi, v4si, v2di) - VAR2 (BINOP, vmulf, v2sf, v4sf) -+VAR2 (BINOP, vmulf, v8hf, v4hf) - VAR2 (BINOP, vmulp, v8qi, v16qi) - VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) - VAR3 (TERNOP, vmlals, v8qi, v4hi, v2si) - VAR3 (TERNOP, vmlalu, v8qi, v4hi, v2si) - VAR2 (TERNOP, vfma, v2sf, v4sf) -+VAR2 (TERNOP, vfma, v4hf, v8hf) - VAR2 (TERNOP, vfms, v2sf, v4sf) -+VAR2 (TERNOP, vfms, v4hf, v8hf) - VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) - VAR3 (TERNOP, vmlsls, v8qi, v4hi, v2si) - VAR3 (TERNOP, vmlslu, v8qi, v4hi, v2si) -@@ -94,6 +98,7 @@ VAR8 (TERNOP_IMM, vsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) - VAR8 (TERNOP_IMM, vrsras_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) - VAR8 (TERNOP_IMM, vrsrau_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) - VAR2 (BINOP, vsub, v2sf, v4sf) -+VAR2 (BINOP, vsub, v8hf, v4hf) - VAR3 (BINOP, vsubls, v8qi, v4hi, v2si) - VAR3 (BINOP, vsublu, v8qi, v4hi, v2si) - VAR3 (BINOP, vsubws, v8qi, v4hi, v2si) -@@ -111,12 +116,27 @@ VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) - VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR2 (BINOP, vcage, v2sf, v4sf) - VAR2 (BINOP, vcagt, v2sf, v4sf) -+VAR2 (BINOP, vcage, v4hf, v8hf) -+VAR2 (BINOP, vcagt, v4hf, v8hf) -+VAR2 (BINOP, vcale, v4hf, v8hf) -+VAR2 (BINOP, vcalt, v4hf, v8hf) -+VAR2 (BINOP, vceq, v4hf, v8hf) -+VAR2 (BINOP, vcge, v4hf, v8hf) -+VAR2 (BINOP, vcgt, v4hf, v8hf) -+VAR2 (BINOP, vcle, v4hf, v8hf) -+VAR2 (BINOP, vclt, v4hf, v8hf) -+VAR2 (UNOP, vceqz, v4hf, v8hf) -+VAR2 (UNOP, vcgez, v4hf, v8hf) -+VAR2 (UNOP, vcgtz, v4hf, v8hf) -+VAR2 (UNOP, vclez, v4hf, v8hf) -+VAR2 (UNOP, vcltz, v4hf, v8hf) - VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR6 (BINOP, vabds, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR6 (BINOP, vabdu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR2 (BINOP, vabdf, v2sf, v4sf) - VAR3 (BINOP, vabdls, v8qi, v4hi, v2si) - VAR3 (BINOP, vabdlu, v8qi, v4hi, v2si) -+VAR2 (BINOP, vabd, v8hf, v4hf) - - VAR6 (TERNOP, vabas, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR6 (TERNOP, vabau, v8qi, v4hi, v2si, v16qi, v8hi, v4si) -@@ -126,27 +146,38 @@ VAR3 (TERNOP, vabalu, v8qi, v4hi, v2si) - VAR6 (BINOP, vmaxs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR6 (BINOP, vmaxu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR2 (BINOP, vmaxf, v2sf, v4sf) -+VAR2 (BINOP, vmaxf, v8hf, v4hf) -+VAR4 (BINOP, vmaxnm, v2sf, v4sf, v4hf, v8hf) - VAR6 (BINOP, vmins, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR6 (BINOP, vminu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR2 (BINOP, vminf, v2sf, v4sf) -+VAR2 (BINOP, vminf, v4hf, v8hf) -+VAR4 (BINOP, vminnm, v2sf, v4sf, v8hf, v4hf) - - VAR3 (BINOP, vpmaxs, v8qi, v4hi, v2si) - VAR3 (BINOP, vpmaxu, v8qi, v4hi, v2si) - VAR1 (BINOP, vpmaxf, v2sf) -+VAR1 (BINOP, vpmaxf, v4hf) - VAR3 (BINOP, vpmins, v8qi, v4hi, v2si) - VAR3 (BINOP, vpminu, v8qi, v4hi, v2si) - VAR1 (BINOP, vpminf, v2sf) -+VAR1 (BINOP, vpminf, v4hf) - - VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) -+VAR1 (BINOP, vpadd, v4hf) - VAR6 (UNOP, vpaddls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR6 (UNOP, vpaddlu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR6 (BINOP, vpadals, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - 
VAR6 (BINOP, vpadalu, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR2 (BINOP, vrecps, v2sf, v4sf) - VAR2 (BINOP, vrsqrts, v2sf, v4sf) -+VAR2 (BINOP, vrecps, v4hf, v8hf) -+VAR2 (BINOP, vrsqrts, v4hf, v8hf) - VAR8 (TERNOP_IMM, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) - VAR8 (TERNOP_IMM, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) - VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) -+VAR2 (UNOP, vabs, v8hf, v4hf) -+VAR2 (UNOP, vneg, v8hf, v4hf) - VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) - VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) -@@ -155,8 +186,16 @@ VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) - VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di) - VAR2 (UNOP, vcnt, v8qi, v16qi) - VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) -+VAR2 (UNOP, vrecpe, v8hf, v4hf) - VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) -+VAR2 (UNOP, vrsqrte, v4hf, v8hf) - VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) -+VAR2 (UNOP, vrnd, v8hf, v4hf) -+VAR2 (UNOP, vrnda, v8hf, v4hf) -+VAR2 (UNOP, vrndm, v8hf, v4hf) -+VAR2 (UNOP, vrndn, v8hf, v4hf) -+VAR2 (UNOP, vrndp, v8hf, v4hf) -+VAR2 (UNOP, vrndx, v8hf, v4hf) - /* FIXME: vget_lane supports more variants than this! */ - VAR10 (GETLANE, vget_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -@@ -166,8 +205,10 @@ VAR10 (SETLANE, vset_lane, - VAR5 (UNOP, vcreate, v8qi, v4hi, v2si, v2sf, di) - VAR10 (UNOP, vdup_n, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -+VAR2 (UNOP, vdup_n, v8hf, v4hf) - VAR10 (GETLANE, vdup_lane, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -+VAR2 (GETLANE, vdup_lane, v8hf, v4hf) - VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di) - VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di) - VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di) -@@ -177,7 +218,7 @@ VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di) - VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) - VAR3 (UNOP, vmovls, v8qi, v4hi, v2si) - VAR3 (UNOP, vmovlu, v8qi, v4hi, v2si) --VAR6 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -+VAR8 (SETLANE, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf, v4hf, v8hf) - VAR6 (MAC_LANE, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) - VAR2 (MAC_LANE, vmlals_lane, v4hi, v2si) - VAR2 (MAC_LANE, vmlalu_lane, v4hi, v2si) -@@ -186,7 +227,7 @@ VAR6 (MAC_LANE, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) - VAR2 (MAC_LANE, vmlsls_lane, v4hi, v2si) - VAR2 (MAC_LANE, vmlslu_lane, v4hi, v2si) - VAR2 (MAC_LANE, vqdmlsl_lane, v4hi, v2si) --VAR6 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) -+VAR8 (BINOP, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf, v4hf, v8hf) - VAR6 (MAC_N, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) - VAR2 (MAC_N, vmlals_n, v4hi, v2si) - VAR2 (MAC_N, vmlalu_n, v4hi, v2si) -@@ -197,17 +238,27 @@ VAR2 (MAC_N, vmlslu_n, v4hi, v2si) - VAR2 (MAC_N, vqdmlsl_n, v4hi, v2si) - VAR10 (SETLANE, vext, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -+VAR2 (SETLANE, vext, v8hf, v4hf) - VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) - VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) - VAR2 (UNOP, vrev16, v8qi, v16qi) - VAR4 (UNOP, vcvts, v2si, v2sf, v4si, v4sf) -+VAR2 (UNOP, vcvts, v4hi, v8hi) -+VAR2 (UNOP, vcvts, v4hf, v8hf) -+VAR2 (UNOP, vcvtu, v4hi, v8hi) -+VAR2 (UNOP, vcvtu, v4hf, v8hf) - VAR4 (UNOP, vcvtu, v2si, v2sf, v4si, v4sf) - VAR4 (BINOP, vcvts_n, v2si, v2sf, v4si, v4sf) - VAR4 (BINOP, vcvtu_n, v2si, v2sf, 
v4si, v4sf) -+VAR2 (BINOP, vcvts_n, v4hf, v8hf) -+VAR2 (BINOP, vcvtu_n, v4hi, v8hi) -+VAR2 (BINOP, vcvts_n, v4hi, v8hi) -+VAR2 (BINOP, vcvtu_n, v4hf, v8hf) - VAR1 (UNOP, vcvtv4sf, v4hf) - VAR1 (UNOP, vcvtv4hf, v4sf) - VAR10 (TERNOP, vbsl, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) -+VAR2 (TERNOP, vbsl, v8hf, v4hf) - VAR2 (UNOP, copysignf, v2sf, v4sf) - VAR2 (UNOP, vrintn, v2sf, v4sf) - VAR2 (UNOP, vrinta, v2sf, v4sf) -@@ -219,6 +270,14 @@ VAR1 (UNOP, vcvtav2sf, v2si) - VAR1 (UNOP, vcvtav4sf, v4si) - VAR1 (UNOP, vcvtauv2sf, v2si) - VAR1 (UNOP, vcvtauv4sf, v4si) -+VAR2 (UNOP, vcvtas, v4hf, v8hf) -+VAR2 (UNOP, vcvtau, v4hf, v8hf) -+VAR2 (UNOP, vcvtms, v4hf, v8hf) -+VAR2 (UNOP, vcvtmu, v4hf, v8hf) -+VAR2 (UNOP, vcvtns, v4hf, v8hf) -+VAR2 (UNOP, vcvtnu, v4hf, v8hf) -+VAR2 (UNOP, vcvtps, v4hf, v8hf) -+VAR2 (UNOP, vcvtpu, v4hf, v8hf) - VAR1 (UNOP, vcvtpv2sf, v2si) - VAR1 (UNOP, vcvtpv4sf, v4si) - VAR1 (UNOP, vcvtpuv2sf, v2si) ---- /dev/null -+++ b/src/gcc/config/arm/arm_vfp_builtins.def -@@ -0,0 +1,51 @@ -+/* VFP instruction builtin definitions. -+ Copyright (C) 2016 Free Software Foundation, Inc. -+ Contributed by ARM Ltd. -+ This file is part of GCC. -+ -+ GCC is free software; you can redistribute it and/or modify it -+ under the terms of the GNU General Public License as published -+ by the Free Software Foundation; either version 3, or (at your -+ option) any later version. -+ -+ GCC is distributed in the hope that it will be useful, but WITHOUT -+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public -+ License for more details. -+ -+ You should have received a copy of the GNU General Public License -+ along with GCC; see the file COPYING3. If not see -+ <http://www.gnu.org/licenses/>. */ -+ -+/* This file lists the builtins that may be available when VFP is enabled but -+ not NEON is enabled. The entries otherwise have the same requirements and -+ generate the same structures as those in the arm_neon_builtins.def. */ -+ -+/* FP16 Arithmetic instructions. 
*/ -+VAR1 (UNOP, vabs, hf) -+VAR2 (UNOP, vcvths, hf, si) -+VAR2 (UNOP, vcvthu, hf, si) -+VAR1 (UNOP, vcvtahs, si) -+VAR1 (UNOP, vcvtahu, si) -+VAR1 (UNOP, vcvtmhs, si) -+VAR1 (UNOP, vcvtmhu, si) -+VAR1 (UNOP, vcvtnhs, si) -+VAR1 (UNOP, vcvtnhu, si) -+VAR1 (UNOP, vcvtphs, si) -+VAR1 (UNOP, vcvtphu, si) -+VAR1 (UNOP, vrnd, hf) -+VAR1 (UNOP, vrnda, hf) -+VAR1 (UNOP, vrndi, hf) -+VAR1 (UNOP, vrndm, hf) -+VAR1 (UNOP, vrndn, hf) -+VAR1 (UNOP, vrndp, hf) -+VAR1 (UNOP, vrndx, hf) -+VAR1 (UNOP, vsqrt, hf) -+ -+VAR2 (BINOP, vcvths_n, hf, si) -+VAR2 (BINOP, vcvthu_n, hf, si) -+VAR1 (BINOP, vmaxnm, hf) -+VAR1 (BINOP, vminnm, hf) -+ -+VAR1 (TERNOP, vfma, hf) -+VAR1 (TERNOP, vfms, hf) ---- a/src/gcc/config/arm/bpabi.h -+++ b/src/gcc/config/arm/bpabi.h -@@ -75,6 +75,9 @@ - |mcpu=cortex-a57.cortex-a53 \ - |mcpu=cortex-a72 \ - |mcpu=cortex-a72.cortex-a53 \ -+ |mcpu=cortex-a73 \ -+ |mcpu=cortex-a73.cortex-a35 \ -+ |mcpu=cortex-a73.cortex-a53 \ - |mcpu=exynos-m1 \ - |mcpu=qdf24xx \ - |mcpu=xgene1 \ -@@ -90,6 +93,11 @@ - |march=armv8-a+crc \ - |march=armv8.1-a \ - |march=armv8.1-a+crc \ -+ |march=armv8.2-a \ -+ |march=armv8.2-a+fp16 \ -+ |march=armv8-m.base|mcpu=cortex-m23 \ -+ |march=armv8-m.main \ -+ |march=armv8-m.main+dsp|mcpu=cortex-m33 \ - :%{!r:--be8}}}" - #else - #define BE8_LINK_SPEC \ -@@ -105,6 +113,9 @@ - |mcpu=cortex-a57.cortex-a53 \ - |mcpu=cortex-a72 \ - |mcpu=cortex-a72.cortex-a53 \ -+ |mcpu=cortex-a73 \ -+ |mcpu=cortex-a73.cortex-a35 \ -+ |mcpu=cortex-a73.cortex-a53 \ - |mcpu=exynos-m1 \ - |mcpu=qdf24xx \ - |mcpu=xgene1 \ -@@ -121,6 +132,11 @@ - |march=armv8-a+crc \ - |march=armv8.1-a \ - |march=armv8.1-a+crc \ -+ |march=armv8.2-a \ -+ |march=armv8.2-a+fp16 \ -+ |march=armv8-m.base|mcpu=cortex-m23 \ -+ |march=armv8-m.main \ -+ |march=armv8-m.main+dsp|mcpu=cortex-m33 \ - :%{!r:--be8}}}" - #endif - ---- a/src/gcc/config/arm/constraints.md -+++ b/src/gcc/config/arm/constraints.md -@@ -34,11 +34,13 @@ - ;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Do, Dv, Dy, Di, Dt, Dp, Dz - ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe - ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py -+;; in all states: Pf - - ;; The following memory constraints have been used: --;; in ARM/Thumb-2 state: Q, Uh, Ut, Uv, Uy, Un, Um, Us -+;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us - ;; in ARM state: Uq - ;; in Thumb state: Uu, Uw -+;; in all states: Q - - - (define_register_constraint "t" "TARGET_32BIT ? VFP_LO_REGS : NO_REGS" -@@ -66,7 +68,7 @@ - - (define_constraint "j" - "A constant suitable for a MOVW instruction. (ARM/Thumb-2)" -- (and (match_test "TARGET_32BIT && arm_arch_thumb2") -+ (and (match_test "TARGET_HAVE_MOVT") - (ior (and (match_code "high") - (match_test "arm_valid_symbolic_address_p (XEXP (op, 0))")) - (and (match_code "const_int") -@@ -180,6 +182,13 @@ - (and (match_code "const_int") - (match_test "TARGET_THUMB1 && ival >= 256 && ival <= 510"))) - -+(define_constraint "Pf" -+ "Memory models except relaxed, consume or release ones." 
-+ (and (match_code "const_int") -+ (match_test "!is_mm_relaxed (memmodel_from_int (ival)) -+ && !is_mm_consume (memmodel_from_int (ival)) -+ && !is_mm_release (memmodel_from_int (ival))"))) -+ - (define_constraint "Ps" - "@internal In Thumb-2 state a constant in the range -255 to +255" - (and (match_code "const_int") -@@ -333,13 +342,13 @@ - "@internal - In ARM/ Thumb2 a const_double which can be used with a vcvt.f32.s32 with fract bits operation" - (and (match_code "const_double") -- (match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)"))) -+ (match_test "TARGET_32BIT && vfp3_const_double_for_fract_bits (op)"))) - - (define_constraint "Dp" - "@internal - In ARM/ Thumb2 a const_double which can be used with a vcvt.s32.f32 with bits operation" - (and (match_code "const_double") -- (match_test "TARGET_32BIT && TARGET_VFP -+ (match_test "TARGET_32BIT - && vfp3_const_double_for_bits (op) > 0"))) - - (define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS" -@@ -407,7 +416,7 @@ - - (define_memory_constraint "Q" - "@internal -- In ARM/Thumb-2 state an address that is a single base register." -+ An address that is a single base register." - (and (match_code "mem") - (match_test "REG_P (XEXP (op, 0))"))) - ---- a/src/gcc/config/arm/cortex-a53.md -+++ b/src/gcc/config/arm/cortex-a53.md -@@ -30,6 +30,7 @@ - - (define_cpu_unit "cortex_a53_slot0" "cortex_a53") - (define_cpu_unit "cortex_a53_slot1" "cortex_a53") -+(final_presence_set "cortex_a53_slot1" "cortex_a53_slot0") - - (define_reservation "cortex_a53_slot_any" - "cortex_a53_slot0\ -@@ -71,41 +72,43 @@ - - (define_insn_reservation "cortex_a53_shift" 2 - (and (eq_attr "tune" "cortexa53") -- (eq_attr "type" "adr,shift_imm,shift_reg,mov_imm,mvn_imm")) -+ (eq_attr "type" "adr,shift_imm,mov_imm,mvn_imm,mov_shift")) - "cortex_a53_slot_any") - --(define_insn_reservation "cortex_a53_alu_rotate_imm" 2 -+(define_insn_reservation "cortex_a53_shift_reg" 2 - (and (eq_attr "tune" "cortexa53") -- (eq_attr "type" "rotate_imm")) -- "(cortex_a53_slot1) -- | (cortex_a53_single_issue)") -+ (eq_attr "type" "shift_reg,mov_shift_reg")) -+ "cortex_a53_slot_any+cortex_a53_hazard") - - (define_insn_reservation "cortex_a53_alu" 3 - (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm, - alu_sreg,alus_sreg,logic_reg,logics_reg, - adc_imm,adcs_imm,adc_reg,adcs_reg, -- bfm,csel,clz,rbit,rev,alu_dsp_reg, -- mov_reg,mvn_reg, -- mrs,multiple,no_insn")) -+ csel,clz,rbit,rev,alu_dsp_reg, -+ mov_reg,mvn_reg,mrs,multiple,no_insn")) - "cortex_a53_slot_any") - - (define_insn_reservation "cortex_a53_alu_shift" 3 - (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "alu_shift_imm,alus_shift_imm, - crc,logic_shift_imm,logics_shift_imm, -- alu_ext,alus_ext, -- extend,mov_shift,mvn_shift")) -+ alu_ext,alus_ext,bfm,bfx,extend,mvn_shift")) - "cortex_a53_slot_any") - - (define_insn_reservation "cortex_a53_alu_shift_reg" 3 - (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "alu_shift_reg,alus_shift_reg, - logic_shift_reg,logics_shift_reg, -- mov_shift_reg,mvn_shift_reg")) -+ mvn_shift_reg")) - "cortex_a53_slot_any+cortex_a53_hazard") - --(define_insn_reservation "cortex_a53_mul" 3 -+(define_insn_reservation "cortex_a53_alu_extr" 3 -+ (and (eq_attr "tune" "cortexa53") -+ (eq_attr "type" "rotate_imm")) -+ "cortex_a53_slot1|cortex_a53_single_issue") -+ -+(define_insn_reservation "cortex_a53_mul" 4 - (and (eq_attr "tune" "cortexa53") - (ior (eq_attr "mul32" "yes") - (eq_attr "mul64" "yes"))) -@@ -189,49 
+192,43 @@ - (define_insn_reservation "cortex_a53_branch" 0 - (and (eq_attr "tune" "cortexa53") - (eq_attr "type" "branch,call")) -- "cortex_a53_slot_any,cortex_a53_branch") -+ "cortex_a53_slot_any+cortex_a53_branch") - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; General-purpose register bypasses - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - --;; Model bypasses for unshifted operands to ALU instructions. -+;; Model bypasses for ALU to ALU instructions. -+ -+(define_bypass 0 "cortex_a53_shift*" -+ "cortex_a53_alu") - --(define_bypass 1 "cortex_a53_shift" -- "cortex_a53_shift") -+(define_bypass 1 "cortex_a53_shift*" -+ "cortex_a53_shift*,cortex_a53_alu_*") - --(define_bypass 1 "cortex_a53_alu, -- cortex_a53_alu_shift*, -- cortex_a53_alu_rotate_imm, -- cortex_a53_shift" -+(define_bypass 1 "cortex_a53_alu*" - "cortex_a53_alu") - --(define_bypass 2 "cortex_a53_alu, -- cortex_a53_alu_shift*" -+(define_bypass 1 "cortex_a53_alu*" - "cortex_a53_alu_shift*" - "aarch_forward_to_shift_is_not_shifted_reg") - --;; In our model, we allow any general-purpose register operation to --;; bypass to the accumulator operand of an integer MADD-like operation. -+(define_bypass 2 "cortex_a53_alu*" -+ "cortex_a53_alu_*,cortex_a53_shift*") -+ -+;; Model a bypass from MUL/MLA to MLA instructions. - --(define_bypass 1 "cortex_a53_alu*, -- cortex_a53_load*, -- cortex_a53_mul" -+(define_bypass 1 "cortex_a53_mul" - "cortex_a53_mul" - "aarch_accumulator_forwarding") - --;; Model a bypass from MLA/MUL to many ALU instructions. -+;; Model a bypass from MUL/MLA to ALU instructions. - - (define_bypass 2 "cortex_a53_mul" -- "cortex_a53_alu, -- cortex_a53_alu_shift*") -- --;; We get neater schedules by allowing an MLA/MUL to feed an --;; early load address dependency to a load. -+ "cortex_a53_alu") - --(define_bypass 2 "cortex_a53_mul" -- "cortex_a53_load*" -- "arm_early_load_addr_dep") -+(define_bypass 3 "cortex_a53_mul" -+ "cortex_a53_alu_*,cortex_a53_shift*") - - ;; Model bypasses for loads which are to be consumed by the ALU. - -@@ -239,47 +236,46 @@ - "cortex_a53_alu") - - (define_bypass 3 "cortex_a53_load1" -- "cortex_a53_alu_shift*") -+ "cortex_a53_alu_*,cortex_a53_shift*") -+ -+(define_bypass 3 "cortex_a53_load2" -+ "cortex_a53_alu") - - ;; Model a bypass for ALU instructions feeding stores. - --(define_bypass 1 "cortex_a53_alu*" -- "cortex_a53_store1, -- cortex_a53_store2, -- cortex_a53_store3plus" -+(define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*" -+ "cortex_a53_store*" - "arm_no_early_store_addr_dep") - - ;; Model a bypass for load and multiply instructions feeding stores. - --(define_bypass 2 "cortex_a53_mul, -- cortex_a53_load1, -- cortex_a53_load2, -- cortex_a53_load3plus" -- "cortex_a53_store1, -- cortex_a53_store2, -- cortex_a53_store3plus" -+(define_bypass 1 "cortex_a53_mul, -+ cortex_a53_load*" -+ "cortex_a53_store*" - "arm_no_early_store_addr_dep") - - ;; Model a GP->FP register move as similar to stores. - --(define_bypass 1 "cortex_a53_alu*" -+(define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*" - "cortex_a53_r2f") - --(define_bypass 2 "cortex_a53_mul, -+(define_bypass 1 "cortex_a53_mul, - cortex_a53_load1, -- cortex_a53_load2, -- cortex_a53_load3plus" -+ cortex_a53_load2" - "cortex_a53_r2f") - --;; Shifts feeding Load/Store addresses may not be ready in time. 
-+(define_bypass 2 "cortex_a53_alu*" -+ "cortex_a53_r2f_cvt") - --(define_bypass 3 "cortex_a53_shift" -- "cortex_a53_load*" -- "arm_early_load_addr_dep") -+(define_bypass 3 "cortex_a53_mul, -+ cortex_a53_load1, -+ cortex_a53_load2" -+ "cortex_a53_r2f_cvt") - --(define_bypass 3 "cortex_a53_shift" -- "cortex_a53_store*" -- "arm_early_store_addr_dep") -+;; Model flag forwarding to branches. -+ -+(define_bypass 0 "cortex_a53_alu*,cortex_a53_shift*" -+ "cortex_a53_branch") - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; Floating-point/Advanced SIMD. -@@ -535,19 +531,25 @@ - ;; Floating-point to/from core transfers. - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - --(define_insn_reservation "cortex_a53_r2f" 6 -+(define_insn_reservation "cortex_a53_r2f" 2 - (and (eq_attr "tune" "cortexa53") -- (eq_attr "type" "f_mcr,f_mcrr,f_cvti2f, -- neon_from_gp, neon_from_gp_q")) -- "cortex_a53_slot_any,cortex_a53_store, -- nothing,cortex_a53_fp_alu") -+ (eq_attr "type" "f_mcr,f_mcrr")) -+ "cortex_a53_slot_any,cortex_a53_fp_alu") - --(define_insn_reservation "cortex_a53_f2r" 6 -+(define_insn_reservation "cortex_a53_f2r" 4 - (and (eq_attr "tune" "cortexa53") -- (eq_attr "type" "f_mrc,f_mrrc,f_cvtf2i, -- neon_to_gp, neon_to_gp_q")) -- "cortex_a53_slot_any,cortex_a53_fp_alu, -- nothing,cortex_a53_store") -+ (eq_attr "type" "f_mrc,f_mrrc")) -+ "cortex_a53_slot_any,cortex_a53_fp_alu") -+ -+(define_insn_reservation "cortex_a53_r2f_cvt" 4 -+ (and (eq_attr "tune" "cortexa53") -+ (eq_attr "type" "f_cvti2f, neon_from_gp, neon_from_gp_q")) -+ "cortex_a53_slot_any,cortex_a53_fp_alu") -+ -+(define_insn_reservation "cortex_a53_f2r_cvt" 5 -+ (and (eq_attr "tune" "cortexa53") -+ (eq_attr "type" "f_cvtf2i, neon_to_gp, neon_to_gp_q")) -+ "cortex_a53_slot_any,cortex_a53_fp_alu") - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; Floating-point flag transfer. ---- a/src/gcc/config/arm/cortex-a57.md -+++ b/src/gcc/config/arm/cortex-a57.md -@@ -297,7 +297,7 @@ - (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\ - alu_sreg,alus_sreg,logic_reg,logics_reg,\ - adc_imm,adcs_imm,adc_reg,adcs_reg,\ -- adr,bfm,clz,rbit,rev,alu_dsp_reg,\ -+ adr,bfx,extend,clz,rbit,rev,alu_dsp_reg,\ - rotate_imm,shift_imm,shift_reg,\ - mov_imm,mov_reg,\ - mvn_imm,mvn_reg,\ -@@ -307,7 +307,7 @@ - ;; ALU ops with immediate shift - (define_insn_reservation "cortex_a57_alu_shift" 3 - (and (eq_attr "tune" "cortexa57") -- (eq_attr "type" "extend,\ -+ (eq_attr "type" "bfm,\ - alu_shift_imm,alus_shift_imm,\ - crc,logic_shift_imm,logics_shift_imm,\ - mov_shift,mvn_shift")) -@@ -726,7 +726,7 @@ - - (define_insn_reservation "cortex_a57_fp_cpys" 4 - (and (eq_attr "tune" "cortexa57") -- (eq_attr "type" "fmov")) -+ (eq_attr "type" "fmov,fcsel")) - "(ca57_cx1|ca57_cx2)") - - (define_insn_reservation "cortex_a57_fp_divs" 12 ---- a/src/gcc/config/arm/cortex-a8-neon.md -+++ b/src/gcc/config/arm/cortex-a8-neon.md -@@ -357,30 +357,34 @@ - (eq_attr "type" "fmuls")) - "cortex_a8_vfp,cortex_a8_vfplite*11") - -+;; Don't model a reservation for more than 15 cycles as this explodes the -+;; state space of the automaton for little gain. It is unlikely that the -+;; scheduler will find enough instructions to hide the full latency of the -+;; instructions. 
- (define_insn_reservation "cortex_a8_vfp_muld" 17 - (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fmuld")) -- "cortex_a8_vfp,cortex_a8_vfplite*16") -+ "cortex_a8_vfp,cortex_a8_vfplite*15") - - (define_insn_reservation "cortex_a8_vfp_macs" 21 - (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fmacs,ffmas")) -- "cortex_a8_vfp,cortex_a8_vfplite*20") -+ "cortex_a8_vfp,cortex_a8_vfplite*15") - - (define_insn_reservation "cortex_a8_vfp_macd" 26 - (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fmacd,ffmad")) -- "cortex_a8_vfp,cortex_a8_vfplite*25") -+ "cortex_a8_vfp,cortex_a8_vfplite*15") - - (define_insn_reservation "cortex_a8_vfp_divs" 37 - (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fdivs, fsqrts")) -- "cortex_a8_vfp,cortex_a8_vfplite*36") -+ "cortex_a8_vfp,cortex_a8_vfplite*15") - - (define_insn_reservation "cortex_a8_vfp_divd" 65 - (and (eq_attr "tune" "cortexa8") - (eq_attr "type" "fdivd, fsqrtd")) -- "cortex_a8_vfp,cortex_a8_vfplite*64") -+ "cortex_a8_vfp,cortex_a8_vfplite*15") - - ;; Comparisons can actually take 7 cycles sometimes instead of four, - ;; but given all the other instructions lumped into type=ffarith that ---- a/src/gcc/config/arm/crypto.md -+++ b/src/gcc/config/arm/crypto.md -@@ -18,14 +18,27 @@ - ;; along with GCC; see the file COPYING3. If not see - ;; <http://www.gnu.org/licenses/>. - -+ -+;; When AES/AESMC fusion is enabled we want the register allocation to -+;; look like: -+;; AESE Vn, _ -+;; AESMC Vn, Vn -+;; So prefer to tie operand 1 to operand 0 when fusing. -+ - (define_insn "crypto_<crypto_pattern>" -- [(set (match_operand:<crypto_mode> 0 "register_operand" "=w") -+ [(set (match_operand:<crypto_mode> 0 "register_operand" "=w,w") - (unspec:<crypto_mode> [(match_operand:<crypto_mode> 1 -- "register_operand" "w")] -+ "register_operand" "0,w")] - CRYPTO_UNARY))] - "TARGET_CRYPTO" - "<crypto_pattern>.<crypto_size_sfx>\\t%q0, %q1" -- [(set_attr "type" "<crypto_type>")] -+ [(set_attr "type" "<crypto_type>") -+ (set_attr_alternative "enabled" -+ [(if_then_else (match_test -+ "arm_fusion_enabled_p (tune_params::FUSE_AES_AESMC)") -+ (const_string "yes" ) -+ (const_string "no")) -+ (const_string "yes")])] - ) - - (define_insn "crypto_<crypto_pattern>" ---- a/src/gcc/config/arm/driver-arm.c -+++ b/src/gcc/config/arm/driver-arm.c -@@ -46,6 +46,12 @@ static struct vendor_cpu arm_cpu_table[] = { - {"0xc0d", "armv7ve", "cortex-a12"}, - {"0xc0e", "armv7ve", "cortex-a17"}, - {"0xc0f", "armv7ve", "cortex-a15"}, -+ {"0xd01", "armv8-a+crc", "cortex-a32"}, -+ {"0xd04", "armv8-a+crc", "cortex-a35"}, -+ {"0xd03", "armv8-a+crc", "cortex-a53"}, -+ {"0xd07", "armv8-a+crc", "cortex-a57"}, -+ {"0xd08", "armv8-a+crc", "cortex-a72"}, -+ {"0xd09", "armv8-a+crc", "cortex-a73"}, - {"0xc14", "armv7-r", "cortex-r4"}, - {"0xc15", "armv7-r", "cortex-r5"}, - {"0xc20", "armv6-m", "cortex-m0"}, ---- a/src/gcc/config/arm/elf.h -+++ b/src/gcc/config/arm/elf.h -@@ -75,16 +75,7 @@ - - /* We might need a ARM specific header to function declarations. */ - #undef ASM_DECLARE_FUNCTION_NAME --#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ -- do \ -- { \ -- ARM_DECLARE_FUNCTION_NAME (FILE, NAME, DECL); \ -- ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function"); \ -- ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL)); \ -- ASM_OUTPUT_LABEL(FILE, NAME); \ -- ARM_OUTPUT_FN_UNWIND (FILE, TRUE); \ -- } \ -- while (0) -+#define ASM_DECLARE_FUNCTION_NAME arm_asm_declare_function_name - - /* We might need an ARM specific trailer for function declarations. 
*/ - #undef ASM_DECLARE_FUNCTION_SIZE -@@ -148,8 +139,9 @@ - while (0) - - /* Horrible hack: We want to prevent some libgcc routines being included -- for some multilibs. */ --#ifndef __ARM_ARCH_6M__ -+ for some multilibs. The condition should match the one in -+ libgcc/config/arm/lib1funcs.S. */ -+#if __ARM_ARCH_ISA_ARM || __ARM_ARCH_ISA_THUMB != 1 - #undef L_fixdfsi - #undef L_fixunsdfsi - #undef L_truncdfsf2 ---- a/src/gcc/config/arm/exynos-m1.md -+++ b/src/gcc/config/arm/exynos-m1.md -@@ -358,7 +358,7 @@ - (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\ - alu_sreg, alus_sreg, logic_reg, logics_reg,\ - adc_imm, adcs_imm, adc_reg, adcs_reg,\ -- adr, bfm, clz, rbit, rev, csel, alu_dsp_reg,\ -+ adr, bfm, bfx, clz, rbit, rev, csel, alu_dsp_reg,\ - shift_imm, shift_reg, rotate_imm, extend,\ - mov_imm, mov_reg,\ - mvn_imm, mvn_reg,\ -@@ -372,7 +372,7 @@ - (eq_attr "type" "alu_imm, alus_imm, logic_imm, logics_imm,\ - alu_sreg, alus_sreg, logic_reg, logics_reg,\ - adc_imm, adcs_imm, adc_reg, adcs_reg,\ -- adr, bfm, clz, rbit, rev, alu_dsp_reg,\ -+ adr, bfm, bfx, clz, rbit, rev, alu_dsp_reg,\ - shift_imm, shift_reg, rotate_imm, extend,\ - mov_imm, mov_reg,\ - mvn_imm, mvn_reg,\ ---- a/src/gcc/config/arm/iterators.md -+++ b/src/gcc/config/arm/iterators.md -@@ -46,7 +46,7 @@ - (define_mode_iterator SIDI [SI DI]) - - ;; A list of modes which the VFP unit can handle --(define_mode_iterator SDF [(SF "TARGET_VFP") (DF "TARGET_VFP_DOUBLE")]) -+(define_mode_iterator SDF [(SF "") (DF "TARGET_VFP_DOUBLE")]) - - ;; Integer element sizes implemented by IWMMXT. - (define_mode_iterator VMMX [V2SI V4HI V8QI]) -@@ -119,6 +119,10 @@ - ;; All supported vector modes (except those with 64-bit integer elements). - (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF]) - -+;; All supported vector modes including 16-bit float modes. -+(define_mode_iterator VDQWH [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF -+ V8HF V4HF]) -+ - ;; Supported integer vector modes (not 64 bit elements). - (define_mode_iterator VDQIW [V8QI V16QI V4HI V8HI V2SI V4SI]) - -@@ -141,6 +145,9 @@ - ;; Vector modes form int->float conversions. - (define_mode_iterator VCVTI [V2SI V4SI]) - -+;; Vector modes for int->half conversions. -+(define_mode_iterator VCVTHI [V4HI V8HI]) -+ - ;; Vector modes for doubleword multiply-accumulate, etc. insns. - (define_mode_iterator VMD [V4HI V2SI V2SF]) - -@@ -174,6 +181,9 @@ - ;; Modes with 8-bit, 16-bit and 32-bit elements. - (define_mode_iterator VU [V16QI V8HI V4SI]) - -+;; Vector modes for 16-bit floating-point support. -+(define_mode_iterator VH [V8HF V4HF]) -+ - ;; Iterators used for fixed-point support. - (define_mode_iterator FIXED [QQ HQ SQ UQQ UHQ USQ HA SA UHA USA]) - -@@ -192,14 +202,17 @@ - ;; Code iterators - ;;---------------------------------------------------------------------------- - --;; A list of condition codes used in compare instructions where --;; the carry flag from the addition is used instead of doing the -+;; A list of condition codes used in compare instructions where -+;; the carry flag from the addition is used instead of doing the - ;; compare a second time. - (define_code_iterator LTUGEU [ltu geu]) - - ;; The signed gt, ge comparisons - (define_code_iterator GTGE [gt ge]) - -+;; The signed gt, ge, lt, le comparisons -+(define_code_iterator GLTE [gt ge lt le]) -+ - ;; The unsigned gt, ge comparisons - (define_code_iterator GTUGEU [gtu geu]) - -@@ -228,6 +241,12 @@ - ;; Binary operators whose second operand can be shifted. 
- (define_code_iterator SHIFTABLE_OPS [plus minus ior xor and]) - -+;; Operations on the sign of a number. -+(define_code_iterator ABSNEG [abs neg]) -+ -+;; Conversions. -+(define_code_iterator FCVT [unsigned_float float]) -+ - ;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows - ;; a stack pointer opoerand. The minus operation is a candidate for an rsub - ;; and hence only plus is supported. -@@ -251,10 +270,14 @@ - (define_int_iterator VRINT [UNSPEC_VRINTZ UNSPEC_VRINTP UNSPEC_VRINTM - UNSPEC_VRINTR UNSPEC_VRINTX UNSPEC_VRINTA]) - --(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE UNSPEC_VCLT UNSPEC_VCLE]) -+(define_int_iterator NEON_VCMP [UNSPEC_VCEQ UNSPEC_VCGT UNSPEC_VCGE -+ UNSPEC_VCLT UNSPEC_VCLE]) - - (define_int_iterator NEON_VACMP [UNSPEC_VCAGE UNSPEC_VCAGT]) - -+(define_int_iterator NEON_VAGLTE [UNSPEC_VCAGE UNSPEC_VCAGT -+ UNSPEC_VCALE UNSPEC_VCALT]) -+ - (define_int_iterator VCVT [UNSPEC_VRINTP UNSPEC_VRINTM UNSPEC_VRINTA]) - - (define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM -@@ -323,6 +346,22 @@ - - (define_int_iterator VCVT_US_N [UNSPEC_VCVT_S_N UNSPEC_VCVT_U_N]) - -+(define_int_iterator VCVT_HF_US_N [UNSPEC_VCVT_HF_S_N UNSPEC_VCVT_HF_U_N]) -+ -+(define_int_iterator VCVT_SI_US_N [UNSPEC_VCVT_SI_S_N UNSPEC_VCVT_SI_U_N]) -+ -+(define_int_iterator VCVT_HF_US [UNSPEC_VCVTA_S UNSPEC_VCVTA_U -+ UNSPEC_VCVTM_S UNSPEC_VCVTM_U -+ UNSPEC_VCVTN_S UNSPEC_VCVTN_U -+ UNSPEC_VCVTP_S UNSPEC_VCVTP_U]) -+ -+(define_int_iterator VCVTH_US [UNSPEC_VCVTH_S UNSPEC_VCVTH_U]) -+ -+;; Operators for FP16 instructions. -+(define_int_iterator FP16_RND [UNSPEC_VRND UNSPEC_VRNDA -+ UNSPEC_VRNDM UNSPEC_VRNDN -+ UNSPEC_VRNDP UNSPEC_VRNDX]) -+ - (define_int_iterator VQMOVN [UNSPEC_VQMOVN_S UNSPEC_VQMOVN_U]) - - (define_int_iterator VMOVL [UNSPEC_VMOVL_S UNSPEC_VMOVL_U]) -@@ -366,6 +405,8 @@ - - (define_int_iterator VQRDMLH_AS [UNSPEC_VQRDMLAH UNSPEC_VQRDMLSH]) - -+(define_int_iterator VFM_LANE_AS [UNSPEC_VFMA_LANE UNSPEC_VFMS_LANE]) -+ - ;;---------------------------------------------------------------------------- - ;; Mode attributes - ;;---------------------------------------------------------------------------- -@@ -384,6 +425,10 @@ - (define_mode_attr V_cvtto [(V2SI "v2sf") (V2SF "v2si") - (V4SI "v4sf") (V4SF "v4si")]) - -+;; (Opposite) mode to convert to/from for vector-half mode conversions. -+(define_mode_attr VH_CVTTO [(V4HI "V4HF") (V4HF "V4HI") -+ (V8HI "V8HF") (V8HF "V8HI")]) -+ - ;; Define element mode for each vector mode. - (define_mode_attr V_elem [(V8QI "QI") (V16QI "QI") - (V4HI "HI") (V8HI "HI") -@@ -427,12 +472,13 @@ - - ;; Register width from element mode - (define_mode_attr V_reg [(V8QI "P") (V16QI "q") -- (V4HI "P") (V8HI "q") -- (V4HF "P") (V8HF "q") -- (V2SI "P") (V4SI "q") -- (V2SF "P") (V4SF "q") -- (DI "P") (V2DI "q") -- (SF "") (DF "P")]) -+ (V4HI "P") (V8HI "q") -+ (V4HF "P") (V8HF "q") -+ (V2SI "P") (V4SI "q") -+ (V2SF "P") (V4SF "q") -+ (DI "P") (V2DI "q") -+ (SF "") (DF "P") -+ (HF "")]) - - ;; Wider modes with the same number of elements. - (define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")]) -@@ -448,7 +494,7 @@ - (define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI") - (V8HF "V4HF") (V4SI "V2SI") - (V4SF "V2SF") (V2DF "DF") -- (V2DI "DI")]) -+ (V2DI "DI") (V4HF "HF")]) - - ;; Same, but lower-case. - (define_mode_attr V_half [(V16QI "v8qi") (V8HI "v4hi") -@@ -475,9 +521,10 @@ - ;; Used for neon_vdup_lane, where the second operand is double-sized - ;; even when the first one is quad. 
- (define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI") -- (V4SI "V2SI") (V4SF "V2SF") -- (V8QI "V8QI") (V4HI "V4HI") -- (V2SI "V2SI") (V2SF "V2SF")]) -+ (V4SI "V2SI") (V4SF "V2SF") -+ (V8QI "V8QI") (V4HI "V4HI") -+ (V2SI "V2SI") (V2SF "V2SF") -+ (V8HF "V4HF") (V4HF "V4HF")]) - - ;; Mode of result of comparison operations (and bit-select operand 1). - (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") -@@ -496,18 +543,22 @@ - ;; Get element type from double-width mode, for operations where we - ;; don't care about signedness. - (define_mode_attr V_if_elem [(V8QI "i8") (V16QI "i8") -- (V4HI "i16") (V8HI "i16") -- (V2SI "i32") (V4SI "i32") -- (DI "i64") (V2DI "i64") -- (V2SF "f32") (V4SF "f32") -- (SF "f32") (DF "f64")]) -+ (V4HI "i16") (V8HI "i16") -+ (V2SI "i32") (V4SI "i32") -+ (DI "i64") (V2DI "i64") -+ (V2SF "f32") (V4SF "f32") -+ (SF "f32") (DF "f64") -+ (HF "f16") (V4HF "f16") -+ (V8HF "f16")]) - - ;; Same, but for operations which work on signed values. - (define_mode_attr V_s_elem [(V8QI "s8") (V16QI "s8") -- (V4HI "s16") (V8HI "s16") -- (V2SI "s32") (V4SI "s32") -- (DI "s64") (V2DI "s64") -- (V2SF "f32") (V4SF "f32")]) -+ (V4HI "s16") (V8HI "s16") -+ (V2SI "s32") (V4SI "s32") -+ (DI "s64") (V2DI "s64") -+ (V2SF "f32") (V4SF "f32") -+ (HF "f16") (V4HF "f16") -+ (V8HF "f16")]) - - ;; Same, but for operations which work on unsigned values. - (define_mode_attr V_u_elem [(V8QI "u8") (V16QI "u8") -@@ -524,17 +575,22 @@ - (V2SF "32") (V4SF "32")]) - - (define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8") -- (V4HI "16") (V8HI "16") -- (V2SI "32") (V4SI "32") -- (DI "64") (V2DI "64") -+ (V4HI "16") (V8HI "16") -+ (V2SI "32") (V4SI "32") -+ (DI "64") (V2DI "64") - (V4HF "16") (V8HF "16") -- (V2SF "32") (V4SF "32")]) -+ (V2SF "32") (V4SF "32")]) - - (define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b") -- (V4HI "h") (V8HI "h") -- (V2SI "s") (V4SI "s") -- (DI "d") (V2DI "d") -- (V2SF "s") (V4SF "s")]) -+ (V4HI "h") (V8HI "h") -+ (V2SI "s") (V4SI "s") -+ (DI "d") (V2DI "d") -+ (V2SF "s") (V4SF "s") -+ (V2SF "s") (V4SF "s")]) -+ -+(define_mode_attr VH_elem_ch [(V4HI "s") (V8HI "s") -+ (V4HF "s") (V8HF "s") -+ (HF "s")]) - - ;; Element sizes for duplicating ARM registers to all elements of a vector. - (define_mode_attr VD_dup [(V8QI "8") (V4HI "16") (V2SI "32") (V2SF "32")]) -@@ -570,29 +626,30 @@ - ;; This mode attribute is used to obtain the correct register constraints. 
- - (define_mode_attr scalar_mul_constraint [(V4HI "x") (V2SI "t") (V2SF "t") -- (V8HI "x") (V4SI "t") (V4SF "t")]) -+ (V8HI "x") (V4SI "t") (V4SF "t") -+ (V8HF "x") (V4HF "x")]) - - ;; Predicates used for setting type for neon instructions - - (define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") -- (V4HI "false") (V8HI "false") -- (V2SI "false") (V4SI "false") -- (V4HF "true") (V8HF "true") -- (V2SF "true") (V4SF "true") -- (DI "false") (V2DI "false")]) -+ (V4HI "false") (V8HI "false") -+ (V2SI "false") (V4SI "false") -+ (V4HF "true") (V8HF "true") -+ (V2SF "true") (V4SF "true") -+ (DI "false") (V2DI "false")]) - - (define_mode_attr Scalar_mul_8_16 [(V8QI "true") (V16QI "true") -- (V4HI "true") (V8HI "true") -- (V2SI "false") (V4SI "false") -- (V2SF "false") (V4SF "false") -- (DI "false") (V2DI "false")]) -- -+ (V4HI "true") (V8HI "true") -+ (V2SI "false") (V4SI "false") -+ (V2SF "false") (V4SF "false") -+ (DI "false") (V2DI "false")]) - - (define_mode_attr Is_d_reg [(V8QI "true") (V16QI "false") -- (V4HI "true") (V8HI "false") -- (V2SI "true") (V4SI "false") -- (V2SF "true") (V4SF "false") -- (DI "true") (V2DI "false")]) -+ (V4HI "true") (V8HI "false") -+ (V2SI "true") (V4SI "false") -+ (V2SF "true") (V4SF "false") -+ (DI "true") (V2DI "false") -+ (V4HF "true") (V8HF "false")]) - - (define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16") - (V4HF "4") (V8HF "8") -@@ -637,12 +694,14 @@ - - ;; Mode attribute used to build the "type" attribute. - (define_mode_attr q [(V8QI "") (V16QI "_q") -- (V4HI "") (V8HI "_q") -- (V2SI "") (V4SI "_q") -+ (V4HI "") (V8HI "_q") -+ (V2SI "") (V4SI "_q") -+ (V4HF "") (V8HF "_q") -+ (V2SF "") (V4SF "_q") - (V4HF "") (V8HF "_q") -- (V2SF "") (V4SF "_q") -- (DI "") (V2DI "_q") -- (DF "") (V2DF "_q")]) -+ (DI "") (V2DI "_q") -+ (DF "") (V2DF "_q") -+ (HF "")]) - - (define_mode_attr pf [(V8QI "p") (V16QI "p") (V2SF "f") (V4SF "f")]) - -@@ -679,6 +738,16 @@ - (define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")]) - (define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")]) - -+;; String reprentations of operations on the sign of a number. -+(define_code_attr absneg_str [(abs "abs") (neg "neg")]) -+ -+;; Conversions. 
-+(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) -+ -+(define_code_attr float_sup [(unsigned_float "u") (float "s")]) -+ -+(define_code_attr float_SUP [(unsigned_float "U") (float "S")]) -+ - ;;---------------------------------------------------------------------------- - ;; Int attributes - ;;---------------------------------------------------------------------------- -@@ -710,7 +779,13 @@ - (UNSPEC_VPMAX "s") (UNSPEC_VPMAX_U "u") - (UNSPEC_VPMIN "s") (UNSPEC_VPMIN_U "u") - (UNSPEC_VCVT_S "s") (UNSPEC_VCVT_U "u") -+ (UNSPEC_VCVTA_S "s") (UNSPEC_VCVTA_U "u") -+ (UNSPEC_VCVTM_S "s") (UNSPEC_VCVTM_U "u") -+ (UNSPEC_VCVTN_S "s") (UNSPEC_VCVTN_U "u") -+ (UNSPEC_VCVTP_S "s") (UNSPEC_VCVTP_U "u") - (UNSPEC_VCVT_S_N "s") (UNSPEC_VCVT_U_N "u") -+ (UNSPEC_VCVT_HF_S_N "s") (UNSPEC_VCVT_HF_U_N "u") -+ (UNSPEC_VCVT_SI_S_N "s") (UNSPEC_VCVT_SI_U_N "u") - (UNSPEC_VQMOVN_S "s") (UNSPEC_VQMOVN_U "u") - (UNSPEC_VMOVL_S "s") (UNSPEC_VMOVL_U "u") - (UNSPEC_VSHL_S "s") (UNSPEC_VSHL_U "u") -@@ -725,13 +800,30 @@ - (UNSPEC_VSHLL_S_N "s") (UNSPEC_VSHLL_U_N "u") - (UNSPEC_VSRA_S_N "s") (UNSPEC_VSRA_U_N "u") - (UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u") -- -+ (UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u") - ]) - -+(define_int_attr vcvth_op -+ [(UNSPEC_VCVTA_S "a") (UNSPEC_VCVTA_U "a") -+ (UNSPEC_VCVTM_S "m") (UNSPEC_VCVTM_U "m") -+ (UNSPEC_VCVTN_S "n") (UNSPEC_VCVTN_U "n") -+ (UNSPEC_VCVTP_S "p") (UNSPEC_VCVTP_U "p")]) -+ -+(define_int_attr fp16_rnd_str -+ [(UNSPEC_VRND "rnd") (UNSPEC_VRNDA "rnda") -+ (UNSPEC_VRNDM "rndm") (UNSPEC_VRNDN "rndn") -+ (UNSPEC_VRNDP "rndp") (UNSPEC_VRNDX "rndx")]) -+ -+(define_int_attr fp16_rnd_insn -+ [(UNSPEC_VRND "vrintz") (UNSPEC_VRNDA "vrinta") -+ (UNSPEC_VRNDM "vrintm") (UNSPEC_VRNDN "vrintn") -+ (UNSPEC_VRNDP "vrintp") (UNSPEC_VRNDX "vrintx")]) -+ - (define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt") -- (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") -- (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") -- (UNSPEC_VCAGT "gt")]) -+ (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") -+ (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") -+ (UNSPEC_VCAGT "gt") (UNSPEC_VCALE "le") -+ (UNSPEC_VCALT "lt")]) - - (define_int_attr r [ - (UNSPEC_VRHADD_S "r") (UNSPEC_VRHADD_U "r") -@@ -847,3 +939,7 @@ - - ;; Attributes for VQRDMLAH/VQRDMLSH - (define_int_attr neon_rdma_as [(UNSPEC_VQRDMLAH "a") (UNSPEC_VQRDMLSH "s")]) -+ -+;; Attributes for VFMA_LANE/ VFMS_LANE -+(define_int_attr neon_vfm_lane_as -+ [(UNSPEC_VFMA_LANE "a") (UNSPEC_VFMS_LANE "s")]) ---- a/src/gcc/config/arm/neon-testgen.ml -+++ b/src//dev/null -@@ -1,324 +0,0 @@ --(* Auto-generate ARM Neon intrinsics tests. -- Copyright (C) 2006-2016 Free Software Foundation, Inc. -- Contributed by CodeSourcery. -- -- This file is part of GCC. -- -- GCC is free software; you can redistribute it and/or modify it under -- the terms of the GNU General Public License as published by the Free -- Software Foundation; either version 3, or (at your option) any later -- version. -- -- GCC is distributed in the hope that it will be useful, but WITHOUT ANY -- WARRANTY; without even the implied warranty of MERCHANTABILITY or -- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- for more details. -- -- You should have received a copy of the GNU General Public License -- along with GCC; see the file COPYING3. If not see -- <http://www.gnu.org/licenses/>. -- -- This is an O'Caml program. The O'Caml compiler is available from: -- -- http://caml.inria.fr/ -- -- Or from your favourite OS's friendly packaging system. 
Tested with version -- 3.09.2, though other versions will probably work too. -- -- Compile with: -- ocamlc -c neon.ml -- ocamlc -o neon-testgen neon.cmo neon-testgen.ml -- -- Run with: -- cd /path/to/gcc/testsuite/gcc.target/arm/neon -- /path/to/neon-testgen --*) -- --open Neon -- --type c_type_flags = Pointer | Const -- --(* Open a test source file. *) --let open_test_file dir name = -- try -- open_out (dir ^ "/" ^ name ^ ".c") -- with Sys_error str -> -- failwith ("Could not create test source file " ^ name ^ ": " ^ str) -- --(* Emit prologue code to a test source file. *) --let emit_prologue chan test_name effective_target compile_test_optim = -- Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic. */\n" test_name; -- Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n"; -- Printf.fprintf chan "/* { dg-do assemble } */\n"; -- Printf.fprintf chan "/* { dg-require-effective-target %s_ok } */\n" -- effective_target; -- Printf.fprintf chan "/* { dg-options \"-save-temps %s\" } */\n" compile_test_optim; -- Printf.fprintf chan "/* { dg-add-options %s } */\n" effective_target; -- Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n" -- --(* Emit declarations of variables that are going to be passed -- to an intrinsic, together with one to take a returned value if needed. *) --let emit_variables chan c_types features spaces = -- let emit () = -- ignore ( -- List.fold_left (fun arg_number -> fun (flags, ty) -> -- let pointer_bit = -- if List.mem Pointer flags then "*" else "" -- in -- (* Const arguments to builtins are directly -- written in as constants. *) -- if not (List.mem Const flags) then -- Printf.fprintf chan "%s%s %sarg%d_%s;\n" -- spaces ty pointer_bit arg_number ty; -- arg_number + 1) -- 0 (List.tl c_types)) -- in -- match c_types with -- (_, return_ty) :: tys -> -- if return_ty <> "void" then begin -- (* The intrinsic returns a value. We need to do explicit register -- allocation for vget_low tests or they fail because of copy -- elimination. *) -- ((if List.mem Fixed_vector_reg features then -- Printf.fprintf chan "%sregister %s out_%s asm (\"d18\");\n" -- spaces return_ty return_ty -- else if List.mem Fixed_core_reg features then -- Printf.fprintf chan "%sregister %s out_%s asm (\"r0\");\n" -- spaces return_ty return_ty -- else -- Printf.fprintf chan "%s%s out_%s;\n" spaces return_ty return_ty); -- emit ()) -- end else -- (* The intrinsic does not return a value. *) -- emit () -- | _ -> assert false -- --(* Emit code to call an intrinsic. *) --let emit_call chan const_valuator c_types name elt_ty = -- (if snd (List.hd c_types) <> "void" then -- Printf.fprintf chan " out_%s = " (snd (List.hd c_types)) -- else -- Printf.fprintf chan " "); -- Printf.fprintf chan "%s_%s (" (intrinsic_name name) (string_of_elt elt_ty); -- let print_arg chan arg_number (flags, ty) = -- (* If the argument is of const type, then directly write in the -- constant now. 
*) -- if List.mem Const flags then -- match const_valuator with -- None -> -- if List.mem Pointer flags then -- Printf.fprintf chan "0" -- else -- Printf.fprintf chan "1" -- | Some f -> Printf.fprintf chan "%s" (string_of_int (f arg_number)) -- else -- Printf.fprintf chan "arg%d_%s" arg_number ty -- in -- let rec print_args arg_number tys = -- match tys with -- [] -> () -- | [ty] -> print_arg chan arg_number ty -- | ty::tys -> -- print_arg chan arg_number ty; -- Printf.fprintf chan ", "; -- print_args (arg_number + 1) tys -- in -- print_args 0 (List.tl c_types); -- Printf.fprintf chan ");\n" -- --(* Emit epilogue code to a test source file. *) --let emit_epilogue chan features regexps = -- let no_op = List.exists (fun feature -> feature = No_op) features in -- Printf.fprintf chan "}\n\n"; -- if not no_op then -- List.iter (fun regexp -> -- Printf.fprintf chan -- "/* { dg-final { scan-assembler \"%s\" } } */\n" regexp) -- regexps -- else -- () -- -- --(* Check a list of C types to determine which ones are pointers and which -- ones are const. *) --let check_types tys = -- let tys' = -- List.map (fun ty -> -- let len = String.length ty in -- if len > 2 && String.get ty (len - 2) = ' ' -- && String.get ty (len - 1) = '*' -- then ([Pointer], String.sub ty 0 (len - 2)) -- else ([], ty)) tys -- in -- List.map (fun (flags, ty) -> -- if String.length ty > 6 && String.sub ty 0 6 = "const " -- then (Const :: flags, String.sub ty 6 ((String.length ty) - 6)) -- else (flags, ty)) tys' -- --(* Work out what the effective target should be. *) --let effective_target features = -- try -- match List.find (fun feature -> -- match feature with Requires_feature _ -> true -- | Requires_arch _ -> true -- | Requires_FP_bit 1 -> true -- | _ -> false) -- features with -- Requires_feature "FMA" -> "arm_neonv2" -- | Requires_feature "CRYPTO" -> "arm_crypto" -- | Requires_arch 8 -> "arm_v8_neon" -- | Requires_FP_bit 1 -> "arm_neon_fp16" -- | _ -> assert false -- with Not_found -> "arm_neon" -- --(* Work out what the testcase optimization level should be, default to -O0. *) --let compile_test_optim features = -- try -- match List.find (fun feature -> -- match feature with Compiler_optim _ -> true -- | _ -> false) -- features with -- Compiler_optim opt -> opt -- | _ -> assert false -- with Not_found -> "-O0" -- --(* Given an intrinsic shape, produce a regexp that will match -- the right-hand sides of instructions generated by an intrinsic of -- that shape. 
*) --let rec analyze_shape shape = -- let rec n_things n thing = -- match n with -- 0 -> [] -- | n -> thing :: (n_things (n - 1) thing) -- in -- let rec analyze_shape_elt elt = -- match elt with -- Dreg -> "\\[dD\\]\\[0-9\\]+" -- | Qreg -> "\\[qQ\\]\\[0-9\\]+" -- | Corereg -> "\\[rR\\]\\[0-9\\]+" -- | Immed -> "#\\[0-9\\]+" -- | VecArray (1, elt) -> -- let elt_regexp = analyze_shape_elt elt in -- "((\\\\\\{" ^ elt_regexp ^ "\\\\\\})|(" ^ elt_regexp ^ "))" -- | VecArray (n, elt) -> -- let elt_regexp = analyze_shape_elt elt in -- let alt1 = elt_regexp ^ "-" ^ elt_regexp in -- let alt2 = commas (fun x -> x) (n_things n elt_regexp) "" in -- "\\\\\\{((" ^ alt1 ^ ")|(" ^ alt2 ^ "))\\\\\\}" -- | (PtrTo elt | CstPtrTo elt) -> -- "\\\\\\[" ^ (analyze_shape_elt elt) ^ "\\(:\\[0-9\\]+\\)?\\\\\\]" -- | Element_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" -- | Element_of_qreg -> (analyze_shape_elt Qreg) ^ "\\\\\\[\\[0-9\\]+\\\\\\]" -- | All_elements_of_dreg -> (analyze_shape_elt Dreg) ^ "\\\\\\[\\\\\\]" -- | Alternatives (elts) -> "(" ^ (String.concat "|" (List.map analyze_shape_elt elts)) ^ ")" -- in -- match shape with -- All (n, elt) -> commas analyze_shape_elt (n_things n elt) "" -- | Long -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Dreg) ^ -- ", " ^ (analyze_shape_elt Dreg) -- | Long_noreg elt -> (analyze_shape_elt elt) ^ ", " ^ (analyze_shape_elt elt) -- | Wide -> (analyze_shape_elt Qreg) ^ ", " ^ (analyze_shape_elt Qreg) ^ -- ", " ^ (analyze_shape_elt Dreg) -- | Wide_noreg elt -> analyze_shape (Long_noreg elt) -- | Narrow -> (analyze_shape_elt Dreg) ^ ", " ^ (analyze_shape_elt Qreg) ^ -- ", " ^ (analyze_shape_elt Qreg) -- | Use_operands elts -> commas analyze_shape_elt (Array.to_list elts) "" -- | By_scalar Dreg -> -- analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |]) -- | By_scalar Qreg -> -- analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |]) -- | By_scalar _ -> assert false -- | Wide_lane -> -- analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) -- | Wide_scalar -> -- analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |]) -- | Pair_result elt -> -- let elt_regexp = analyze_shape_elt elt in -- elt_regexp ^ ", " ^ elt_regexp -- | Unary_scalar _ -> "FIXME Unary_scalar" -- | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |]) -- | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |]) -- | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |]) -- --(* Generate tests for one intrinsic. *) --let test_intrinsic dir opcode features shape name munge elt_ty = -- (* Open the test source file. *) -- let test_name = name ^ (string_of_elt elt_ty) in -- let chan = open_test_file dir test_name in -- (* Work out what argument and return types the intrinsic has. *) -- let c_arity, new_elt_ty = munge shape elt_ty in -- let c_types = check_types (strings_of_arity c_arity) in -- (* Extract any constant valuator (a function specifying what constant -- values are to be written into the intrinsic call) from the features -- list. *) -- let const_valuator = -- try -- match (List.find (fun feature -> match feature with -- Const_valuator _ -> true -- | _ -> false) features) with -- Const_valuator f -> Some f -- | _ -> assert false -- with Not_found -> None -- in -- (* Work out what instruction name(s) to expect. 
*) -- let insns = get_insn_names features name in -- let no_suffix = (new_elt_ty = NoElts) in -- let insns = -- if no_suffix then insns -- else List.map (fun insn -> -- let suffix = string_of_elt_dots new_elt_ty in -- insn ^ "\\." ^ suffix) insns -- in -- (* Construct a regexp to match against the expected instruction name(s). *) -- let insn_regexp = -- match insns with -- [] -> assert false -- | [insn] -> insn -- | _ -> -- let rec calc_regexp insns cur_regexp = -- match insns with -- [] -> cur_regexp -- | [insn] -> cur_regexp ^ "(" ^ insn ^ "))" -- | insn::insns -> calc_regexp insns (cur_regexp ^ "(" ^ insn ^ ")|") -- in calc_regexp insns "(" -- in -- (* Construct regexps to match against the instructions that this -- intrinsic expands to. Watch out for any writeback character and -- comments after the instruction. *) -- let regexps = List.map (fun regexp -> insn_regexp ^ "\\[ \t\\]+" ^ regexp ^ -- "!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n") -- (analyze_all_shapes features shape analyze_shape) -- in -- let effective_target = effective_target features in -- let compile_test_optim = compile_test_optim features -- in -- (* Emit file and function prologues. *) -- emit_prologue chan test_name effective_target compile_test_optim; -- -- if (compare compile_test_optim "-O0") <> 0 then -- (* Emit variable declarations. *) -- emit_variables chan c_types features ""; -- -- Printf.fprintf chan "void test_%s (void)\n{\n" test_name; -- -- if compare compile_test_optim "-O0" = 0 then -- (* Emit variable declarations. *) -- emit_variables chan c_types features " "; -- -- Printf.fprintf chan "\n"; -- (* Emit the call to the intrinsic. *) -- emit_call chan const_valuator c_types name elt_ty; -- (* Emit the function epilogue and the DejaGNU scan-assembler directives. *) -- emit_epilogue chan features regexps; -- (* Close the test file. *) -- close_out chan -- --(* Generate tests for one element of the "ops" table. *) --let test_intrinsic_group dir (opcode, features, shape, name, munge, types) = -- List.iter (test_intrinsic dir opcode features shape name munge) types -- --(* Program entry point. *) --let _ = -- let directory = if Array.length Sys.argv <> 1 then Sys.argv.(1) else "." 
in -- List.iter (test_intrinsic_group directory) (reinterp @ reinterpq @ ops) -- ---- a/src/gcc/config/arm/neon.md -+++ b/src/gcc/config/arm/neon.md -@@ -406,7 +406,7 @@ - (match_operand:SI 2 "immediate_operand" "")] - "TARGET_NEON" - { -- HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); -+ HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]); - emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1], - GEN_INT (elem), operands[0])); - DONE; -@@ -505,6 +505,20 @@ - (const_string "neon_add<q>")))] - ) - -+(define_insn "add<mode>3_fp16" -+ [(set -+ (match_operand:VH 0 "s_register_operand" "=w") -+ (plus:VH -+ (match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")))] -+ "TARGET_NEON_FP16INST" -+ "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set (attr "type") -+ (if_then_else (match_test "<Is_float_mode>") -+ (const_string "neon_fp_addsub_s<q>") -+ (const_string "neon_add<q>")))] -+) -+ - (define_insn "adddi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r") - (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r") -@@ -543,6 +557,17 @@ - (const_string "neon_sub<q>")))] - ) - -+(define_insn "sub<mode>3_fp16" -+ [(set -+ (match_operand:VH 0 "s_register_operand" "=w") -+ (minus:VH -+ (match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")))] -+ "TARGET_NEON_FP16INST" -+ "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_sub<q>")] -+) -+ - (define_insn "subdi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w") - (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w") -@@ -591,6 +616,16 @@ - (const_string "neon_mla_<V_elem_ch><q>")))] - ) - -+(define_insn "mul<mode>3add<mode>_neon" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w") -+ (match_operand:VH 3 "s_register_operand" "w")) -+ (match_operand:VH 1 "s_register_operand" "0")))] -+ "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)" -+ "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3" -+ [(set_attr "type" "neon_fp_mla_s<q>")] -+) -+ - (define_insn "mul<mode>3neg<mode>add<mode>_neon" - [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0") -@@ -629,6 +664,19 @@ - [(set_attr "type" "neon_fp_mla_s<q>")] - ) - -+;; There is limited support for unsafe-math optimizations using the NEON FP16 -+;; arithmetic instructions, so only the intrinsic is currently supported. 
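Aside: because unsafe-math support is limited here, the fused patterns below are reached only through the vfma/vfms intrinsics rather than through generic fma expansion. A minimal sketch of those entry points (standard ACLE names; an ARMv8.2-A FP16 target is an assumption, not stated by this hunk):

    /* Sketch only: these calls map onto fma<mode>4_intrinsic and
       fmsub<mode>4_intrinsic below.  */
    #include <arm_neon.h>

    float16x8_t fma_demo (float16x8_t acc, float16x8_t a, float16x8_t b)
    {
      acc = vfmaq_f16 (acc, a, b);   /* vfma.f16: acc + a*b, single rounding  */
      return vfmsq_f16 (acc, a, b);  /* vfms.f16: acc - a*b                   */
    }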
-+(define_insn "fma<VH:mode>4_intrinsic" -+ [(set (match_operand:VH 0 "register_operand" "=w") -+ (fma:VH -+ (match_operand:VH 1 "register_operand" "w") -+ (match_operand:VH 2 "register_operand" "w") -+ (match_operand:VH 3 "register_operand" "0")))] -+ "TARGET_NEON_FP16INST" -+ "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_mla_s<q>")] -+) -+ - (define_insn "*fmsub<VCVTF:mode>4" - [(set (match_operand:VCVTF 0 "register_operand" "=w") - (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) -@@ -640,13 +688,25 @@ - ) - - (define_insn "fmsub<VCVTF:mode>4_intrinsic" -- [(set (match_operand:VCVTF 0 "register_operand" "=w") -- (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) -- (match_operand:VCVTF 2 "register_operand" "w") -- (match_operand:VCVTF 3 "register_operand" "0")))] -- "TARGET_NEON && TARGET_FMA" -- "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -- [(set_attr "type" "neon_fp_mla_s<q>")] -+ [(set (match_operand:VCVTF 0 "register_operand" "=w") -+ (fma:VCVTF -+ (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) -+ (match_operand:VCVTF 2 "register_operand" "w") -+ (match_operand:VCVTF 3 "register_operand" "0")))] -+ "TARGET_NEON && TARGET_FMA" -+ "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_mla_s<q>")] -+) -+ -+(define_insn "fmsub<VH:mode>4_intrinsic" -+ [(set (match_operand:VH 0 "register_operand" "=w") -+ (fma:VH -+ (neg:VH (match_operand:VH 1 "register_operand" "w")) -+ (match_operand:VH 2 "register_operand" "w") -+ (match_operand:VH 3 "register_operand" "0")))] -+ "TARGET_NEON_FP16INST" -+ "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_mla_s<q>")] - ) - - (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>" -@@ -860,6 +920,44 @@ - "" - ) - -+(define_insn "<absneg_str><mode>2" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))] -+ "TARGET_NEON_FP16INST" -+ "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_abs<q>")] -+) -+ -+(define_expand "neon_v<absneg_str><mode>" -+ [(set -+ (match_operand:VH 0 "s_register_operand") -+ (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))] -+ "TARGET_NEON_FP16INST" -+{ -+ emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1])); -+ DONE; -+}) -+ -+(define_insn "neon_v<fp16_rnd_str><mode>" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH -+ [(match_operand:VH 1 "s_register_operand" "w")] -+ FP16_RND))] -+ "TARGET_NEON_FP16INST" -+ "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_fp_round_s<q>")] -+) -+ -+(define_insn "neon_vrsqrte<mode>" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH -+ [(match_operand:VH 1 "s_register_operand" "w")] -+ UNSPEC_VRSQRTE))] -+ "TARGET_NEON_FP16INST" -+ "vrsqrte.f16\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_fp_rsqrte_s<q>")] -+) -+ - (define_insn "*umin<mode>3_neon" - [(set (match_operand:VDQIW 0 "s_register_operand" "=w") - (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w") -@@ -1208,16 +1306,133 @@ - - ;; Widening operations - -+(define_expand "widen_ssum<mode>3" -+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "") -+ (plus:<V_double_width> -+ (sign_extend:<V_double_width> -+ (match_operand:VQI 1 "s_register_operand" "")) -+ (match_operand:<V_double_width> 2 "s_register_operand" "")))] -+ "TARGET_NEON" -+ { -+ machine_mode mode = GET_MODE 
(operands[1]); -+ rtx p1, p2; -+ -+ p1 = arm_simd_vect_par_cnst_half (mode, false); -+ p2 = arm_simd_vect_par_cnst_half (mode, true); -+ -+ if (operands[0] != operands[2]) -+ emit_move_insn (operands[0], operands[2]); -+ -+ emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0], -+ operands[1], -+ p1, -+ operands[0])); -+ emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0], -+ operands[1], -+ p2, -+ operands[0])); -+ DONE; -+ } -+) -+ -+(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3" -+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w") -+ (plus:<VW:V_widen> -+ (sign_extend:<VW:V_widen> -+ (vec_select:VW -+ (match_operand:VQI 1 "s_register_operand" "%w") -+ (match_operand:VQI 2 "vect_par_constant_low" ""))) -+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))] -+ "TARGET_NEON" -+{ -+ return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" : -+ "vaddw.<V_s_elem>\t%q0, %q3, %e1"; -+} -+ [(set_attr "type" "neon_add_widen")]) -+ -+(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3" -+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w") -+ (plus:<VW:V_widen> -+ (sign_extend:<VW:V_widen> -+ (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w") -+ (match_operand:VQI 2 "vect_par_constant_high" ""))) -+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))] -+ "TARGET_NEON" -+{ -+ return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" : -+ "vaddw.<V_s_elem>\t%q0, %q3, %f1"; -+} -+ [(set_attr "type" "neon_add_widen")]) -+ - (define_insn "widen_ssum<mode>3" - [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") -- (plus:<V_widen> (sign_extend:<V_widen> -- (match_operand:VW 1 "s_register_operand" "%w")) -- (match_operand:<V_widen> 2 "s_register_operand" "w")))] -+ (plus:<V_widen> -+ (sign_extend:<V_widen> -+ (match_operand:VW 1 "s_register_operand" "%w")) -+ (match_operand:<V_widen> 2 "s_register_operand" "w")))] - "TARGET_NEON" - "vaddw.<V_s_elem>\t%q0, %q2, %P1" - [(set_attr "type" "neon_add_widen")] - ) - -+(define_expand "widen_usum<mode>3" -+ [(set (match_operand:<V_double_width> 0 "s_register_operand" "") -+ (plus:<V_double_width> -+ (zero_extend:<V_double_width> -+ (match_operand:VQI 1 "s_register_operand" "")) -+ (match_operand:<V_double_width> 2 "s_register_operand" "")))] -+ "TARGET_NEON" -+ { -+ machine_mode mode = GET_MODE (operands[1]); -+ rtx p1, p2; -+ -+ p1 = arm_simd_vect_par_cnst_half (mode, false); -+ p2 = arm_simd_vect_par_cnst_half (mode, true); -+ -+ if (operands[0] != operands[2]) -+ emit_move_insn (operands[0], operands[2]); -+ -+ emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0], -+ operands[1], -+ p1, -+ operands[0])); -+ emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0], -+ operands[1], -+ p2, -+ operands[0])); -+ DONE; -+ } -+) -+ -+(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3" -+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w") -+ (plus:<VW:V_widen> -+ (zero_extend:<VW:V_widen> -+ (vec_select:VW -+ (match_operand:VQI 1 "s_register_operand" "%w") -+ (match_operand:VQI 2 "vect_par_constant_low" ""))) -+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))] -+ "TARGET_NEON" -+{ -+ return BYTES_BIG_ENDIAN ? 
"vaddw.<V_u_elem>\t%q0, %q3, %f1" : -+ "vaddw.<V_u_elem>\t%q0, %q3, %e1"; -+} -+ [(set_attr "type" "neon_add_widen")]) -+ -+(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3" -+ [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w") -+ (plus:<VW:V_widen> -+ (zero_extend:<VW:V_widen> -+ (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w") -+ (match_operand:VQI 2 "vect_par_constant_high" ""))) -+ (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))] -+ "TARGET_NEON" -+{ -+ return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" : -+ "vaddw.<V_u_elem>\t%q0, %q3, %f1"; -+} -+ [(set_attr "type" "neon_add_widen")]) -+ - (define_insn "widen_usum<mode>3" - [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") - (plus:<V_widen> (zero_extend:<V_widen> -@@ -1488,6 +1703,17 @@ - (const_string "neon_reduc_add<q>")))] - ) - -+(define_insn "neon_vpaddv4hf" -+ [(set -+ (match_operand:V4HF 0 "s_register_operand" "=w") -+ (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w") -+ (match_operand:V4HF 2 "s_register_operand" "w")] -+ UNSPEC_VPADD))] -+ "TARGET_NEON_FP16INST" -+ "vpadd.f16\t%P0, %P1, %P2" -+ [(set_attr "type" "neon_reduc_add")] -+) -+ - (define_insn "neon_vpsmin<mode>" - [(set (match_operand:VD 0 "s_register_operand" "=w") - (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") -@@ -1836,6 +2062,26 @@ - DONE; - }) - -+(define_expand "neon_vadd<mode>" -+ [(match_operand:VH 0 "s_register_operand") -+ (match_operand:VH 1 "s_register_operand") -+ (match_operand:VH 2 "s_register_operand")] -+ "TARGET_NEON_FP16INST" -+{ -+ emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2])); -+ DONE; -+}) -+ -+(define_expand "neon_vsub<mode>" -+ [(match_operand:VH 0 "s_register_operand") -+ (match_operand:VH 1 "s_register_operand") -+ (match_operand:VH 2 "s_register_operand")] -+ "TARGET_NEON_FP16INST" -+{ -+ emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2])); -+ DONE; -+}) -+ - ; Note that NEON operations don't support the full IEEE 754 standard: in - ; particular, denormal values are flushed to zero. This means that GCC cannot - ; use those instructions for autovectorization, etc. 
unless -@@ -1927,6 +2173,17 @@ - (const_string "neon_mul_<V_elem_ch><q>")))] - ) - -+(define_insn "neon_vmulf<mode>" -+ [(set -+ (match_operand:VH 0 "s_register_operand" "=w") -+ (mult:VH -+ (match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")))] -+ "TARGET_NEON_FP16INST" -+ "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_mul_<VH_elem_ch><q>")] -+) -+ - (define_expand "neon_vmla<mode>" - [(match_operand:VDQW 0 "s_register_operand" "=w") - (match_operand:VDQW 1 "s_register_operand" "0") -@@ -1955,6 +2212,18 @@ - DONE; - }) - -+(define_expand "neon_vfma<VH:mode>" -+ [(match_operand:VH 0 "s_register_operand") -+ (match_operand:VH 1 "s_register_operand") -+ (match_operand:VH 2 "s_register_operand") -+ (match_operand:VH 3 "s_register_operand")] -+ "TARGET_NEON_FP16INST" -+{ -+ emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3], -+ operands[1])); -+ DONE; -+}) -+ - (define_expand "neon_vfms<VCVTF:mode>" - [(match_operand:VCVTF 0 "s_register_operand") - (match_operand:VCVTF 1 "s_register_operand") -@@ -1967,6 +2236,18 @@ - DONE; - }) - -+(define_expand "neon_vfms<VH:mode>" -+ [(match_operand:VH 0 "s_register_operand") -+ (match_operand:VH 1 "s_register_operand") -+ (match_operand:VH 2 "s_register_operand") -+ (match_operand:VH 3 "s_register_operand")] -+ "TARGET_NEON_FP16INST" -+{ -+ emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3], -+ operands[1])); -+ DONE; -+}) -+ - ; Used for intrinsics when flag_unsafe_math_optimizations is false. - - (define_insn "neon_vmla<mode>_unspec" -@@ -2267,6 +2548,72 @@ - [(set_attr "type" "neon_fp_compare_s<q>")] - ) - -+(define_expand "neon_vc<cmp_op><mode>" -+ [(match_operand:<V_cmp_result> 0 "s_register_operand") -+ (neg:<V_cmp_result> -+ (COMPARISONS:VH -+ (match_operand:VH 1 "s_register_operand") -+ (match_operand:VH 2 "reg_or_zero_operand")))] -+ "TARGET_NEON_FP16INST" -+{ -+ /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations -+ are enabled. */ -+ if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT -+ && !flag_unsafe_math_optimizations) -+ emit_insn -+ (gen_neon_vc<cmp_op><mode>_fp16insn_unspec -+ (operands[0], operands[1], operands[2])); -+ else -+ emit_insn -+ (gen_neon_vc<cmp_op><mode>_fp16insn -+ (operands[0], operands[1], operands[2])); -+ DONE; -+}) -+ -+(define_insn "neon_vc<cmp_op><mode>_fp16insn" -+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") -+ (neg:<V_cmp_result> -+ (COMPARISONS:<V_cmp_result> -+ (match_operand:VH 1 "s_register_operand" "w,w") -+ (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))] -+ "TARGET_NEON_FP16INST -+ && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT -+ && !flag_unsafe_math_optimizations)" -+{ -+ char pattern[100]; -+ sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0," -+ " %%<V_reg>1, %s", -+ GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT -+ ? "f" : "<cmp_type>", -+ which_alternative == 0 -+ ? 
"%<V_reg>2" : "#0"); -+ output_asm_insn (pattern, operands); -+ return ""; -+} -+ [(set (attr "type") -+ (if_then_else (match_operand 2 "zero_operand") -+ (const_string "neon_compare_zero<q>") -+ (const_string "neon_compare<q>")))]) -+ -+(define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec" -+ [(set -+ (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") -+ (unspec:<V_cmp_result> -+ [(match_operand:VH 1 "s_register_operand" "w,w") -+ (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")] -+ NEON_VCMP))] -+ "TARGET_NEON_FP16INST" -+{ -+ char pattern[100]; -+ sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0," -+ " %%<V_reg>1, %s", -+ which_alternative == 0 -+ ? "%<V_reg>2" : "#0"); -+ output_asm_insn (pattern, operands); -+ return ""; -+} -+ [(set_attr "type" "neon_fp_compare_s<q>")]) -+ - (define_insn "neon_vc<cmp_op>u<mode>" - [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") - (neg:<V_cmp_result> -@@ -2318,6 +2665,60 @@ - [(set_attr "type" "neon_fp_compare_s<q>")] - ) - -+(define_expand "neon_vca<cmp_op><mode>" -+ [(set -+ (match_operand:<V_cmp_result> 0 "s_register_operand") -+ (neg:<V_cmp_result> -+ (GLTE:<V_cmp_result> -+ (abs:VH (match_operand:VH 1 "s_register_operand")) -+ (abs:VH (match_operand:VH 2 "s_register_operand")))))] -+ "TARGET_NEON_FP16INST" -+{ -+ if (flag_unsafe_math_optimizations) -+ emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn -+ (operands[0], operands[1], operands[2])); -+ else -+ emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec -+ (operands[0], operands[1], operands[2])); -+ DONE; -+}) -+ -+(define_insn "neon_vca<cmp_op><mode>_fp16insn" -+ [(set -+ (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") -+ (neg:<V_cmp_result> -+ (GLTE:<V_cmp_result> -+ (abs:VH (match_operand:VH 1 "s_register_operand" "w")) -+ (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))] -+ "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations" -+ "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_compare_s<q>")] -+) -+ -+(define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec" -+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") -+ (unspec:<V_cmp_result> -+ [(match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")] -+ NEON_VAGLTE))] -+ "TARGET_NEON" -+ "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_compare_s<q>")] -+) -+ -+(define_expand "neon_vc<cmp_op>z<mode>" -+ [(set -+ (match_operand:<V_cmp_result> 0 "s_register_operand") -+ (COMPARISONS:<V_cmp_result> -+ (match_operand:VH 1 "s_register_operand") -+ (const_int 0)))] -+ "TARGET_NEON_FP16INST" -+ { -+ emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1], -+ CONST0_RTX (<MODE>mode))); -+ DONE; -+}) -+ - (define_insn "neon_vtst<mode>" - [(set (match_operand:VDQIW 0 "s_register_operand" "=w") - (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") -@@ -2338,6 +2739,16 @@ - [(set_attr "type" "neon_abd<q>")] - ) - -+(define_insn "neon_vabd<mode>" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")] -+ UNSPEC_VABD_F))] -+ "TARGET_NEON_FP16INST" -+ "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_abd<q>")] -+) -+ - (define_insn "neon_vabdf<mode>" - [(set (match_operand:VCVTF 0 "s_register_operand" "=w") - (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") -@@ -2400,6 +2811,51 @@ - 
[(set_attr "type" "neon_fp_minmax_s<q>")] - ) - -+(define_insn "neon_v<maxmin>f<mode>" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH -+ [(match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")] -+ VMAXMINF))] -+ "TARGET_NEON_FP16INST" -+ "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_minmax_s<q>")] -+) -+ -+(define_insn "neon_vp<maxmin>fv4hf" -+ [(set (match_operand:V4HF 0 "s_register_operand" "=w") -+ (unspec:V4HF -+ [(match_operand:V4HF 1 "s_register_operand" "w") -+ (match_operand:V4HF 2 "s_register_operand" "w")] -+ VPMAXMINF))] -+ "TARGET_NEON_FP16INST" -+ "vp<maxmin>.f16\t%P0, %P1, %P2" -+ [(set_attr "type" "neon_reduc_minmax")] -+) -+ -+(define_insn "neon_<fmaxmin_op><mode>" -+ [(set -+ (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH -+ [(match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")] -+ VMAXMINFNM))] -+ "TARGET_NEON_FP16INST" -+ "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_minmax_s<q>")] -+) -+ -+;; v<maxmin>nm intrinsics. -+(define_insn "neon_<fmaxmin_op><mode>" -+ [(set (match_operand:VCVTF 0 "s_register_operand" "=w") -+ (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") -+ (match_operand:VCVTF 2 "s_register_operand" "w")] -+ VMAXMINFNM))] -+ "TARGET_NEON && TARGET_FPU_ARMV8" -+ "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_minmax_s<q>")] -+) -+ - ;; Vector forms for the IEEE-754 fmax()/fmin() functions - (define_insn "<fmaxmin><mode>3" - [(set (match_operand:VCVTF 0 "s_register_operand" "=w") -@@ -2471,6 +2927,17 @@ - [(set_attr "type" "neon_fp_recps_s<q>")] - ) - -+(define_insn "neon_vrecps<mode>" -+ [(set -+ (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")] -+ UNSPEC_VRECPS))] -+ "TARGET_NEON_FP16INST" -+ "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_recps_s<q>")] -+) -+ - (define_insn "neon_vrsqrts<mode>" - [(set (match_operand:VCVTF 0 "s_register_operand" "=w") - (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") -@@ -2481,6 +2948,17 @@ - [(set_attr "type" "neon_fp_rsqrts_s<q>")] - ) - -+(define_insn "neon_vrsqrts<mode>" -+ [(set -+ (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:VH 2 "s_register_operand" "w")] -+ UNSPEC_VRSQRTS))] -+ "TARGET_NEON_FP16INST" -+ "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" -+ [(set_attr "type" "neon_fp_rsqrts_s<q>")] -+) -+ - (define_expand "neon_vabs<mode>" - [(match_operand:VDQW 0 "s_register_operand" "") - (match_operand:VDQW 1 "s_register_operand" "")] -@@ -2596,6 +3074,15 @@ - }) - - (define_insn "neon_vrecpe<mode>" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")] -+ UNSPEC_VRECPE))] -+ "TARGET_NEON_FP16INST" -+ "vrecpe.f16\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_fp_recpe_s<q>")] -+) -+ -+(define_insn "neon_vrecpe<mode>" - [(set (match_operand:V32 0 "s_register_operand" "=w") - (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")] - UNSPEC_VRECPE))] -@@ -2932,6 +3419,28 @@ if (BYTES_BIG_ENDIAN) - [(set_attr "type" "neon_dup<q>")] - ) - -+(define_insn "neon_vdup_lane<mode>_internal" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ 
(vec_duplicate:VH -+ (vec_select:<V_elem> -+ (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") -+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] -+ "TARGET_NEON && TARGET_FP16" -+{ -+ if (BYTES_BIG_ENDIAN) -+ { -+ int elt = INTVAL (operands[2]); -+ elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; -+ operands[2] = GEN_INT (elt); -+ } -+ if (<Is_d_reg>) -+ return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; -+ else -+ return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; -+} -+ [(set_attr "type" "neon_dup<q>")] -+) -+ - (define_expand "neon_vdup_lane<mode>" - [(match_operand:VDQW 0 "s_register_operand" "=w") - (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") -@@ -2951,6 +3460,25 @@ if (BYTES_BIG_ENDIAN) - DONE; - }) - -+(define_expand "neon_vdup_lane<mode>" -+ [(match_operand:VH 0 "s_register_operand") -+ (match_operand:<V_double_vector_mode> 1 "s_register_operand") -+ (match_operand:SI 2 "immediate_operand")] -+ "TARGET_NEON && TARGET_FP16" -+{ -+ if (BYTES_BIG_ENDIAN) -+ { -+ unsigned int elt = INTVAL (operands[2]); -+ unsigned int reg_nelts -+ = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode); -+ elt ^= reg_nelts - 1; -+ operands[2] = GEN_INT (elt); -+ } -+ emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], -+ operands[2])); -+ DONE; -+}) -+ - ; Scalar index is ignored, since only zero is valid here. - (define_expand "neon_vdup_lanedi" - [(match_operand:DI 0 "s_register_operand" "=w") -@@ -3097,6 +3625,28 @@ if (BYTES_BIG_ENDIAN) - [(set_attr "type" "neon_fp_cvt_narrow_s_q")] - ) - -+(define_insn "neon_vcvt<sup><mode>" -+ [(set -+ (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") -+ (unspec:<VH_CVTTO> -+ [(match_operand:VCVTHI 1 "s_register_operand" "w")] -+ VCVT_US))] -+ "TARGET_NEON_FP16INST" -+ "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")] -+) -+ -+(define_insn "neon_vcvt<sup><mode>" -+ [(set -+ (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") -+ (unspec:<VH_CVTTO> -+ [(match_operand:VH 1 "s_register_operand" "w")] -+ VCVT_US))] -+ "TARGET_NEON_FP16INST" -+ "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] -+) -+ - (define_insn "neon_vcvt<sup>_n<mode>" - [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") - (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w") -@@ -3111,6 +3661,20 @@ if (BYTES_BIG_ENDIAN) - ) - - (define_insn "neon_vcvt<sup>_n<mode>" -+ [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") -+ (unspec:<VH_CVTTO> -+ [(match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:SI 2 "immediate_operand" "i")] -+ VCVT_US_N))] -+ "TARGET_NEON_FP16INST" -+{ -+ neon_const_bounds (operands[2], 0, 17); -+ return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2"; -+} -+ [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] -+) -+ -+(define_insn "neon_vcvt<sup>_n<mode>" - [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") - (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] -@@ -3123,6 +3687,31 @@ if (BYTES_BIG_ENDIAN) - [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")] - ) - -+(define_insn "neon_vcvt<sup>_n<mode>" -+ [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") -+ (unspec:<VH_CVTTO> -+ [(match_operand:VCVTHI 1 "s_register_operand" "w") -+ (match_operand:SI 2 "immediate_operand" "i")] -+ VCVT_US_N))] -+ "TARGET_NEON_FP16INST" -+{ -+ neon_const_bounds (operands[2], 0, 17); -+ return 
"vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2"; -+} -+ [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")] -+) -+ -+(define_insn "neon_vcvt<vcvth_op><sup><mode>" -+ [(set -+ (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w") -+ (unspec:<VH_CVTTO> -+ [(match_operand:VH 1 "s_register_operand" "w")] -+ VCVT_HF_US))] -+ "TARGET_NEON_FP16INST" -+ "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1" -+ [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")] -+) -+ - (define_insn "neon_vmovn<mode>" - [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w") - (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")] -@@ -3193,6 +3782,18 @@ if (BYTES_BIG_ENDIAN) - (const_string "neon_mul_<V_elem_ch>_scalar<q>")))] - ) - -+(define_insn "neon_vmul_lane<mode>" -+ [(set (match_operand:VH 0 "s_register_operand" "=w") -+ (unspec:VH [(match_operand:VH 1 "s_register_operand" "w") -+ (match_operand:V4HF 2 "s_register_operand" -+ "<scalar_mul_constraint>") -+ (match_operand:SI 3 "immediate_operand" "i")] -+ UNSPEC_VMUL_LANE))] -+ "TARGET_NEON_FP16INST" -+ "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]" -+ [(set_attr "type" "neon_fp_mul_s_scalar<q>")] -+) -+ - (define_insn "neon_vmull<sup>_lane<mode>" - [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") - (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w") -@@ -3447,6 +4048,19 @@ if (BYTES_BIG_ENDIAN) - DONE; - }) - -+(define_expand "neon_vmul_n<mode>" -+ [(match_operand:VH 0 "s_register_operand") -+ (match_operand:VH 1 "s_register_operand") -+ (match_operand:<V_elem> 2 "s_register_operand")] -+ "TARGET_NEON_FP16INST" -+{ -+ rtx tmp = gen_reg_rtx (V4HFmode); -+ emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx)); -+ emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp, -+ const0_rtx)); -+ DONE; -+}) -+ - (define_expand "neon_vmulls_n<mode>" - [(match_operand:<V_widen> 0 "s_register_operand" "") - (match_operand:VMDI 1 "s_register_operand" "") -@@ -4168,25 +4782,25 @@ if (BYTES_BIG_ENDIAN) - - (define_expand "neon_vtrn<mode>_internal" - [(parallel -- [(set (match_operand:VDQW 0 "s_register_operand" "") -- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") -- (match_operand:VDQW 2 "s_register_operand" "")] -+ [(set (match_operand:VDQWH 0 "s_register_operand") -+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") -+ (match_operand:VDQWH 2 "s_register_operand")] - UNSPEC_VTRN1)) -- (set (match_operand:VDQW 3 "s_register_operand" "") -- (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] -+ (set (match_operand:VDQWH 3 "s_register_operand") -+ (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])] - "TARGET_NEON" - "" - ) - - ;; Note: Different operand numbering to handle tied registers correctly. 
- (define_insn "*neon_vtrn<mode>_insn" -- [(set (match_operand:VDQW 0 "s_register_operand" "=&w") -- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") -- (match_operand:VDQW 3 "s_register_operand" "2")] -- UNSPEC_VTRN1)) -- (set (match_operand:VDQW 2 "s_register_operand" "=&w") -- (unspec:VDQW [(match_dup 1) (match_dup 3)] -- UNSPEC_VTRN2))] -+ [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") -+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") -+ (match_operand:VDQWH 3 "s_register_operand" "2")] -+ UNSPEC_VTRN1)) -+ (set (match_operand:VDQWH 2 "s_register_operand" "=&w") -+ (unspec:VDQWH [(match_dup 1) (match_dup 3)] -+ UNSPEC_VTRN2))] - "TARGET_NEON" - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" - [(set_attr "type" "neon_permute<q>")] -@@ -4194,25 +4808,25 @@ if (BYTES_BIG_ENDIAN) - - (define_expand "neon_vzip<mode>_internal" - [(parallel -- [(set (match_operand:VDQW 0 "s_register_operand" "") -- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") -- (match_operand:VDQW 2 "s_register_operand" "")] -- UNSPEC_VZIP1)) -- (set (match_operand:VDQW 3 "s_register_operand" "") -- (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] -+ [(set (match_operand:VDQWH 0 "s_register_operand") -+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") -+ (match_operand:VDQWH 2 "s_register_operand")] -+ UNSPEC_VZIP1)) -+ (set (match_operand:VDQWH 3 "s_register_operand") -+ (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])] - "TARGET_NEON" - "" - ) - - ;; Note: Different operand numbering to handle tied registers correctly. - (define_insn "*neon_vzip<mode>_insn" -- [(set (match_operand:VDQW 0 "s_register_operand" "=&w") -- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") -- (match_operand:VDQW 3 "s_register_operand" "2")] -- UNSPEC_VZIP1)) -- (set (match_operand:VDQW 2 "s_register_operand" "=&w") -- (unspec:VDQW [(match_dup 1) (match_dup 3)] -- UNSPEC_VZIP2))] -+ [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") -+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") -+ (match_operand:VDQWH 3 "s_register_operand" "2")] -+ UNSPEC_VZIP1)) -+ (set (match_operand:VDQWH 2 "s_register_operand" "=&w") -+ (unspec:VDQWH [(match_dup 1) (match_dup 3)] -+ UNSPEC_VZIP2))] - "TARGET_NEON" - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" - [(set_attr "type" "neon_zip<q>")] -@@ -4220,25 +4834,25 @@ if (BYTES_BIG_ENDIAN) - - (define_expand "neon_vuzp<mode>_internal" - [(parallel -- [(set (match_operand:VDQW 0 "s_register_operand" "") -- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "") -- (match_operand:VDQW 2 "s_register_operand" "")] -+ [(set (match_operand:VDQWH 0 "s_register_operand") -+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand") -+ (match_operand:VDQWH 2 "s_register_operand")] - UNSPEC_VUZP1)) -- (set (match_operand:VDQW 3 "s_register_operand" "") -- (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] -+ (set (match_operand:VDQWH 3 "s_register_operand" "") -+ (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])] - "TARGET_NEON" - "" - ) - - ;; Note: Different operand numbering to handle tied registers correctly. 
- (define_insn "*neon_vuzp<mode>_insn" -- [(set (match_operand:VDQW 0 "s_register_operand" "=&w") -- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") -- (match_operand:VDQW 3 "s_register_operand" "2")] -- UNSPEC_VUZP1)) -- (set (match_operand:VDQW 2 "s_register_operand" "=&w") -- (unspec:VDQW [(match_dup 1) (match_dup 3)] -- UNSPEC_VUZP2))] -+ [(set (match_operand:VDQWH 0 "s_register_operand" "=&w") -+ (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0") -+ (match_operand:VDQWH 3 "s_register_operand" "2")] -+ UNSPEC_VUZP1)) -+ (set (match_operand:VDQWH 2 "s_register_operand" "=&w") -+ (unspec:VDQWH [(match_dup 1) (match_dup 3)] -+ UNSPEC_VUZP2))] - "TARGET_NEON" - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" - [(set_attr "type" "neon_zip<q>")] ---- a/src/gcc/config/arm/neon.ml -+++ b/src//dev/null -@@ -1,2357 +0,0 @@ --(* Common code for ARM NEON header file, documentation and test case -- generators. -- -- Copyright (C) 2006-2016 Free Software Foundation, Inc. -- Contributed by CodeSourcery. -- -- This file is part of GCC. -- -- GCC is free software; you can redistribute it and/or modify it under -- the terms of the GNU General Public License as published by the Free -- Software Foundation; either version 3, or (at your option) any later -- version. -- -- GCC is distributed in the hope that it will be useful, but WITHOUT ANY -- WARRANTY; without even the implied warranty of MERCHANTABILITY or -- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- for more details. -- -- You should have received a copy of the GNU General Public License -- along with GCC; see the file COPYING3. If not see -- <http://www.gnu.org/licenses/>. *) -- --(* Shorthand types for vector elements. *) --type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16 -- | P64 | P128 | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts -- | Cast of elts * elts | NoElts -- --type eltclass = Signed | Unsigned | Float | Poly | Int | Bits -- | ConvClass of eltclass * eltclass | NoType -- --(* These vector types correspond directly to C types. *) --type vectype = T_int8x8 | T_int8x16 -- | T_int16x4 | T_int16x8 -- | T_int32x2 | T_int32x4 -- | T_int64x1 | T_int64x2 -- | T_uint8x8 | T_uint8x16 -- | T_uint16x4 | T_uint16x8 -- | T_uint32x2 | T_uint32x4 -- | T_uint64x1 | T_uint64x2 -- | T_float16x4 -- | T_float32x2 | T_float32x4 -- | T_poly8x8 | T_poly8x16 -- | T_poly16x4 | T_poly16x8 -- | T_immediate of int * int -- | T_int8 | T_int16 -- | T_int32 | T_int64 -- | T_uint8 | T_uint16 -- | T_uint32 | T_uint64 -- | T_poly8 | T_poly16 -- | T_poly64 | T_poly64x1 -- | T_poly64x2 | T_poly128 -- | T_float16 | T_float32 -- | T_arrayof of int * vectype -- | T_ptrto of vectype | T_const of vectype -- | T_void | T_intQI -- | T_intHI | T_intSI -- | T_intDI | T_intTI -- | T_floatHF | T_floatSF -- --(* The meanings of the following are: -- TImode : "Tetra", two registers (four words). -- EImode : "hExa", three registers (six words). -- OImode : "Octa", four registers (eight words). -- CImode : "dodeCa", six registers (twelve words). -- XImode : "heXadeca", eight registers (sixteen words). --*) -- --type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode -- --type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt -- | PtrTo of shape_elt | CstPtrTo of shape_elt -- (* These next ones are used only in the test generator. *) -- | Element_of_dreg (* Used for "lane" variants. *) -- | Element_of_qreg (* Likewise. 
*) -- | All_elements_of_dreg (* Used for "dup" variants. *) -- | Alternatives of shape_elt list (* Used for multiple valid operands *) -- --type shape_form = All of int * shape_elt -- | Long -- | Long_noreg of shape_elt -- | Wide -- | Wide_noreg of shape_elt -- | Narrow -- | Long_imm -- | Narrow_imm -- | Binary_imm of shape_elt -- | Use_operands of shape_elt array -- | By_scalar of shape_elt -- | Unary_scalar of shape_elt -- | Wide_lane -- | Wide_scalar -- | Pair_result of shape_elt -- --type arity = Arity0 of vectype -- | Arity1 of vectype * vectype -- | Arity2 of vectype * vectype * vectype -- | Arity3 of vectype * vectype * vectype * vectype -- | Arity4 of vectype * vectype * vectype * vectype * vectype -- --type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI -- | V16QI | V8HI | V4SI | V4SF | V2DI | TI -- | QI | HI | SI | SF -- --type opcode = -- (* Binary ops. *) -- Vadd -- | Vmul -- | Vmla -- | Vmls -- | Vfma -- | Vfms -- | Vsub -- | Vceq -- | Vcge -- | Vcgt -- | Vcle -- | Vclt -- | Vcage -- | Vcagt -- | Vcale -- | Vcalt -- | Vtst -- | Vabd -- | Vaba -- | Vmax -- | Vmin -- | Vpadd -- | Vpada -- | Vpmax -- | Vpmin -- | Vrecps -- | Vrsqrts -- | Vshl -- | Vshr_n -- | Vshl_n -- | Vsra_n -- | Vsri -- | Vsli -- (* Logic binops. *) -- | Vand -- | Vorr -- | Veor -- | Vbic -- | Vorn -- | Vbsl -- (* Ops with scalar. *) -- | Vmul_lane -- | Vmla_lane -- | Vmls_lane -- | Vmul_n -- | Vmla_n -- | Vmls_n -- | Vmull_n -- | Vmull_lane -- | Vqdmull_n -- | Vqdmull_lane -- | Vqdmulh_n -- | Vqdmulh_lane -- (* Unary ops. *) -- | Vrintn -- | Vrinta -- | Vrintp -- | Vrintm -- | Vrintz -- | Vabs -- | Vneg -- | Vcls -- | Vclz -- | Vcnt -- | Vrecpe -- | Vrsqrte -- | Vmvn -- (* Vector extract. *) -- | Vext -- (* Reverse elements. *) -- | Vrev64 -- | Vrev32 -- | Vrev16 -- (* Transposition ops. *) -- | Vtrn -- | Vzip -- | Vuzp -- (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *) -- | Vldx of int -- | Vstx of int -- | Vldx_lane of int -- | Vldx_dup of int -- | Vstx_lane of int -- (* Set/extract lanes from a vector. *) -- | Vget_lane -- | Vset_lane -- (* Initialize vector from bit pattern. *) -- | Vcreate -- (* Set all lanes to same value. *) -- | Vdup_n -- | Vmov_n (* Is this the same? *) -- (* Duplicate scalar to all lanes of vector. *) -- | Vdup_lane -- (* Combine vectors. *) -- | Vcombine -- (* Get quadword high/low parts. *) -- | Vget_high -- | Vget_low -- (* Convert vectors. *) -- | Vcvt -- | Vcvt_n -- (* Narrow/lengthen vectors. *) -- | Vmovn -- | Vmovl -- (* Table lookup. *) -- | Vtbl of int -- | Vtbx of int -- (* Reinterpret casts. *) -- | Vreinterp -- --let rev_elems revsize elsize nelts _ = -- let mask = (revsize / elsize) - 1 in -- let arr = Array.init nelts -- (fun i -> i lxor mask) in -- Array.to_list arr -- --let permute_range i stride nelts increment = -- let rec build i = function -- 0 -> [] -- | nelts -> i :: (i + stride) :: build (i + increment) (pred nelts) in -- build i nelts -- --(* Generate a list of integers suitable for vzip. *) --let zip_range i stride nelts = permute_range i stride nelts 1 -- --(* Generate a list of integers suitable for vunzip. *) --let uzip_range i stride nelts = permute_range i stride nelts 4 -- --(* Generate a list of integers suitable for trn. 
*) --let trn_range i stride nelts = permute_range i stride nelts 2 -- --let zip_elems _ nelts part = -- match part with -- `lo -> zip_range 0 nelts (nelts / 2) -- | `hi -> zip_range (nelts / 2) nelts (nelts / 2) -- --let uzip_elems _ nelts part = -- match part with -- `lo -> uzip_range 0 2 (nelts / 2) -- | `hi -> uzip_range 1 2 (nelts / 2) -- --let trn_elems _ nelts part = -- match part with -- `lo -> trn_range 0 nelts (nelts / 2) -- | `hi -> trn_range 1 nelts (nelts / 2) -- --(* Features used for documentation, to distinguish between some instruction -- variants, and to signal special requirements (e.g. swapping arguments). *) -- --type features = -- Halving -- | Rounding -- | Saturating -- | Dst_unsign -- | High_half -- | Doubling -- | Flipped of string (* Builtin name to use with flipped arguments. *) -- | InfoWord (* Pass an extra word for signage/rounding etc. (always passed -- for All _, Long, Wide, Narrow shape_forms. *) -- (* Implement builtin as shuffle. The parameter is a function which returns -- masks suitable for __builtin_shuffle: arguments are (element size, -- number of elements, high/low part selector). *) -- | Use_shuffle of (int -> int -> [`lo|`hi] -> int list) -- (* A specification as to the shape of instruction expected upon -- disassembly, used if it differs from the shape used to build the -- intrinsic prototype. Multiple entries in the constructor's argument -- indicate that the intrinsic expands to more than one assembly -- instruction, each with a corresponding shape specified here. *) -- | Disassembles_as of shape_form list -- | Builtin_name of string (* Override the name of the builtin. *) -- (* Override the name of the instruction. If more than one name -- is specified, it means that the instruction can have any of those -- names. *) -- | Instruction_name of string list -- (* Mark that the intrinsic yields no instructions, or expands to yield -- behavior that the test generator cannot test. *) -- | No_op -- (* Mark that the intrinsic has constant arguments that cannot be set -- to the defaults (zero for pointers and one otherwise) in the test -- cases. The function supplied must return the integer to be written -- into the testcase for the argument number (0-based) supplied to it. *) -- | Const_valuator of (int -> int) -- | Fixed_vector_reg -- | Fixed_core_reg -- (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *) -- | Requires_feature of string -- (* Mark that the intrinsic requires a particular architecture version. *) -- | Requires_arch of int -- (* Mark that the intrinsic requires a particular bit in __ARM_FP to -- be set. *) -- | Requires_FP_bit of int -- (* Compiler optimization level for the test. 
*) -- | Compiler_optim of string -- --exception MixedMode of elts * elts -- --let rec elt_width = function -- S8 | U8 | P8 | I8 | B8 -> 8 -- | S16 | U16 | P16 | I16 | B16 | F16 -> 16 -- | S32 | F32 | U32 | I32 | B32 -> 32 -- | S64 | U64 | P64 | I64 | B64 -> 64 -- | P128 -> 128 -- | Conv (a, b) -> -- let wa = elt_width a and wb = elt_width b in -- if wa = wb then wa else raise (MixedMode (a, b)) -- | Cast (a, b) -> raise (MixedMode (a, b)) -- | NoElts -> failwith "No elts" -- --let rec elt_class = function -- S8 | S16 | S32 | S64 -> Signed -- | U8 | U16 | U32 | U64 -> Unsigned -- | P8 | P16 | P64 | P128 -> Poly -- | F16 | F32 -> Float -- | I8 | I16 | I32 | I64 -> Int -- | B8 | B16 | B32 | B64 -> Bits -- | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b) -- | NoElts -> NoType -- --let elt_of_class_width c w = -- match c, w with -- Signed, 8 -> S8 -- | Signed, 16 -> S16 -- | Signed, 32 -> S32 -- | Signed, 64 -> S64 -- | Float, 16 -> F16 -- | Float, 32 -> F32 -- | Unsigned, 8 -> U8 -- | Unsigned, 16 -> U16 -- | Unsigned, 32 -> U32 -- | Unsigned, 64 -> U64 -- | Poly, 8 -> P8 -- | Poly, 16 -> P16 -- | Poly, 64 -> P64 -- | Poly, 128 -> P128 -- | Int, 8 -> I8 -- | Int, 16 -> I16 -- | Int, 32 -> I32 -- | Int, 64 -> I64 -- | Bits, 8 -> B8 -- | Bits, 16 -> B16 -- | Bits, 32 -> B32 -- | Bits, 64 -> B64 -- | _ -> failwith "Bad element type" -- --(* Return unsigned integer element the same width as argument. *) --let unsigned_of_elt elt = -- elt_of_class_width Unsigned (elt_width elt) -- --let signed_of_elt elt = -- elt_of_class_width Signed (elt_width elt) -- --(* Return untyped bits element the same width as argument. *) --let bits_of_elt elt = -- elt_of_class_width Bits (elt_width elt) -- --let non_signed_variant = function -- S8 -> I8 -- | S16 -> I16 -- | S32 -> I32 -- | S64 -> I64 -- | U8 -> I8 -- | U16 -> I16 -- | U32 -> I32 -- | U64 -> I64 -- | x -> x -- --let poly_unsigned_variant v = -- let elclass = match elt_class v with -- Poly -> Unsigned -- | x -> x in -- elt_of_class_width elclass (elt_width v) -- --let widen_elt elt = -- let w = elt_width elt -- and c = elt_class elt in -- elt_of_class_width c (w * 2) -- --let narrow_elt elt = -- let w = elt_width elt -- and c = elt_class elt in -- elt_of_class_width c (w / 2) -- --(* If we're trying to find a mode from a "Use_operands" instruction, use the -- last vector operand as the dominant mode used to invoke the correct builtin. -- We must stick to this rule in neon.md. *) --let find_key_operand operands = -- let rec scan opno = -- match operands.(opno) with -- Qreg -> Qreg -- | Dreg -> Dreg -- | VecArray (_, Qreg) -> Qreg -- | VecArray (_, Dreg) -> Dreg -- | _ -> scan (opno-1) -- in -- scan ((Array.length operands) - 1) -- --(* Find a vecmode from a shape_elt ELT for an instruction with shape_form -- SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode -- for the given argument position, else determine which argument to return a -- mode for automatically. 
*) -- --let rec mode_of_elt ?argpos elt shape = -- let flt = match elt_class elt with -- Float | ConvClass(_, Float) -> true | _ -> false in -- let idx = -- match elt_width elt with -- 8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3 | 128 -> 4 -- | _ -> failwith "Bad element width" -- in match shape with -- All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg -- | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg -> -- if flt then -- [| V8QI; V4HF; V2SF; DI |].(idx) -- else -- [| V8QI; V4HI; V2SI; DI |].(idx) -- | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg -- | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg -> -- [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI; TI|].(idx) -- | All (_, (Corereg | PtrTo _ | CstPtrTo _)) -> -- [| QI; HI; if flt then SF else SI; DI |].(idx) -- | Long | Wide | Wide_lane | Wide_scalar -- | Long_imm -> -- [| V8QI; V4HI; V2SI; DI |].(idx) -- | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx) -- | Use_operands ops -> -- begin match argpos with -- None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops))) -- | Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos))) -- end -- | _ -> failwith "invalid shape" -- --(* Modify an element type dependent on the shape of the instruction and the -- operand number. *) -- --let shapemap shape no = -- let ident = fun x -> x in -- match shape with -- All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _ -- | Binary_imm _ -> ident -- | Long | Long_noreg _ | Wide_scalar | Long_imm -> -- [| widen_elt; ident; ident |].(no) -- | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no) -- | Wide_lane -> [| widen_elt; ident; ident; ident |].(no) -- | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no) -- --(* Register type (D/Q) of an operand, based on shape and operand number. 
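*)

(* Illustration, not part of the original patch: mode_of_elt simply
   indexes the per-width tables above (widths 8/16/32/64/128 give
   indices 0..4), so for example: *)
let () =
  assert (mode_of_elt S32 (All (3, Dreg)) = V2SI);  (* integer D-reg table *)
  assert (mode_of_elt F32 (All (3, Dreg)) = V2SF);  (* float D-reg table   *)
  assert (mode_of_elt U8 (All (3, Qreg)) = V16QI)   (* Q-reg table, idx 0  *)
(*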
*) -- --let regmap shape no = -- match shape with -- All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg -- | Long -> [| Qreg; Dreg; Dreg |].(no) -- | Wide -> [| Qreg; Qreg; Dreg |].(no) -- | Narrow -> [| Dreg; Qreg; Qreg |].(no) -- | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no) -- | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no) -- | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no) -- | Unary_scalar reg -> [| reg; Dreg; Immed |].(no) -- | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no) -- | Binary_imm reg -> [| reg; reg; Immed |].(no) -- | Long_imm -> [| Qreg; Dreg; Immed |].(no) -- | Narrow_imm -> [| Dreg; Qreg; Immed |].(no) -- | Use_operands these -> these.(no) -- --let type_for_elt shape elt no = -- let elt = (shapemap shape no) elt in -- let reg = regmap shape no in -- let rec type_for_reg_elt reg elt = -- match reg with -- Dreg -> -- begin match elt with -- S8 -> T_int8x8 -- | S16 -> T_int16x4 -- | S32 -> T_int32x2 -- | S64 -> T_int64x1 -- | U8 -> T_uint8x8 -- | U16 -> T_uint16x4 -- | U32 -> T_uint32x2 -- | U64 -> T_uint64x1 -- | P64 -> T_poly64x1 -- | P128 -> T_poly128 -- | F16 -> T_float16x4 -- | F32 -> T_float32x2 -- | P8 -> T_poly8x8 -- | P16 -> T_poly16x4 -- | _ -> failwith "Bad elt type for Dreg" -- end -- | Qreg -> -- begin match elt with -- S8 -> T_int8x16 -- | S16 -> T_int16x8 -- | S32 -> T_int32x4 -- | S64 -> T_int64x2 -- | U8 -> T_uint8x16 -- | U16 -> T_uint16x8 -- | U32 -> T_uint32x4 -- | U64 -> T_uint64x2 -- | F32 -> T_float32x4 -- | P8 -> T_poly8x16 -- | P16 -> T_poly16x8 -- | P64 -> T_poly64x2 -- | P128 -> T_poly128 -- | _ -> failwith "Bad elt type for Qreg" -- end -- | Corereg -> -- begin match elt with -- S8 -> T_int8 -- | S16 -> T_int16 -- | S32 -> T_int32 -- | S64 -> T_int64 -- | U8 -> T_uint8 -- | U16 -> T_uint16 -- | U32 -> T_uint32 -- | U64 -> T_uint64 -- | P8 -> T_poly8 -- | P16 -> T_poly16 -- | P64 -> T_poly64 -- | P128 -> T_poly128 -- | F32 -> T_float32 -- | _ -> failwith "Bad elt type for Corereg" -- end -- | Immed -> -- T_immediate (0, 0) -- | VecArray (num, sub) -> -- T_arrayof (num, type_for_reg_elt sub elt) -- | PtrTo x -> -- T_ptrto (type_for_reg_elt x elt) -- | CstPtrTo x -> -- T_ptrto (T_const (type_for_reg_elt x elt)) -- (* Anything else is solely for the use of the test generator. *) -- | _ -> assert false -- in -- type_for_reg_elt reg elt -- --(* Return size of a vector type, in bits. *) --let vectype_size = function -- T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1 -- | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1 -- | T_float32x2 | T_poly8x8 | T_poly64x1 | T_poly16x4 | T_float16x4 -> 64 -- | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2 -- | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2 -- | T_float32x4 | T_poly8x16 | T_poly64x2 | T_poly16x8 -> 128 -- | _ -> raise Not_found -- --let inttype_for_array num elttype = -- let eltsize = vectype_size elttype in -- let numwords = (num * eltsize) / 32 in -- match numwords with -- 4 -> B_TImode -- | 6 -> B_EImode -- | 8 -> B_OImode -- | 12 -> B_CImode -- | 16 -> B_XImode -- | _ -> failwith ("no int type for size " ^ string_of_int numwords) -- --(* These functions return pairs of (internal, external) types, where "internal" -- types are those seen by GCC, and "external" are those seen by the assembler. -- These types aren't necessarily the same, since the intrinsics can munge more -- than one C type into each assembler opcode. *) -- --let make_sign_invariant func shape elt = -- let arity, elt' = func shape elt in -- arity, non_signed_variant elt' -- --(* Don't restrict any types. 
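*)

(* Illustration, not part of the original patch: inttype_for_array maps
   a vector-array type to the opaque integer mode of the same size,
   counted in 32-bit words; a pair of Q registers is 2 * 128 / 32 = 8
   words, i.e. B_OImode.  Using only the definitions above: *)
let () =
  assert (vectype_size T_int32x4 = 128);
  assert (inttype_for_array 2 T_int32x4 = B_OImode);  (* 8 words *)
  assert (inttype_for_array 3 T_int16x4 = B_EImode)   (* 6 words *)
(*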
*) -- --let elts_same make_arity shape elt = -- let vtype = type_for_elt shape elt in -- make_arity vtype, elt -- --(* As sign_invar_*, but when sign matters. *) --let elts_same_io_lane = -- elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3)) -- --let elts_same_io = -- elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2)) -- --let elts_same_2_lane = -- elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3)) -- --let elts_same_3 = elts_same_2_lane -- --let elts_same_2 = -- elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2)) -- --let elts_same_1 = -- elts_same (fun vtype -> Arity1 (vtype 0, vtype 1)) -- --(* Use for signed/unsigned invariant operations (i.e. where the operation -- doesn't depend on the sign of the data. *) -- --let sign_invar_io_lane = make_sign_invariant elts_same_io_lane --let sign_invar_io = make_sign_invariant elts_same_io --let sign_invar_2_lane = make_sign_invariant elts_same_2_lane --let sign_invar_2 = make_sign_invariant elts_same_2 --let sign_invar_1 = make_sign_invariant elts_same_1 -- --(* Sign-sensitive comparison. *) -- --let cmp_sign_matters shape elt = -- let vtype = type_for_elt shape elt -- and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in -- Arity2 (rtype, vtype 1, vtype 2), elt -- --(* Signed/unsigned invariant comparison. *) -- --let cmp_sign_invar shape elt = -- let shape', elt' = cmp_sign_matters shape elt in -- let elt'' = -- match non_signed_variant elt' with -- P8 -> I8 -- | x -> x -- in -- shape', elt'' -- --(* Comparison (VTST) where only the element width matters. *) -- --let cmp_bits shape elt = -- let vtype = type_for_elt shape elt -- and rtype = type_for_elt shape (unsigned_of_elt elt) 0 -- and bits_only = bits_of_elt elt in -- Arity2 (rtype, vtype 1, vtype 2), bits_only -- --let reg_shift shape elt = -- let vtype = type_for_elt shape elt -- and op2type = type_for_elt shape (signed_of_elt elt) 2 in -- Arity2 (vtype 0, vtype 1, op2type), elt -- --(* Genericised constant-shift type-generating function. *) -- --let const_shift mkimm ?arity ?result shape elt = -- let op2type = (shapemap shape 2) elt in -- let op2width = elt_width op2type in -- let op2 = mkimm op2width -- and op1 = type_for_elt shape elt 1 -- and r_elt = -- match result with -- None -> elt -- | Some restriction -> restriction elt in -- let rtype = type_for_elt shape r_elt 0 in -- match arity with -- None -> Arity2 (rtype, op1, op2), elt -- | Some mkarity -> mkarity rtype op1 op2, elt -- --(* Use for immediate right-shifts. *) -- --let shift_right shape elt = -- const_shift (fun imm -> T_immediate (1, imm)) shape elt -- --let shift_right_acc shape elt = -- const_shift (fun imm -> T_immediate (1, imm)) -- ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt -- --(* Use for immediate right-shifts when the operation doesn't care about -- signedness. *) -- --let shift_right_sign_invar = -- make_sign_invariant shift_right -- --(* Immediate right-shift; result is unsigned even when operand is signed. *) -- --let shift_right_to_uns shape elt = -- const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt -- shape elt -- --(* Immediate left-shift. *) -- --let shift_left shape elt = -- const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt -- --(* Immediate left-shift, unsigned result. *) -- --let shift_left_to_uns shape elt = -- const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt -- shape elt -- --(* Immediate left-shift, don't care about signs. 
*) -- --let shift_left_sign_invar = -- make_sign_invariant shift_left -- --(* Shift left/right and insert: only element size matters. *) -- --let shift_insert shape elt = -- let arity, elt = -- const_shift (fun imm -> T_immediate (1, imm)) -- ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in -- arity, bits_of_elt elt -- --(* Get/set lane. *) -- --let get_lane shape elt = -- let vtype = type_for_elt shape elt in -- Arity2 (vtype 0, vtype 1, vtype 2), -- (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x) -- --let set_lane shape elt = -- let vtype = type_for_elt shape elt in -- Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt -- --let set_lane_notype shape elt = -- let vtype = type_for_elt shape elt in -- Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts -- --let create_vector shape elt = -- let vtype = type_for_elt shape U64 1 -- and rtype = type_for_elt shape elt 0 in -- Arity1 (rtype, vtype), elt -- --let conv make_arity shape elt = -- let edest, esrc = match elt with -- Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc -- | _ -> failwith "Non-conversion element in conversion" in -- let vtype = type_for_elt shape esrc -- and rtype = type_for_elt shape edest 0 in -- make_arity rtype vtype, elt -- --let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1)) --let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2)) -- --(* Operation has an unsigned result even if operands are signed. *) -- --let dst_unsign make_arity shape elt = -- let vtype = type_for_elt shape elt -- and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in -- make_arity rtype vtype, elt -- --let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1)) -- --let make_bits_only func shape elt = -- let arity, elt' = func shape elt in -- arity, bits_of_elt elt' -- --(* Extend operation. *) -- --let extend shape elt = -- let vtype = type_for_elt shape elt in -- Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt -- --(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned -- integer ops respectively, or unsigned for polynomial ops. *) -- --let table mkarity shape elt = -- let vtype = type_for_elt shape elt in -- let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in -- mkarity vtype op2, bits_of_elt elt -- --let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2)) --let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2)) -- --(* Operations where only bits matter. *) -- --let bits_1 = make_bits_only elts_same_1 --let bits_2 = make_bits_only elts_same_2 --let bits_3 = make_bits_only elts_same_3 -- --(* Store insns. *) --let store_1 shape elt = -- let vtype = type_for_elt shape elt in -- Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt -- --let store_3 shape elt = -- let vtype = type_for_elt shape elt in -- Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt -- --let make_notype func shape elt = -- let arity, _ = func shape elt in -- arity, NoElts -- --let notype_1 = make_notype elts_same_1 --let notype_2 = make_notype elts_same_2 --let notype_3 = make_notype elts_same_3 -- --(* Bit-select operations (first operand is unsigned int). *) -- --let bit_select shape elt = -- let vtype = type_for_elt shape elt -- and itype = type_for_elt shape (unsigned_of_elt elt) in -- Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts -- --(* Common lists of supported element types. 
*) -- --let s_8_32 = [S8; S16; S32] --let u_8_32 = [U8; U16; U32] --let su_8_32 = [S8; S16; S32; U8; U16; U32] --let su_8_64 = S64 :: U64 :: su_8_32 --let su_16_64 = [S16; S32; S64; U16; U32; U64] --let pf_su_8_16 = [P8; P16; S8; S16; U8; U16] --let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32 --let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64 --let suf_32 = [S32; U32; F32] -- --let ops = -- [ -- (* Addition. *) -- Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_32; -- Vadd, [No_op], All (3, Dreg), "vadd", sign_invar_2, [S64; U64]; -- Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64; -- Vadd, [], Long, "vaddl", elts_same_2, su_8_32; -- Vadd, [], Wide, "vaddw", elts_same_2, su_8_32; -- Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32; -- Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32; -- Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], -- All (3, Dreg), "vRhadd", elts_same_2, su_8_32; -- Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], -- All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32; -- Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64; -- Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64; -- Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64; -- Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half], -- Narrow, "vRaddhn", sign_invar_2, su_16_64; -- -- (* Multiplication. *) -- Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32; -- Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32; -- Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh", -- elts_same_2, [S16; S32]; -- Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ", -- elts_same_2, [S16; S32]; -- Vmul, -- [Saturating; Rounding; Doubling; High_half; -- Instruction_name ["vqrdmulh"]], -- All (3, Dreg), "vqRdmulh", -- elts_same_2, [S16; S32]; -- Vmul, -- [Saturating; Rounding; Doubling; High_half; -- Instruction_name ["vqrdmulh"]], -- All (3, Qreg), "vqRdmulhQ", -- elts_same_2, [S16; S32]; -- Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32; -- Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32]; -- -- (* Multiply-accumulate. *) -- Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32; -- Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32; -- Vmla, [], Long, "vmlal", elts_same_io, su_8_32; -- Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32]; -- -- (* Multiply-subtract. *) -- Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32; -- Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32; -- Vmls, [], Long, "vmlsl", elts_same_io, su_8_32; -- Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; -- -- (* Fused-multiply-accumulate. *) -- Vfma, [Requires_feature "FMA"], All (3, Dreg), "vfma", elts_same_io, [F32]; -- Vfma, [Requires_feature "FMA"], All (3, Qreg), "vfmaQ", elts_same_io, [F32]; -- Vfms, [Requires_feature "FMA"], All (3, Dreg), "vfms", elts_same_io, [F32]; -- Vfms, [Requires_feature "FMA"], All (3, Qreg), "vfmsQ", elts_same_io, [F32]; -- -- (* Round to integral. 
*) -- Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Dreg; Dreg |], -- "vrndn", elts_same_1, [F32]; -- Vrintn, [Builtin_name "vrintn"; Requires_arch 8], Use_operands [| Qreg; Qreg |], -- "vrndqn", elts_same_1, [F32]; -- Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Dreg; Dreg |], -- "vrnda", elts_same_1, [F32]; -- Vrinta, [Builtin_name "vrinta"; Requires_arch 8], Use_operands [| Qreg; Qreg |], -- "vrndqa", elts_same_1, [F32]; -- Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Dreg; Dreg |], -- "vrndp", elts_same_1, [F32]; -- Vrintp, [Builtin_name "vrintp"; Requires_arch 8], Use_operands [| Qreg; Qreg |], -- "vrndqp", elts_same_1, [F32]; -- Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Dreg; Dreg |], -- "vrndm", elts_same_1, [F32]; -- Vrintm, [Builtin_name "vrintm"; Requires_arch 8], Use_operands [| Qreg; Qreg |], -- "vrndqm", elts_same_1, [F32]; -- Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Dreg; Dreg |], -- "vrnd", elts_same_1, [F32]; -- Vrintz, [Builtin_name "vrintz"; Requires_arch 8], Use_operands [| Qreg; Qreg |], -- "vrndq", elts_same_1, [F32]; -- (* Subtraction. *) -- Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32; -- Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64]; -- Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64; -- Vsub, [], Long, "vsubl", elts_same_2, su_8_32; -- Vsub, [], Wide, "vsubw", elts_same_2, su_8_32; -- Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32; -- Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32; -- Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64; -- Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64; -- Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64; -- Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half], -- Narrow, "vRsubhn", sign_invar_2, su_16_64; -- -- (* Comparison, equal. *) -- Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32; -- Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32; -- -- (* Comparison, greater-than or equal. *) -- Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32; -- Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"], -- All (3, Dreg), "vcge", cmp_sign_matters, -- u_8_32; -- Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32; -- Vcge, [Instruction_name ["vcge"]; Builtin_name "vcgeu"], -- All (3, Qreg), "vcgeQ", cmp_sign_matters, -- u_8_32; -- -- (* Comparison, less-than or equal. *) -- Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters, -- F32 :: s_8_32; -- Vcle, [Instruction_name ["vcge"]; Flipped "vcgeu"], -- All (3, Dreg), "vcle", cmp_sign_matters, -- u_8_32; -- Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"], -- All (3, Qreg), "vcleQ", cmp_sign_matters, -- F32 :: s_8_32; -- Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"], -- All (3, Qreg), "vcleQ", cmp_sign_matters, -- u_8_32; -- -- (* Comparison, greater-than. *) -- Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32; -- Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"], -- All (3, Dreg), "vcgt", cmp_sign_matters, -- u_8_32; -- Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32; -- Vcgt, [Instruction_name ["vcgt"]; Builtin_name "vcgtu"], -- All (3, Qreg), "vcgtQ", cmp_sign_matters, -- u_8_32; -- -- (* Comparison, less-than. 
*)
--  Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
--    F32 :: s_8_32;
--  Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtu"],
--    All (3, Dreg), "vclt", cmp_sign_matters,
--    u_8_32;
--  Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
--    All (3, Qreg), "vcltQ", cmp_sign_matters,
--    F32 :: s_8_32;
--  Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
--    All (3, Qreg), "vcltQ", cmp_sign_matters,
--    u_8_32;
--
--  (* Compare absolute greater-than or equal. *)
--  Vcage, [Instruction_name ["vacge"]],
--    All (3, Dreg), "vcage", cmp_sign_matters, [F32];
--  Vcage, [Instruction_name ["vacge"]],
--    All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
--
--  (* Compare absolute less-than or equal. *)
--  Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
--    All (3, Dreg), "vcale", cmp_sign_matters, [F32];
--  Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
--    All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
--
--  (* Compare absolute greater-than. *)
--  Vcagt, [Instruction_name ["vacgt"]],
--    All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
--  Vcagt, [Instruction_name ["vacgt"]],
--    All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
--
--  (* Compare absolute less-than. *)
--  Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
--    All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
--  Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
--    All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
--
--  (* Test bits. *)
--  Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
--  Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
--
--  (* Absolute difference. *)
--  Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
--  Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
--  Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
--
--  (* Absolute difference and accumulate. *)
--  Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
--  Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
--  Vaba, [], Long, "vabal", elts_same_io, su_8_32;
--
--  (* Max. *)
--  Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
--  Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
--
--  (* Min. *)
--  Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
--  Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
--
--  (* Pairwise add. *)
--  Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
--  Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
--  Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
--
--  (* Pairwise add, widen and accumulate. *)
--  Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
--  Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
--
--  (* Folding maximum, minimum. *)
--  Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
--  Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
--
--  (* Reciprocal step. *)
--  Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
--  Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
--  Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
--  Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
--
--  (* Vector shift left.
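*)

(* Illustration, not part of the original patch: Flipped "x" means "call
   builtin x with the two operands swapped", which is how the less-than
   forms above reuse the greater-than builtins.  A hypothetical sketch
   of the idea; example_emit_call and its output format are inventions
   for exposition, not the generator's real emitter: *)
let example_emit_call builtin args features =
  let flipped =
    List.exists (function Flipped _ -> true | _ -> false) features in
  match args with
    [a; b] when flipped ->
      Printf.sprintf "__builtin_neon_%s (%s, %s)" builtin b a
  | _ ->
      Printf.sprintf "__builtin_neon_%s (%s)" builtin
                     (String.concat ", " args)
(*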
*) -- Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64; -- Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64; -- Vshl, [Instruction_name ["vrshl"]; Rounding], -- All (3, Dreg), "vRshl", reg_shift, su_8_64; -- Vshl, [Instruction_name ["vrshl"]; Rounding], -- All (3, Qreg), "vRshlQ", reg_shift, su_8_64; -- Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64; -- Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64; -- Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], -- All (3, Dreg), "vqRshl", reg_shift, su_8_64; -- Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], -- All (3, Qreg), "vqRshlQ", reg_shift, su_8_64; -- -- (* Vector shift right by constant. *) -- Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64; -- Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64; -- Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg, -- "vRshr_n", shift_right, su_8_64; -- Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg, -- "vRshrQ_n", shift_right, su_8_64; -- Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64; -- Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n", -- shift_right_sign_invar, su_16_64; -- Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64; -- Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm, -- "vqRshrn_n", shift_right, su_16_64; -- Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n", -- shift_right_to_uns, [S16; S32; S64]; -- Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding], -- Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64]; -- -- (* Vector shift left by constant. *) -- Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64; -- Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64; -- Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64; -- Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64; -- Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n", -- shift_left_to_uns, [S8; S16; S32; S64]; -- Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n", -- shift_left_to_uns, [S8; S16; S32; S64]; -- Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32; -- -- (* Vector shift right by constant and accumulate. *) -- Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64; -- Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64; -- Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg, -- "vRsra_n", shift_right_acc, su_8_64; -- Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg, -- "vRsraQ_n", shift_right_acc, su_8_64; -- -- (* Vector shift right and insert. *) -- Vsri, [Requires_feature "CRYPTO"], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, -- [P64]; -- Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, -- P8 :: P16 :: su_8_64; -- Vsri, [Requires_feature "CRYPTO"], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, -- [P64]; -- Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, -- P8 :: P16 :: su_8_64; -- -- (* Vector shift left and insert. 
*) -- Vsli, [Requires_feature "CRYPTO"], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, -- [P64]; -- Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, -- P8 :: P16 :: su_8_64; -- Vsli, [Requires_feature "CRYPTO"], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, -- [P64]; -- Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, -- P8 :: P16 :: su_8_64; -- -- (* Absolute value. *) -- Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32]; -- Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32]; -- Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32]; -- Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32]; -- -- (* Negate. *) -- Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32]; -- Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32]; -- Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32]; -- Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32]; -- -- (* Bitwise not. *) -- Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32; -- Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32; -- -- (* Count leading sign bits. *) -- Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32]; -- Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32]; -- -- (* Count leading zeros. *) -- Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32; -- Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32; -- -- (* Count number of set bits. *) -- Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8]; -- Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8]; -- -- (* Reciprocal estimate. *) -- Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32]; -- Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32]; -- -- (* Reciprocal square-root estimate. *) -- Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32]; -- Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32]; -- -- (* Get lanes from a vector. *) -- Vget_lane, -- [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; -- Instruction_name ["vmov"]], -- Use_operands [| Corereg; Dreg; Immed |], -- "vget_lane", get_lane, pf_su_8_32; -- Vget_lane, -- [No_op; -- InfoWord; -- Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; -- Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], -- Use_operands [| Corereg; Dreg; Immed |], -- "vget_lane", notype_2, [S64; U64]; -- Vget_lane, -- [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; -- Instruction_name ["vmov"]], -- Use_operands [| Corereg; Qreg; Immed |], -- "vgetQ_lane", get_lane, pf_su_8_32; -- Vget_lane, -- [InfoWord; -- Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; -- Instruction_name ["vmov"; "fmrrd"]; Const_valuator (fun _ -> 0); -- Fixed_core_reg], -- Use_operands [| Corereg; Qreg; Immed |], -- "vgetQ_lane", notype_2, [S64; U64]; -- -- (* Set lanes in a vector. 
*) -- Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; -- Instruction_name ["vmov"]], -- Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", -- set_lane, pf_su_8_32; -- Vset_lane, [No_op; -- Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; -- Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], -- Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", -- set_lane_notype, [S64; U64]; -- Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; -- Instruction_name ["vmov"]], -- Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", -- set_lane, pf_su_8_32; -- Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; -- Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], -- Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", -- set_lane_notype, [S64; U64]; -- -- (* Create vector from literal bit pattern. *) -- Vcreate, -- [Requires_feature "CRYPTO"; No_op], (* Not really, but it can yield various things that are too -- hard for the test generator at this time. *) -- Use_operands [| Dreg; Corereg |], "vcreate", create_vector, -- [P64]; -- Vcreate, -- [No_op], (* Not really, but it can yield various things that are too -- hard for the test generator at this time. *) -- Use_operands [| Dreg; Corereg |], "vcreate", create_vector, -- pf_su_8_64; -- -- (* Set all lanes to the same value. *) -- Vdup_n, -- [Disassembles_as [Use_operands [| Dreg; -- Alternatives [ Corereg; -- Element_of_dreg ] |]]], -- Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, -- pf_su_8_32; -- Vdup_n, -- [No_op; Requires_feature "CRYPTO"; -- Instruction_name ["vmov"]; -- Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], -- Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, -- [P64]; -- Vdup_n, -- [No_op; -- Instruction_name ["vmov"]; -- Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], -- Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, -- [S64; U64]; -- Vdup_n, -- [No_op; Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| Qreg; -- Alternatives [ Corereg; -- Element_of_dreg ] |]]], -- Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, -- [P64]; -- Vdup_n, -- [Disassembles_as [Use_operands [| Qreg; -- Alternatives [ Corereg; -- Element_of_dreg ] |]]], -- Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, -- pf_su_8_32; -- Vdup_n, -- [No_op; -- Instruction_name ["vmov"]; -- Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; -- Use_operands [| Dreg; Corereg; Corereg |]]], -- Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1, -- [S64; U64]; -- -- (* These are just aliases for the above. 
*) -- Vmov_n, -- [Builtin_name "vdup_n"; -- Disassembles_as [Use_operands [| Dreg; -- Alternatives [ Corereg; -- Element_of_dreg ] |]]], -- Use_operands [| Dreg; Corereg |], -- "vmov_n", bits_1, pf_su_8_32; -- Vmov_n, -- [No_op; -- Builtin_name "vdup_n"; -- Instruction_name ["vmov"]; -- Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], -- Use_operands [| Dreg; Corereg |], -- "vmov_n", notype_1, [S64; U64]; -- Vmov_n, -- [Builtin_name "vdupQ_n"; -- Disassembles_as [Use_operands [| Qreg; -- Alternatives [ Corereg; -- Element_of_dreg ] |]]], -- Use_operands [| Qreg; Corereg |], -- "vmovQ_n", bits_1, pf_su_8_32; -- Vmov_n, -- [No_op; -- Builtin_name "vdupQ_n"; -- Instruction_name ["vmov"]; -- Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; -- Use_operands [| Dreg; Corereg; Corereg |]]], -- Use_operands [| Qreg; Corereg |], -- "vmovQ_n", notype_1, [S64; U64]; -- -- (* Duplicate, lane version. We can't use Use_operands here because the -- rightmost register (always Dreg) would be picked up by find_key_operand, -- when we want the leftmost register to be used in this case (otherwise -- the modes are indistinguishable in neon.md, etc. *) -- Vdup_lane, -- [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]], -- Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32; -- Vdup_lane, -- [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], -- Unary_scalar Dreg, "vdup_lane", bits_2, [P64]; -- Vdup_lane, -- [No_op; Const_valuator (fun _ -> 0)], -- Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64]; -- Vdup_lane, -- [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]], -- Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32; -- Vdup_lane, -- [No_op; Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], -- Unary_scalar Qreg, "vdupQ_lane", bits_2, [P64]; -- Vdup_lane, -- [No_op; Const_valuator (fun _ -> 0)], -- Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64]; -- -- (* Combining vectors. *) -- Vcombine, [Requires_feature "CRYPTO"; No_op], -- Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, -- [P64]; -- Vcombine, [No_op], -- Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, -- pf_su_8_64; -- -- (* Splitting vectors. *) -- Vget_high, [Requires_feature "CRYPTO"; No_op], -- Use_operands [| Dreg; Qreg |], "vget_high", -- notype_1, [P64]; -- Vget_high, [No_op], -- Use_operands [| Dreg; Qreg |], "vget_high", -- notype_1, pf_su_8_64; -- Vget_low, [Instruction_name ["vmov"]; -- Disassembles_as [Use_operands [| Dreg; Dreg |]]; -- Fixed_vector_reg], -- Use_operands [| Dreg; Qreg |], "vget_low", -- notype_1, pf_su_8_32; -- Vget_low, [Requires_feature "CRYPTO"; No_op], -- Use_operands [| Dreg; Qreg |], "vget_low", -- notype_1, [P64]; -- Vget_low, [No_op], -- Use_operands [| Dreg; Qreg |], "vget_low", -- notype_1, [S64; U64]; -- -- (* Conversions. 
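*)

(* Illustration, not part of the original patch: a Conv (dest, src)
   element carries both sides of a conversion, and conv_1 derives the
   prototype from it; for vcvt_s32_f32 on D registers: *)
let () =
  match conv_1 (All (2, Dreg)) (Conv (S32, F32)) with
    Arity1 (rt, at), _ -> assert (rt = T_int32x2 && at = T_float32x2)
  | _ -> assert false
(*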
*) -- Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1, -- [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; -- Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, -- [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; -- Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], -- Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)]; -- Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1], -- Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)]; -- Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, -- [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; -- Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, -- [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; -- -- (* Move, narrowing. *) -- Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]], -- Narrow, "vmovn", sign_invar_1, su_16_64; -- Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating], -- Narrow, "vqmovn", elts_same_1, su_16_64; -- Vmovn, -- [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign], -- Narrow, "vqmovun", dst_unsign_1, -- [S16; S32; S64]; -- -- (* Move, long. *) -- Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]], -- Long, "vmovl", elts_same_1, su_8_32; -- -- (* Table lookup. *) -- Vtbl 1, -- [Instruction_name ["vtbl"]; -- Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], -- Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8]; -- Vtbl 2, [Instruction_name ["vtbl"]], -- Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2, -- [U8; S8; P8]; -- Vtbl 3, [Instruction_name ["vtbl"]], -- Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2, -- [U8; S8; P8]; -- Vtbl 4, [Instruction_name ["vtbl"]], -- Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2, -- [U8; S8; P8]; -- -- (* Extended table lookup. *) -- Vtbx 1, -- [Instruction_name ["vtbx"]; -- Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], -- Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8]; -- Vtbx 2, [Instruction_name ["vtbx"]], -- Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io, -- [U8; S8; P8]; -- Vtbx 3, [Instruction_name ["vtbx"]], -- Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io, -- [U8; S8; P8]; -- Vtbx 4, [Instruction_name ["vtbx"]], -- Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io, -- [U8; S8; P8]; -- -- (* Multiply, lane. (note: these were undocumented at the time of -- writing). *) -- Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane, -- [S16; S32; U16; U32; F32]; -- Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane, -- [S16; S32; U16; U32; F32]; -- -- (* Multiply-accumulate, lane. *) -- Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane, -- [S16; S32; U16; U32; F32]; -- Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane, -- [S16; S32; U16; U32; F32]; -- Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane, -- [S16; S32; U16; U32]; -- Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane", -- elts_same_io_lane, [S16; S32]; -- -- (* Multiply-subtract, lane. 
*) -- Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane, -- [S16; S32; U16; U32; F32]; -- Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane, -- [S16; S32; U16; U32; F32]; -- Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane, -- [S16; S32; U16; U32]; -- Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane", -- elts_same_io_lane, [S16; S32]; -- -- (* Long multiply, lane. *) -- Vmull_lane, [], -- Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32]; -- -- (* Saturating doubling long multiply, lane. *) -- Vqdmull_lane, [Saturating; Doubling], -- Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32]; -- -- (* Saturating doubling long multiply high, lane. *) -- Vqdmulh_lane, [Saturating; Halving], -- By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32]; -- Vqdmulh_lane, [Saturating; Halving], -- By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32]; -- Vqdmulh_lane, [Saturating; Halving; Rounding; -- Instruction_name ["vqrdmulh"]], -- By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32]; -- Vqdmulh_lane, [Saturating; Halving; Rounding; -- Instruction_name ["vqrdmulh"]], -- By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32]; -- -- (* Vector multiply by scalar. *) -- Vmul_n, [InfoWord; -- Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], -- Use_operands [| Dreg; Dreg; Corereg |], "vmul_n", -- sign_invar_2, [S16; S32; U16; U32; F32]; -- Vmul_n, [InfoWord; -- Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], -- Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n", -- sign_invar_2, [S16; S32; U16; U32; F32]; -- -- (* Vector long multiply by scalar. *) -- Vmull_n, [Instruction_name ["vmull"]; -- Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]], -- Wide_scalar, "vmull_n", -- elts_same_2, [S16; S32; U16; U32]; -- -- (* Vector saturating doubling long multiply by scalar. *) -- Vqdmull_n, [Saturating; Doubling; -- Disassembles_as [Use_operands [| Qreg; Dreg; -- Element_of_dreg |]]], -- Wide_scalar, "vqdmull_n", -- elts_same_2, [S16; S32]; -- -- (* Vector saturating doubling long multiply high by scalar. *) -- Vqdmulh_n, -- [Saturating; Halving; InfoWord; -- Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], -- Use_operands [| Qreg; Qreg; Corereg |], -- "vqdmulhQ_n", elts_same_2, [S16; S32]; -- Vqdmulh_n, -- [Saturating; Halving; InfoWord; -- Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], -- Use_operands [| Dreg; Dreg; Corereg |], -- "vqdmulh_n", elts_same_2, [S16; S32]; -- Vqdmulh_n, -- [Saturating; Halving; Rounding; InfoWord; -- Instruction_name ["vqrdmulh"]; -- Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], -- Use_operands [| Qreg; Qreg; Corereg |], -- "vqRdmulhQ_n", elts_same_2, [S16; S32]; -- Vqdmulh_n, -- [Saturating; Halving; Rounding; InfoWord; -- Instruction_name ["vqrdmulh"]; -- Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], -- Use_operands [| Dreg; Dreg; Corereg |], -- "vqRdmulh_n", elts_same_2, [S16; S32]; -- -- (* Vector multiply-accumulate by scalar. 
*) -- Vmla_n, [InfoWord; -- Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], -- Use_operands [| Dreg; Dreg; Corereg |], "vmla_n", -- sign_invar_io, [S16; S32; U16; U32; F32]; -- Vmla_n, [InfoWord; -- Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], -- Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n", -- sign_invar_io, [S16; S32; U16; U32; F32]; -- Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32]; -- Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io, -- [S16; S32]; -- -- (* Vector multiply subtract by scalar. *) -- Vmls_n, [InfoWord; -- Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], -- Use_operands [| Dreg; Dreg; Corereg |], "vmls_n", -- sign_invar_io, [S16; S32; U16; U32; F32]; -- Vmls_n, [InfoWord; -- Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], -- Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n", -- sign_invar_io, [S16; S32; U16; U32; F32]; -- Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32]; -- Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io, -- [S16; S32]; -- -- (* Vector extract. *) -- Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], -- Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, -- [P64]; -- Vext, [Const_valuator (fun _ -> 0)], -- Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, -- pf_su_8_64; -- Vext, [Requires_feature "CRYPTO"; Const_valuator (fun _ -> 0)], -- Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, -- [P64]; -- Vext, [Const_valuator (fun _ -> 0)], -- Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, -- pf_su_8_64; -- -- (* Reverse elements. *) -- Vrev64, [Use_shuffle (rev_elems 64)], All (2, Dreg), "vrev64", bits_1, -- P8 :: P16 :: F32 :: su_8_32; -- Vrev64, [Use_shuffle (rev_elems 64)], All (2, Qreg), "vrev64Q", bits_1, -- P8 :: P16 :: F32 :: su_8_32; -- Vrev32, [Use_shuffle (rev_elems 32)], All (2, Dreg), "vrev32", bits_1, -- [P8; P16; S8; U8; S16; U16]; -- Vrev32, [Use_shuffle (rev_elems 32)], All (2, Qreg), "vrev32Q", bits_1, -- [P8; P16; S8; U8; S16; U16]; -- Vrev16, [Use_shuffle (rev_elems 16)], All (2, Dreg), "vrev16", bits_1, -- [P8; S8; U8]; -- Vrev16, [Use_shuffle (rev_elems 16)], All (2, Qreg), "vrev16Q", bits_1, -- [P8; S8; U8]; -- -- (* Bit selection. *) -- Vbsl, -- [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"]; -- Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], -- Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, -- [P64]; -- Vbsl, -- [Instruction_name ["vbsl"; "vbit"; "vbif"]; -- Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], -- Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, -- pf_su_8_64; -- Vbsl, -- [Requires_feature "CRYPTO"; Instruction_name ["vbsl"; "vbit"; "vbif"]; -- Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], -- Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, -- [P64]; -- Vbsl, -- [Instruction_name ["vbsl"; "vbit"; "vbif"]; -- Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], -- Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, -- pf_su_8_64; -- -- Vtrn, [Use_shuffle trn_elems], Pair_result Dreg, "vtrn", bits_2, pf_su_8_16; -- Vtrn, [Use_shuffle trn_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vtrn", bits_2, suf_32; -- Vtrn, [Use_shuffle trn_elems], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32; -- (* Zip elements. 
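*)

(* Illustration, not part of the original patch: Pair_result shapes
   return both halves of a permutation at once; regmap gives operand 0
   the type VecArray (2, reg), which type_for_elt renders as the
   int8x8x2_t-style aggregate via T_arrayof.  For vtrn on signed
   bytes: *)
let () =
  match type_for_elt (Pair_result Dreg) S8 0 with
    T_arrayof (2, T_int8x8) -> ()
  | _ -> assert false
(*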
*) -- Vzip, [Use_shuffle zip_elems], Pair_result Dreg, "vzip", bits_2, pf_su_8_16; -- Vzip, [Use_shuffle zip_elems; Instruction_name ["vuzp"]], Pair_result Dreg, "vzip", bits_2, suf_32; -- Vzip, [Use_shuffle zip_elems], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32; -- -- (* Unzip elements. *) -- Vuzp, [Use_shuffle uzip_elems], Pair_result Dreg, "vuzp", bits_2, -- pf_su_8_32; -- Vuzp, [Use_shuffle uzip_elems], Pair_result Qreg, "vuzpQ", bits_2, -- pf_su_8_32; -- -- (* Element/structure loads. VLD1 variants. *) -- Vldx 1, -- [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, -- [P64]; -- Vldx 1, -- [Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, -- pf_su_8_64; -- Vldx 1, [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (2, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, -- [P64]; -- Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, -- pf_su_8_64; -- -- Vldx_lane 1, -- [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], -- "vld1_lane", bits_3, pf_su_8_32; -- Vldx_lane 1, -- [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]; -- Const_valuator (fun _ -> 0)], -- Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], -- "vld1_lane", bits_3, [P64]; -- Vldx_lane 1, -- [Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]; -- Const_valuator (fun _ -> 0)], -- Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], -- "vld1_lane", bits_3, [S64; U64]; -- Vldx_lane 1, -- [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], -- "vld1Q_lane", bits_3, pf_su_8_32; -- Vldx_lane 1, -- [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], -- "vld1Q_lane", bits_3, [P64]; -- Vldx_lane 1, -- [Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], -- "vld1Q_lane", bits_3, [S64; U64]; -- -- Vldx_dup 1, -- [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", -- bits_1, pf_su_8_32; -- Vldx_dup 1, -- [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", -- bits_1, [P64]; -- Vldx_dup 1, -- [Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", -- bits_1, [S64; U64]; -- Vldx_dup 1, -- [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", -- bits_1, pf_su_8_32; -- (* Treated identically to vld1_dup above as we now -- do a single load followed by a duplicate. 
*) -- Vldx_dup 1, -- [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", -- bits_1, [P64]; -- Vldx_dup 1, -- [Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", -- bits_1, [S64; U64]; -- -- (* VST1 variants. *) -- Vstx 1, [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (1, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; Dreg |], "vst1", -- store_1, [P64]; -- Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; Dreg |], "vst1", -- store_1, pf_su_8_64; -- Vstx 1, [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (2, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", -- store_1, [P64]; -- Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", -- store_1, pf_su_8_64; -- -- Vstx_lane 1, -- [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; Dreg; Immed |], -- "vst1_lane", store_3, pf_su_8_32; -- Vstx_lane 1, -- [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]; -- Const_valuator (fun _ -> 0)], -- Use_operands [| PtrTo Corereg; Dreg; Immed |], -- "vst1_lane", store_3, [P64]; -- Vstx_lane 1, -- [Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]; -- Const_valuator (fun _ -> 0)], -- Use_operands [| PtrTo Corereg; Dreg; Immed |], -- "vst1_lane", store_3, [U64; S64]; -- Vstx_lane 1, -- [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; Qreg; Immed |], -- "vst1Q_lane", store_3, pf_su_8_32; -- Vstx_lane 1, -- [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; Qreg; Immed |], -- "vst1Q_lane", store_3, [P64]; -- Vstx_lane 1, -- [Disassembles_as [Use_operands [| VecArray (1, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; Qreg; Immed |], -- "vst1Q_lane", store_3, [U64; S64]; -- -- (* VLD2 variants. 
*) -- Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], -- "vld2", bits_1, pf_su_8_32; -- Vldx 2, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], -- Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], -- "vld2", bits_1, [P64]; -- Vldx 2, [Instruction_name ["vld1"]], -- Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], -- "vld2", bits_1, [S64; U64]; -- Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); -- CstPtrTo Corereg |]; -- Use_operands [| VecArray (2, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |], -- "vld2Q", bits_1, pf_su_8_32; -- -- Vldx_lane 2, -- [Disassembles_as [Use_operands -- [| VecArray (2, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg; -- VecArray (2, Dreg); Immed |], -- "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; -- Vldx_lane 2, -- [Disassembles_as [Use_operands -- [| VecArray (2, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg; -- VecArray (2, Qreg); Immed |], -- "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; -- -- Vldx_dup 2, -- [Disassembles_as [Use_operands -- [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], -- "vld2_dup", bits_1, pf_su_8_32; -- Vldx_dup 2, -- [Requires_feature "CRYPTO"; -- Instruction_name ["vld1"]; Disassembles_as [Use_operands -- [| VecArray (2, Dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], -- "vld2_dup", bits_1, [P64]; -- Vldx_dup 2, -- [Instruction_name ["vld1"]; Disassembles_as [Use_operands -- [| VecArray (2, Dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], -- "vld2_dup", bits_1, [S64; U64]; -- -- (* VST2 variants. *) -- Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", -- store_1, pf_su_8_32; -- Vstx 2, [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (2, Dreg); -- PtrTo Corereg |]]; -- Instruction_name ["vst1"]], -- Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", -- store_1, [P64]; -- Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); -- PtrTo Corereg |]]; -- Instruction_name ["vst1"]], -- Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", -- store_1, [S64; U64]; -- Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); -- PtrTo Corereg |]; -- Use_operands [| VecArray (2, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q", -- store_1, pf_su_8_32; -- -- Vstx_lane 2, -- [Disassembles_as [Use_operands -- [| VecArray (2, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane", -- store_3, P8 :: P16 :: F32 :: su_8_32; -- Vstx_lane 2, -- [Disassembles_as [Use_operands -- [| VecArray (2, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane", -- store_3, [P16; F32; U16; U32; S16; S32]; -- -- (* VLD3 variants. 
*) -- Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], -- "vld3", bits_1, pf_su_8_32; -- Vldx 3, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], -- Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], -- "vld3", bits_1, [P64]; -- Vldx 3, [Instruction_name ["vld1"]], -- Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], -- "vld3", bits_1, [S64; U64]; -- Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); -- CstPtrTo Corereg |]; -- Use_operands [| VecArray (3, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |], -- "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32; -- -- Vldx_lane 3, -- [Disassembles_as [Use_operands -- [| VecArray (3, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg; -- VecArray (3, Dreg); Immed |], -- "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; -- Vldx_lane 3, -- [Disassembles_as [Use_operands -- [| VecArray (3, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg; -- VecArray (3, Qreg); Immed |], -- "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; -- -- Vldx_dup 3, -- [Disassembles_as [Use_operands -- [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], -- "vld3_dup", bits_1, pf_su_8_32; -- Vldx_dup 3, -- [Requires_feature "CRYPTO"; -- Instruction_name ["vld1"]; Disassembles_as [Use_operands -- [| VecArray (3, Dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], -- "vld3_dup", bits_1, [P64]; -- Vldx_dup 3, -- [Instruction_name ["vld1"]; Disassembles_as [Use_operands -- [| VecArray (3, Dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], -- "vld3_dup", bits_1, [S64; U64]; -- -- (* VST3 variants. *) -- Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", -- store_1, pf_su_8_32; -- Vstx 3, [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (4, Dreg); -- PtrTo Corereg |]]; -- Instruction_name ["vst1"]], -- Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", -- store_1, [P64]; -- Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); -- PtrTo Corereg |]]; -- Instruction_name ["vst1"]], -- Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", -- store_1, [S64; U64]; -- Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); -- PtrTo Corereg |]; -- Use_operands [| VecArray (3, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q", -- store_1, pf_su_8_32; -- -- Vstx_lane 3, -- [Disassembles_as [Use_operands -- [| VecArray (3, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane", -- store_3, P8 :: P16 :: F32 :: su_8_32; -- Vstx_lane 3, -- [Disassembles_as [Use_operands -- [| VecArray (3, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane", -- store_3, [P16; F32; U16; U32; S16; S32]; -- -- (* VLD4/VST4 variants. 
*) -- Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], -- "vld4", bits_1, pf_su_8_32; -- Vldx 4, [Requires_feature "CRYPTO"; Instruction_name ["vld1"]], -- Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], -- "vld4", bits_1, [P64]; -- Vldx 4, [Instruction_name ["vld1"]], -- Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], -- "vld4", bits_1, [S64; U64]; -- Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); -- CstPtrTo Corereg |]; -- Use_operands [| VecArray (4, Dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |], -- "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32; -- -- Vldx_lane 4, -- [Disassembles_as [Use_operands -- [| VecArray (4, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg; -- VecArray (4, Dreg); Immed |], -- "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; -- Vldx_lane 4, -- [Disassembles_as [Use_operands -- [| VecArray (4, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg; -- VecArray (4, Qreg); Immed |], -- "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; -- -- Vldx_dup 4, -- [Disassembles_as [Use_operands -- [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], -- "vld4_dup", bits_1, pf_su_8_32; -- Vldx_dup 4, -- [Requires_feature "CRYPTO"; -- Instruction_name ["vld1"]; Disassembles_as [Use_operands -- [| VecArray (4, Dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], -- "vld4_dup", bits_1, [P64]; -- Vldx_dup 4, -- [Instruction_name ["vld1"]; Disassembles_as [Use_operands -- [| VecArray (4, Dreg); CstPtrTo Corereg |]]], -- Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], -- "vld4_dup", bits_1, [S64; U64]; -- -- Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", -- store_1, pf_su_8_32; -- Vstx 4, [Requires_feature "CRYPTO"; -- Disassembles_as [Use_operands [| VecArray (4, Dreg); -- PtrTo Corereg |]]; -- Instruction_name ["vst1"]], -- Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", -- store_1, [P64]; -- Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); -- PtrTo Corereg |]]; -- Instruction_name ["vst1"]], -- Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", -- store_1, [S64; U64]; -- Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); -- PtrTo Corereg |]; -- Use_operands [| VecArray (4, Dreg); -- PtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q", -- store_1, pf_su_8_32; -- -- Vstx_lane 4, -- [Disassembles_as [Use_operands -- [| VecArray (4, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane", -- store_3, P8 :: P16 :: F32 :: su_8_32; -- Vstx_lane 4, -- [Disassembles_as [Use_operands -- [| VecArray (4, Element_of_dreg); -- CstPtrTo Corereg |]]], -- Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane", -- store_3, [P16; F32; U16; U32; S16; S32]; -- -- (* Logical operations. And. *) -- Vand, [], All (3, Dreg), "vand", notype_2, su_8_32; -- Vand, [No_op], All (3, Dreg), "vand", notype_2, [S64; U64]; -- Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64; -- -- (* Or. 
*) -- Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_32; -- Vorr, [No_op], All (3, Dreg), "vorr", notype_2, [S64; U64]; -- Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64; -- -- (* Eor. *) -- Veor, [], All (3, Dreg), "veor", notype_2, su_8_32; -- Veor, [No_op], All (3, Dreg), "veor", notype_2, [S64; U64]; -- Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64; -- -- (* Bic (And-not). *) -- Vbic, [Compiler_optim "-O2"], All (3, Dreg), "vbic", notype_2, su_8_32; -- Vbic, [No_op; Compiler_optim "-O2"], All (3, Dreg), "vbic", notype_2, [S64; U64]; -- Vbic, [Compiler_optim "-O2"], All (3, Qreg), "vbicQ", notype_2, su_8_64; -- -- (* Or-not. *) -- Vorn, [Compiler_optim "-O2"], All (3, Dreg), "vorn", notype_2, su_8_32; -- Vorn, [No_op; Compiler_optim "-O2"], All (3, Dreg), "vorn", notype_2, [S64; U64]; -- Vorn, [Compiler_optim "-O2"], All (3, Qreg), "vornQ", notype_2, su_8_64; -- ] -- --let type_in_crypto_only t -- = (t == P64) || (t == P128) -- --let cross_product s1 s2 -- = List.filter (fun (e, e') -> e <> e') -- (List.concat (List.map (fun e1 -> List.map (fun e2 -> (e1,e2)) s1) s2)) -- --let reinterp = -- let elems = P8 :: P16 :: F32 :: P64 :: su_8_64 in -- let casts = cross_product elems elems in -- List.map -- (fun (convto, convfrom) -> -- Vreinterp, (if (type_in_crypto_only convto) || (type_in_crypto_only convfrom) -- then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Dreg; Dreg |], -- "vreinterpret", conv_1, [Cast (convto, convfrom)]) -- casts -- --let reinterpq = -- let elems = P8 :: P16 :: F32 :: P64 :: P128 :: su_8_64 in -- let casts = cross_product elems elems in -- List.map -- (fun (convto, convfrom) -> -- Vreinterp, (if (type_in_crypto_only convto) || (type_in_crypto_only convfrom) -- then [Requires_feature "CRYPTO"] else []) @ [No_op], Use_operands [| Qreg; Qreg |], -- "vreinterpretQ", conv_1, [Cast (convto, convfrom)]) -- casts -- --(* Output routines. *) -- --let rec string_of_elt = function -- S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64" -- | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64" -- | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64" -- | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64" -- | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16" -- | P64 -> "p64" | P128 -> "p128" -- | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b -- | NoElts -> failwith "No elts" -- --let string_of_elt_dots elt = -- match elt with -- Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." 
^ string_of_elt b -- | _ -> string_of_elt elt -- --let string_of_vectype vt = -- let rec name affix = function -- T_int8x8 -> affix "int8x8" -- | T_int8x16 -> affix "int8x16" -- | T_int16x4 -> affix "int16x4" -- | T_int16x8 -> affix "int16x8" -- | T_int32x2 -> affix "int32x2" -- | T_int32x4 -> affix "int32x4" -- | T_int64x1 -> affix "int64x1" -- | T_int64x2 -> affix "int64x2" -- | T_uint8x8 -> affix "uint8x8" -- | T_uint8x16 -> affix "uint8x16" -- | T_uint16x4 -> affix "uint16x4" -- | T_uint16x8 -> affix "uint16x8" -- | T_uint32x2 -> affix "uint32x2" -- | T_uint32x4 -> affix "uint32x4" -- | T_uint64x1 -> affix "uint64x1" -- | T_uint64x2 -> affix "uint64x2" -- | T_float16x4 -> affix "float16x4" -- | T_float32x2 -> affix "float32x2" -- | T_float32x4 -> affix "float32x4" -- | T_poly8x8 -> affix "poly8x8" -- | T_poly8x16 -> affix "poly8x16" -- | T_poly16x4 -> affix "poly16x4" -- | T_poly16x8 -> affix "poly16x8" -- | T_int8 -> affix "int8" -- | T_int16 -> affix "int16" -- | T_int32 -> affix "int32" -- | T_int64 -> affix "int64" -- | T_uint8 -> affix "uint8" -- | T_uint16 -> affix "uint16" -- | T_uint32 -> affix "uint32" -- | T_uint64 -> affix "uint64" -- | T_poly8 -> affix "poly8" -- | T_poly16 -> affix "poly16" -- | T_poly64 -> affix "poly64" -- | T_poly64x1 -> affix "poly64x1" -- | T_poly64x2 -> affix "poly64x2" -- | T_poly128 -> affix "poly128" -- | T_float16 -> affix "float16" -- | T_float32 -> affix "float32" -- | T_immediate _ -> "const int" -- | T_void -> "void" -- | T_intQI -> "__builtin_neon_qi" -- | T_intHI -> "__builtin_neon_hi" -- | T_intSI -> "__builtin_neon_si" -- | T_intDI -> "__builtin_neon_di" -- | T_intTI -> "__builtin_neon_ti" -- | T_floatHF -> "__builtin_neon_hf" -- | T_floatSF -> "__builtin_neon_sf" -- | T_arrayof (num, base) -> -- let basename = name (fun x -> x) base in -- affix (Printf.sprintf "%sx%d" basename num) -- | T_ptrto x -> -- let basename = name affix x in -- Printf.sprintf "%s *" basename -- | T_const x -> -- let basename = name affix x in -- Printf.sprintf "const %s" basename -- in -- name (fun x -> x ^ "_t") vt -- --let string_of_inttype = function -- B_TImode -> "__builtin_neon_ti" -- | B_EImode -> "__builtin_neon_ei" -- | B_OImode -> "__builtin_neon_oi" -- | B_CImode -> "__builtin_neon_ci" -- | B_XImode -> "__builtin_neon_xi" -- --let string_of_mode = function -- V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si" -- | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" -- | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" -- | HI -> "hi" | SI -> "si" | SF -> "sf" | TI -> "ti" -- --(* Use uppercase chars for letters which form part of the intrinsic name, but -- should be omitted from the builtin name (the info is passed in an extra -- argument, instead). *) --let intrinsic_name name = String.lowercase name -- --(* Allow the name of the builtin to be overridden by things (e.g. Flipped) -- found in the features list. *) --let builtin_name features name = -- let name = List.fold_right -- (fun el name -> -- match el with -- Flipped x | Builtin_name x -> x -- | _ -> name) -- features name in -- let islower x = let str = String.make 1 x in (String.lowercase str) = str -- and buf = Buffer.create (String.length name) in -- String.iter (fun c -> if islower c then Buffer.add_char buf c) name; -- Buffer.contents buf -- --(* Transform an arity into a list of strings. 
*) --let strings_of_arity a = -- match a with -- | Arity0 vt -> [string_of_vectype vt] -- | Arity1 (vt1, vt2) -> [string_of_vectype vt1; string_of_vectype vt2] -- | Arity2 (vt1, vt2, vt3) -> [string_of_vectype vt1; -- string_of_vectype vt2; -- string_of_vectype vt3] -- | Arity3 (vt1, vt2, vt3, vt4) -> [string_of_vectype vt1; -- string_of_vectype vt2; -- string_of_vectype vt3; -- string_of_vectype vt4] -- | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [string_of_vectype vt1; -- string_of_vectype vt2; -- string_of_vectype vt3; -- string_of_vectype vt4; -- string_of_vectype vt5] -- --(* Suffixes on the end of builtin names that are to be stripped in order -- to obtain the name used as an instruction. They are only stripped if -- preceded immediately by an underscore. *) --let suffixes_to_strip = [ "n"; "lane"; "dup" ] -- --(* Get the possible names of an instruction corresponding to a "name" from the -- ops table. This is done by getting the equivalent builtin name and -- stripping any suffixes from the list at the top of this file, unless -- the features list presents with an Instruction_name entry, in which -- case that is used; or unless the features list presents with a Flipped -- entry, in which case that is used. If both such entries are present, -- the first in the list will be chosen. *) --let get_insn_names features name = -- let names = try -- begin -- match List.find (fun feature -> match feature with -- Instruction_name _ -> true -- | Flipped _ -> true -- | _ -> false) features -- with -- Instruction_name names -> names -- | Flipped name -> [name] -- | _ -> assert false -- end -- with Not_found -> [builtin_name features name] -- in -- begin -- List.map (fun name' -> -- try -- let underscore = String.rindex name' '_' in -- let our_suffix = String.sub name' (underscore + 1) -- ((String.length name') - underscore - 1) -- in -- let rec strip remaining_suffixes = -- match remaining_suffixes with -- [] -> name' -- | s::ss when our_suffix = s -> String.sub name' 0 underscore -- | _::ss -> strip ss -- in -- strip suffixes_to_strip -- with (Not_found | Invalid_argument _) -> name') names -- end -- --(* Apply a function to each element of a list and then comma-separate -- the resulting strings. *) --let rec commas f elts acc = -- match elts with -- [] -> acc -- | [elt] -> acc ^ (f elt) -- | elt::elts -> -- commas f elts (acc ^ (f elt) ^ ", ") -- --(* Given a list of features and the shape specified in the "ops" table, apply -- a function to each possible shape that the instruction may have. -- By default, this is the "shape" entry in "ops". If the features list -- contains a Disassembles_as entry, the shapes contained in that entry are -- mapped to corresponding outputs and returned in a list. If there is more -- than one Disassembles_as entry, only the first is used. *) --let analyze_all_shapes features shape f = -- try -- match List.find (fun feature -> -- match feature with Disassembles_as _ -> true -- | _ -> false) -- features with -- Disassembles_as shapes -> List.map f shapes -- | _ -> assert false -- with Not_found -> [f shape] -- --(* The crypto intrinsics have unconventional shapes and are not that -- numerous to be worth the trouble of encoding here. We implement them -- explicitly here. 
*) --let crypto_intrinsics = --" --#ifdef __ARM_FEATURE_CRYPTO -- --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) --vldrq_p128 (poly128_t const * __ptr) --{ --#ifdef __ARM_BIG_ENDIAN -- poly64_t* __ptmp = (poly64_t*) __ptr; -- poly64_t __d0 = vld1_p64 (__ptmp); -- poly64_t __d1 = vld1_p64 (__ptmp + 1); -- return vreinterpretq_p128_p64 (vcombine_p64 (__d1, __d0)); --#else -- return vreinterpretq_p128_p64 (vld1q_p64 ((poly64_t*) __ptr)); --#endif --} -- --__extension__ static __inline void __attribute__ ((__always_inline__)) --vstrq_p128 (poly128_t * __ptr, poly128_t __val) --{ --#ifdef __ARM_BIG_ENDIAN -- poly64x2_t __tmp = vreinterpretq_p64_p128 (__val); -- poly64_t __d0 = vget_high_p64 (__tmp); -- poly64_t __d1 = vget_low_p64 (__tmp); -- vst1q_p64 ((poly64_t*) __ptr, vcombine_p64 (__d0, __d1)); --#else -- vst1q_p64 ((poly64_t*) __ptr, vreinterpretq_p64_p128 (__val)); --#endif --} -- --/* The vceq_p64 intrinsic does not map to a single instruction. -- Instead we emulate it by performing a 32-bit variant of the vceq -- and applying a pairwise min reduction to the result. -- vceq_u32 will produce two 32-bit halves, each of which will contain either -- all ones or all zeros depending on whether the corresponding 32-bit -- halves of the poly64_t were equal. The whole poly64_t values are equal -- if and only if both halves are equal, i.e. vceq_u32 returns all ones. -- If the result is all zeroes for any half then the whole result is zeroes. -- This is what the pairwise min reduction achieves. */ -- --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vceq_p64 (poly64x1_t __a, poly64x1_t __b) --{ -- uint32x2_t __t_a = vreinterpret_u32_p64 (__a); -- uint32x2_t __t_b = vreinterpret_u32_p64 (__b); -- uint32x2_t __c = vceq_u32 (__t_a, __t_b); -- uint32x2_t __m = vpmin_u32 (__c, __c); -- return vreinterpret_u64_u32 (__m); --} -- --/* The vtst_p64 intrinsic does not map to a single instruction. -- We emulate it in way similar to vceq_p64 above but here we do -- a reduction with max since if any two corresponding bits -- in the two poly64_t's match, then the whole result must be all ones. 
*/ -- --__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) --vtst_p64 (poly64x1_t __a, poly64x1_t __b) --{ -- uint32x2_t __t_a = vreinterpret_u32_p64 (__a); -- uint32x2_t __t_b = vreinterpret_u32_p64 (__b); -- uint32x2_t __c = vtst_u32 (__t_a, __t_b); -- uint32x2_t __m = vpmax_u32 (__c, __c); -- return vreinterpret_u64_u32 (__m); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vaeseq_u8 (uint8x16_t __data, uint8x16_t __key) --{ -- return __builtin_arm_crypto_aese (__data, __key); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vaesdq_u8 (uint8x16_t __data, uint8x16_t __key) --{ -- return __builtin_arm_crypto_aesd (__data, __key); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vaesmcq_u8 (uint8x16_t __data) --{ -- return __builtin_arm_crypto_aesmc (__data); --} -- --__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) --vaesimcq_u8 (uint8x16_t __data) --{ -- return __builtin_arm_crypto_aesimc (__data); --} -- --__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) --vsha1h_u32 (uint32_t __hash_e) --{ -- uint32x4_t __t = vdupq_n_u32 (0); -- __t = vsetq_lane_u32 (__hash_e, __t, 0); -- __t = __builtin_arm_crypto_sha1h (__t); -- return vgetq_lane_u32 (__t, 0); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha1cq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) --{ -- uint32x4_t __t = vdupq_n_u32 (0); -- __t = vsetq_lane_u32 (__hash_e, __t, 0); -- return __builtin_arm_crypto_sha1c (__hash_abcd, __t, __wk); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha1pq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) --{ -- uint32x4_t __t = vdupq_n_u32 (0); -- __t = vsetq_lane_u32 (__hash_e, __t, 0); -- return __builtin_arm_crypto_sha1p (__hash_abcd, __t, __wk); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha1mq_u32 (uint32x4_t __hash_abcd, uint32_t __hash_e, uint32x4_t __wk) --{ -- uint32x4_t __t = vdupq_n_u32 (0); -- __t = vsetq_lane_u32 (__hash_e, __t, 0); -- return __builtin_arm_crypto_sha1m (__hash_abcd, __t, __wk); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha1su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7, uint32x4_t __w8_11) --{ -- return __builtin_arm_crypto_sha1su0 (__w0_3, __w4_7, __w8_11); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha1su1q_u32 (uint32x4_t __tw0_3, uint32x4_t __w12_15) --{ -- return __builtin_arm_crypto_sha1su1 (__tw0_3, __w12_15); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha256hq_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) --{ -- return __builtin_arm_crypto_sha256h (__hash_abcd, __hash_efgh, __wk); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha256h2q_u32 (uint32x4_t __hash_abcd, uint32x4_t __hash_efgh, uint32x4_t __wk) --{ -- return __builtin_arm_crypto_sha256h2 (__hash_abcd, __hash_efgh, __wk); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha256su0q_u32 (uint32x4_t __w0_3, uint32x4_t __w4_7) --{ -- return __builtin_arm_crypto_sha256su0 (__w0_3, __w4_7); --} -- --__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) --vsha256su1q_u32 (uint32x4_t __tw0_3, 
uint32x4_t __w8_11, uint32x4_t __w12_15) --{ -- return __builtin_arm_crypto_sha256su1 (__tw0_3, __w8_11, __w12_15); --} -- --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) --vmull_p64 (poly64_t __a, poly64_t __b) --{ -- return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __a, (uint64_t) __b); --} -- --__extension__ static __inline poly128_t __attribute__ ((__always_inline__)) --vmull_high_p64 (poly64x2_t __a, poly64x2_t __b) --{ -- poly64_t __t1 = vget_high_p64 (__a); -- poly64_t __t2 = vget_high_p64 (__b); -- -- return (poly128_t) __builtin_arm_crypto_vmullp64 ((uint64_t) __t1, (uint64_t) __t2); --} -- --#endif --" ---- a/src/gcc/config/arm/predicates.md -+++ b/src/gcc/config/arm/predicates.md -@@ -141,8 +141,7 @@ - (match_test "const_ok_for_arm (~INTVAL (op))"))) - - (define_predicate "const0_operand" -- (and (match_code "const_int") -- (match_test "INTVAL (op) == 0"))) -+ (match_test "op == CONST0_RTX (mode)")) - - ;; Something valid on the RHS of an ARM data-processing instruction - (define_predicate "arm_rhs_operand" -@@ -170,8 +169,7 @@ - - (define_predicate "const_neon_scalar_shift_amount_operand" - (and (match_code "const_int") -- (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) <= GET_MODE_BITSIZE (mode) -- && ((unsigned HOST_WIDE_INT) INTVAL (op)) > 0"))) -+ (match_test "IN_RANGE (UINTVAL (op), 1, GET_MODE_BITSIZE (mode))"))) - - (define_predicate "ldrd_strd_offset_operand" - (and (match_operand 0 "const_int_operand") -@@ -243,11 +241,6 @@ - (and (match_code "const_double") - (match_test "arm_const_double_rtx (op)")))) - --(define_predicate "arm_float_compare_operand" -- (if_then_else (match_test "TARGET_VFP") -- (match_operand 0 "vfp_compare_operand") -- (match_operand 0 "s_register_operand"))) -- - ;; True for valid index operands. - (define_predicate "index_operand" - (ior (match_operand 0 "s_register_operand") -@@ -285,19 +278,19 @@ - (match_test "power_of_two_operand (XEXP (op, 1), mode)")) - (and (match_code "rotate") - (match_test "CONST_INT_P (XEXP (op, 1)) -- && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) -+ && (UINTVAL (XEXP (op, 1))) < 32"))) - (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") - (match_test "!CONST_INT_P (XEXP (op, 1)) -- || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) -+ || (UINTVAL (XEXP (op, 1))) < 32"))) - (match_test "mode == GET_MODE (op)"))) - - (define_special_predicate "shift_nomul_operator" - (and (ior (and (match_code "rotate") - (match_test "CONST_INT_P (XEXP (op, 1)) -- && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")) -+ && (UINTVAL (XEXP (op, 1))) < 32")) - (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") - (match_test "!CONST_INT_P (XEXP (op, 1)) -- || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) -+ || (UINTVAL (XEXP (op, 1))) < 32"))) - (match_test "mode == GET_MODE (op)"))) - - ;; True for shift operators which can be used with saturation instructions. -@@ -306,7 +299,7 @@ - (match_test "power_of_two_operand (XEXP (op, 1), mode)")) - (and (match_code "ashift,ashiftrt") - (match_test "CONST_INT_P (XEXP (op, 1)) -- && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1)) < 32)"))) -+ && (UINTVAL (XEXP (op, 1)) < 32)"))) - (match_test "mode == GET_MODE (op)"))) - - ;; True for MULT, to identify which variant of shift_operator is in use. 
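# NOTE: the vceq_p64/vtst_p64 emulation removed above relies on the pairwise
# min/max reductions described in its comments. A minimal host-side sketch of
# that reasoning (plain C, no NEON; the lane_*/model_* helper names are
# illustrative, not arm_neon.h intrinsics):

#include <assert.h>
#include <stdint.h>

/* Per-lane 32-bit masks, as vceq_u32/vtst_u32 produce them.  */
static uint32_t lane_eq (uint32_t a, uint32_t b) { return a == b ? ~0u : 0u; }
static uint32_t lane_tst (uint32_t a, uint32_t b) { return (a & b) ? ~0u : 0u; }

/* vceq_p64: BOTH halves must compare equal, hence a min (vpmin_u32) reduction.  */
static uint64_t model_vceq_p64 (uint64_t a, uint64_t b)
{
  uint32_t lo = lane_eq ((uint32_t) a, (uint32_t) b);
  uint32_t hi = lane_eq ((uint32_t) (a >> 32), (uint32_t) (b >> 32));
  uint32_t m = lo < hi ? lo : hi;
  return (uint64_t) m << 32 | m;
}

/* vtst_p64: ANY overlapping bit suffices, hence a max (vpmax_u32) reduction.  */
static uint64_t model_vtst_p64 (uint64_t a, uint64_t b)
{
  uint32_t lo = lane_tst ((uint32_t) a, (uint32_t) b);
  uint32_t hi = lane_tst ((uint32_t) (a >> 32), (uint32_t) (b >> 32));
  uint32_t m = lo > hi ? lo : hi;
  return (uint64_t) m << 32 | m;
}

int main (void)
{
  assert (model_vceq_p64 (0x0123456789abcdefULL, 0x0123456789abcdefULL) == ~0ULL);
  assert (model_vceq_p64 (0x0123456789abcdefULL, 0x0123456789abcdeeULL) == 0);
  assert (model_vtst_p64 (0x8000000000000000ULL, 0x8000000000000001ULL) == ~0ULL);
  assert (model_vtst_p64 (0x00000000ffffffffULL, 0xffffffff00000000ULL) == 0);
  return 0;
}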
-@@ -398,6 +391,12 @@ - || mode == CC_DGTUmode)); - }) - -+;; Any register, including CC -+(define_predicate "cc_register_operand" -+ (and (match_code "reg") -+ (ior (match_operand 0 "s_register_operand") -+ (match_operand 0 "cc_register")))) -+ - (define_special_predicate "arm_extendqisi_mem_op" - (and (match_operand 0 "memory_operand") - (match_test "TARGET_ARM ? arm_legitimate_address_outer_p (mode, -@@ -532,7 +531,7 @@ - (ior (and (match_code "reg,subreg") - (match_operand 0 "s_register_operand")) - (and (match_code "const_int") -- (match_test "((unsigned HOST_WIDE_INT) INTVAL (op)) < 256")))) -+ (match_test "(UINTVAL (op)) < 256")))) - - (define_predicate "thumb1_cmpneg_operand" - (and (match_code "const_int") -@@ -612,69 +611,23 @@ - (define_special_predicate "vect_par_constant_high" - (match_code "parallel") - { -- HOST_WIDE_INT count = XVECLEN (op, 0); -- int i; -- int base = GET_MODE_NUNITS (mode); -- -- if ((count < 1) -- || (count != base/2)) -- return false; -- -- if (!VECTOR_MODE_P (mode)) -- return false; -- -- for (i = 0; i < count; i++) -- { -- rtx elt = XVECEXP (op, 0, i); -- int val; -- -- if (!CONST_INT_P (elt)) -- return false; -- -- val = INTVAL (elt); -- if (val != (base/2) + i) -- return false; -- } -- return true; -+ return arm_simd_check_vect_par_cnst_half_p (op, mode, true); - }) - - (define_special_predicate "vect_par_constant_low" - (match_code "parallel") - { -- HOST_WIDE_INT count = XVECLEN (op, 0); -- int i; -- int base = GET_MODE_NUNITS (mode); -- -- if ((count < 1) -- || (count != base/2)) -- return false; -- -- if (!VECTOR_MODE_P (mode)) -- return false; -- -- for (i = 0; i < count; i++) -- { -- rtx elt = XVECEXP (op, 0, i); -- int val; -- -- if (!CONST_INT_P (elt)) -- return false; -- -- val = INTVAL (elt); -- if (val != i) -- return false; -- } -- return true; -+ return arm_simd_check_vect_par_cnst_half_p (op, mode, false); - }) - - (define_predicate "const_double_vcvt_power_of_two_reciprocal" - (and (match_code "const_double") -- (match_test "TARGET_32BIT && TARGET_VFP -- && vfp3_const_double_for_fract_bits (op)"))) -+ (match_test "TARGET_32BIT -+ && vfp3_const_double_for_fract_bits (op)"))) - - (define_predicate "const_double_vcvt_power_of_two" - (and (match_code "const_double") -- (match_test "TARGET_32BIT && TARGET_VFP -+ (match_test "TARGET_32BIT - && vfp3_const_double_for_bits (op) > 0"))) - - (define_predicate "neon_struct_operand" ---- a/src/gcc/config/arm/sync.md -+++ b/src/gcc/config/arm/sync.md -@@ -63,37 +63,59 @@ - (set_attr "predicable" "no")]) - - (define_insn "atomic_load<mode>" -- [(set (match_operand:QHSI 0 "register_operand" "=r") -+ [(set (match_operand:QHSI 0 "register_operand" "=r,r,l") - (unspec_volatile:QHSI -- [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q") -- (match_operand:SI 2 "const_int_operand")] ;; model -+ [(match_operand:QHSI 1 "arm_sync_memory_operand" "Q,Q,Q") -+ (match_operand:SI 2 "const_int_operand" "n,Pf,n")] ;; model - VUNSPEC_LDA))] - "TARGET_HAVE_LDACQ" - { - enum memmodel model = memmodel_from_int (INTVAL (operands[2])); - if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) -- return \"ldr<sync_sfx>%?\\t%0, %1\"; -+ { -+ if (TARGET_THUMB1) -+ return \"ldr<sync_sfx>\\t%0, %1\"; -+ else -+ return \"ldr<sync_sfx>%?\\t%0, %1\"; -+ } - else -- return \"lda<sync_sfx>%?\\t%0, %1\"; -+ { -+ if (TARGET_THUMB1) -+ return \"lda<sync_sfx>\\t%0, %1\"; -+ else -+ return \"lda<sync_sfx>%?\\t%0, %1\"; -+ } - } -- [(set_attr "predicable" "yes") -+ [(set_attr "arch" "32,v8mb,any") -+ (set_attr 
"predicable" "yes") - (set_attr "predicable_short_it" "no")]) - - (define_insn "atomic_store<mode>" -- [(set (match_operand:QHSI 0 "memory_operand" "=Q") -+ [(set (match_operand:QHSI 0 "memory_operand" "=Q,Q,Q") - (unspec_volatile:QHSI -- [(match_operand:QHSI 1 "general_operand" "r") -- (match_operand:SI 2 "const_int_operand")] ;; model -+ [(match_operand:QHSI 1 "general_operand" "r,r,l") -+ (match_operand:SI 2 "const_int_operand" "n,Pf,n")] ;; model - VUNSPEC_STL))] - "TARGET_HAVE_LDACQ" - { - enum memmodel model = memmodel_from_int (INTVAL (operands[2])); - if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) -- return \"str<sync_sfx>%?\t%1, %0\"; -+ { -+ if (TARGET_THUMB1) -+ return \"str<sync_sfx>\t%1, %0\"; -+ else -+ return \"str<sync_sfx>%?\t%1, %0\"; -+ } - else -- return \"stl<sync_sfx>%?\t%1, %0\"; -+ { -+ if (TARGET_THUMB1) -+ return \"stl<sync_sfx>\t%1, %0\"; -+ else -+ return \"stl<sync_sfx>%?\t%1, %0\"; -+ } - } -- [(set_attr "predicable" "yes") -+ [(set_attr "arch" "32,v8mb,any") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) - - ;; An LDRD instruction usable by the atomic_loaddi expander on LPAE targets -@@ -117,7 +139,7 @@ - [(match_operand:DI 0 "s_register_operand") ;; val out - (match_operand:DI 1 "mem_noofs_operand") ;; memory - (match_operand:SI 2 "const_int_operand")] ;; model -- "(TARGET_HAVE_LDREXD || TARGET_HAVE_LPAE || TARGET_HAVE_LDACQ) -+ "(TARGET_HAVE_LDREXD || TARGET_HAVE_LPAE || TARGET_HAVE_LDACQEXD) - && ARM_DOUBLEWORD_ALIGN" - { - memmodel model = memmodel_from_int (INTVAL (operands[2])); -@@ -125,7 +147,7 @@ - /* For ARMv8-A we can use an LDAEXD to atomically load two 32-bit registers - when acquire or stronger semantics are needed. When the relaxed model is - used this can be relaxed to a normal LDRD. */ -- if (TARGET_HAVE_LDACQ) -+ if (TARGET_HAVE_LDACQEXD) - { - if (is_mm_relaxed (model)) - emit_insn (gen_arm_atomic_loaddi2_ldrd (operands[0], operands[1])); -@@ -167,21 +189,23 @@ - DONE; - }) - -+;; Constraints of this pattern must be at least as strict as those of the -+;; cbranchsi operations in thumb1.md and aim to be as permissive. 
- (define_insn_and_split "atomic_compare_and_swap<mode>_1" -- [(set (reg:CC_Z CC_REGNUM) ;; bool out -+ [(set (match_operand 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out - (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS)) -- (set (match_operand:SI 0 "s_register_operand" "=&r") ;; val out -+ (set (match_operand:SI 1 "s_register_operand" "=&r,&l,&0,&l*h") ;; val out - (zero_extend:SI -- (match_operand:NARROW 1 "mem_noofs_operand" "+Ua"))) ;; memory -- (set (match_dup 1) -+ (match_operand:NARROW 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua"))) ;; memory -+ (set (match_dup 2) - (unspec_volatile:NARROW -- [(match_operand:SI 2 "arm_add_operand" "rIL") ;; expected -- (match_operand:NARROW 3 "s_register_operand" "r") ;; desired -- (match_operand:SI 4 "const_int_operand") ;; is_weak -- (match_operand:SI 5 "const_int_operand") ;; mod_s -- (match_operand:SI 6 "const_int_operand")] ;; mod_f -+ [(match_operand:SI 3 "arm_add_operand" "rIL,lIL*h,J,*r") ;; expected -+ (match_operand:NARROW 4 "s_register_operand" "r,r,r,r") ;; desired -+ (match_operand:SI 5 "const_int_operand") ;; is_weak -+ (match_operand:SI 6 "const_int_operand") ;; mod_s -+ (match_operand:SI 7 "const_int_operand")] ;; mod_f - VUNSPEC_ATOMIC_CAS)) -- (clobber (match_scratch:SI 7 "=&r"))] -+ (clobber (match_scratch:SI 8 "=&r,X,X,X"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -189,27 +213,30 @@ - { - arm_split_compare_and_swap (operands); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb,v8mb,v8mb")]) - - (define_mode_attr cas_cmp_operand - [(SI "arm_add_operand") (DI "cmpdi_operand")]) - (define_mode_attr cas_cmp_str - [(SI "rIL") (DI "rDi")]) - -+;; Constraints of this pattern must be at least as strict as those of the -+;; cbranchsi operations in thumb1.md and aim to be as permissive. 
- (define_insn_and_split "atomic_compare_and_swap<mode>_1" -- [(set (reg:CC_Z CC_REGNUM) ;; bool out -+ [(set (match_operand 0 "cc_register_operand" "=&c,&l,&l,&l") ;; bool out - (unspec_volatile:CC_Z [(const_int 0)] VUNSPEC_ATOMIC_CAS)) -- (set (match_operand:SIDI 0 "s_register_operand" "=&r") ;; val out -- (match_operand:SIDI 1 "mem_noofs_operand" "+Ua")) ;; memory -- (set (match_dup 1) -+ (set (match_operand:SIDI 1 "s_register_operand" "=&r,&l,&0,&l*h") ;; val out -+ (match_operand:SIDI 2 "mem_noofs_operand" "+Ua,Ua,Ua,Ua")) ;; memory -+ (set (match_dup 2) - (unspec_volatile:SIDI -- [(match_operand:SIDI 2 "<cas_cmp_operand>" "<cas_cmp_str>") ;; expect -- (match_operand:SIDI 3 "s_register_operand" "r") ;; desired -- (match_operand:SI 4 "const_int_operand") ;; is_weak -- (match_operand:SI 5 "const_int_operand") ;; mod_s -- (match_operand:SI 6 "const_int_operand")] ;; mod_f -+ [(match_operand:SIDI 3 "<cas_cmp_operand>" "<cas_cmp_str>,lIL*h,J,*r") ;; expect -+ (match_operand:SIDI 4 "s_register_operand" "r,r,r,r") ;; desired -+ (match_operand:SI 5 "const_int_operand") ;; is_weak -+ (match_operand:SI 6 "const_int_operand") ;; mod_s -+ (match_operand:SI 7 "const_int_operand")] ;; mod_f - VUNSPEC_ATOMIC_CAS)) -- (clobber (match_scratch:SI 7 "=&r"))] -+ (clobber (match_scratch:SI 8 "=&r,X,X,X"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -217,18 +244,19 @@ - { - arm_split_compare_and_swap (operands); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb,v8mb,v8mb")]) - - (define_insn_and_split "atomic_exchange<mode>" -- [(set (match_operand:QHSD 0 "s_register_operand" "=&r") ;; output -- (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) ;; memory -+ [(set (match_operand:QHSD 0 "s_register_operand" "=&r,&r") ;; output -+ (match_operand:QHSD 1 "mem_noofs_operand" "+Ua,Ua")) ;; memory - (set (match_dup 1) - (unspec_volatile:QHSD -- [(match_operand:QHSD 2 "s_register_operand" "r") ;; input -+ [(match_operand:QHSD 2 "s_register_operand" "r,r") ;; input - (match_operand:SI 3 "const_int_operand" "")] ;; model - VUNSPEC_ATOMIC_XCHG)) - (clobber (reg:CC CC_REGNUM)) -- (clobber (match_scratch:SI 4 "=&r"))] -+ (clobber (match_scratch:SI 4 "=&r,&l"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -237,7 +265,11 @@ - arm_split_atomic_op (SET, operands[0], NULL, operands[1], - operands[2], operands[3], operands[4]); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb")]) -+ -+;; The following mode and code attribute are defined here because they are -+;; specific to atomics and are not needed anywhere else. - - (define_mode_attr atomic_op_operand - [(QI "reg_or_int_operand") -@@ -248,16 +280,24 @@ - (define_mode_attr atomic_op_str - [(QI "rn") (HI "rn") (SI "rn") (DI "r")]) - -+(define_code_attr thumb1_atomic_op_str -+ [(ior "l,l") (xor "l,l") (and "l,l") (plus "lIJL,r") (minus "lPd,lPd")]) -+ -+(define_code_attr thumb1_atomic_newop_str -+ [(ior "&l,&l") (xor "&l,&l") (and "&l,&l") (plus "&l,&r") (minus "&l,&l")]) -+ -+;; Constraints of this pattern must be at least as strict as those of the non -+;; atomic operations in thumb1.md and aim to be as permissive. 
- (define_insn_and_split "atomic_<sync_optab><mode>" -- [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua") -+ [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua,Ua,Ua") - (unspec_volatile:QHSD - [(syncop:QHSD (match_dup 0) -- (match_operand:QHSD 1 "<atomic_op_operand>" "<atomic_op_str>")) -+ (match_operand:QHSD 1 "<atomic_op_operand>" "<atomic_op_str>,<thumb1_atomic_op_str>")) - (match_operand:SI 2 "const_int_operand")] ;; model - VUNSPEC_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -- (clobber (match_scratch:QHSD 3 "=&r")) -- (clobber (match_scratch:SI 4 "=&r"))] -+ (clobber (match_scratch:QHSD 3 "=&r,<thumb1_atomic_newop_str>")) -+ (clobber (match_scratch:SI 4 "=&r,&l,&l"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -266,19 +306,22 @@ - arm_split_atomic_op (<CODE>, NULL, operands[3], operands[0], - operands[1], operands[2], operands[4]); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb,v8mb")]) - -+;; Constraints of this pattern must be at least as strict as those of the non -+;; atomic NANDs in thumb1.md and aim to be as permissive. - (define_insn_and_split "atomic_nand<mode>" -- [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua") -+ [(set (match_operand:QHSD 0 "mem_noofs_operand" "+Ua,Ua") - (unspec_volatile:QHSD - [(not:QHSD - (and:QHSD (match_dup 0) -- (match_operand:QHSD 1 "<atomic_op_operand>" "<atomic_op_str>"))) -+ (match_operand:QHSD 1 "<atomic_op_operand>" "<atomic_op_str>,l"))) - (match_operand:SI 2 "const_int_operand")] ;; model - VUNSPEC_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -- (clobber (match_scratch:QHSD 3 "=&r")) -- (clobber (match_scratch:SI 4 "=&r"))] -+ (clobber (match_scratch:QHSD 3 "=&r,&l")) -+ (clobber (match_scratch:SI 4 "=&r,&l"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -287,20 +330,38 @@ - arm_split_atomic_op (NOT, NULL, operands[3], operands[0], - operands[1], operands[2], operands[4]); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb")]) -+ -+;; 3 alternatives are needed to represent constraints after split from -+;; thumb1_addsi3: (i) case where operand1 and destination can be in different -+;; registers, (ii) case where they are in the same low register and (iii) case -+;; when they are in the same register without restriction on the register. We -+;; disparage slightly alternatives that require copying the old value into the -+;; register for the new value (see bind_old_new in arm_split_atomic_op). -+(define_code_attr thumb1_atomic_fetch_op_str -+ [(ior "l,l,l") (xor "l,l,l") (and "l,l,l") (plus "lL,?IJ,?r") (minus "lPd,lPd,lPd")]) -+ -+(define_code_attr thumb1_atomic_fetch_newop_str -+ [(ior "&l,&l,&l") (xor "&l,&l,&l") (and "&l,&l,&l") (plus "&l,&l,&r") (minus "&l,&l,&l")]) - -+(define_code_attr thumb1_atomic_fetch_oldop_str -+ [(ior "&r,&r,&r") (xor "&r,&r,&r") (and "&r,&r,&r") (plus "&l,&r,&r") (minus "&l,&l,&l")]) -+ -+;; Constraints of this pattern must be at least as strict as those of the non -+;; atomic operations in thumb1.md and aim to be as permissive. 
- (define_insn_and_split "atomic_fetch_<sync_optab><mode>" -- [(set (match_operand:QHSD 0 "s_register_operand" "=&r") -- (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) -+ [(set (match_operand:QHSD 0 "s_register_operand" "=&r,<thumb1_atomic_fetch_oldop_str>") -+ (match_operand:QHSD 1 "mem_noofs_operand" "+Ua,Ua,Ua,Ua")) - (set (match_dup 1) - (unspec_volatile:QHSD - [(syncop:QHSD (match_dup 1) -- (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>")) -+ (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>,<thumb1_atomic_fetch_op_str>")) - (match_operand:SI 3 "const_int_operand")] ;; model - VUNSPEC_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -- (clobber (match_scratch:QHSD 4 "=&r")) -- (clobber (match_scratch:SI 5 "=&r"))] -+ (clobber (match_scratch:QHSD 4 "=&r,<thumb1_atomic_fetch_newop_str>")) -+ (clobber (match_scratch:SI 5 "=&r,&l,&l,&l"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -309,21 +370,24 @@ - arm_split_atomic_op (<CODE>, operands[0], operands[4], operands[1], - operands[2], operands[3], operands[5]); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb,v8mb,v8mb")]) - -+;; Constraints of this pattern must be at least as strict as those of the non -+;; atomic NANDs in thumb1.md and aim to be as permissive. - (define_insn_and_split "atomic_fetch_nand<mode>" -- [(set (match_operand:QHSD 0 "s_register_operand" "=&r") -- (match_operand:QHSD 1 "mem_noofs_operand" "+Ua")) -+ [(set (match_operand:QHSD 0 "s_register_operand" "=&r,&r") -+ (match_operand:QHSD 1 "mem_noofs_operand" "+Ua,Ua")) - (set (match_dup 1) - (unspec_volatile:QHSD - [(not:QHSD - (and:QHSD (match_dup 1) -- (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>"))) -+ (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>,l"))) - (match_operand:SI 3 "const_int_operand")] ;; model - VUNSPEC_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -- (clobber (match_scratch:QHSD 4 "=&r")) -- (clobber (match_scratch:SI 5 "=&r"))] -+ (clobber (match_scratch:QHSD 4 "=&r,&l")) -+ (clobber (match_scratch:SI 5 "=&r,&l"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -332,20 +396,23 @@ - arm_split_atomic_op (NOT, operands[0], operands[4], operands[1], - operands[2], operands[3], operands[5]); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb")]) - -+;; Constraints of this pattern must be at least as strict as those of the non -+;; atomic operations in thumb1.md and aim to be as permissive. 
- (define_insn_and_split "atomic_<sync_optab>_fetch<mode>" -- [(set (match_operand:QHSD 0 "s_register_operand" "=&r") -+ [(set (match_operand:QHSD 0 "s_register_operand" "=&r,<thumb1_atomic_newop_str>") - (syncop:QHSD -- (match_operand:QHSD 1 "mem_noofs_operand" "+Ua") -- (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>"))) -+ (match_operand:QHSD 1 "mem_noofs_operand" "+Ua,Ua,Ua") -+ (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>,<thumb1_atomic_op_str>"))) - (set (match_dup 1) - (unspec_volatile:QHSD - [(match_dup 1) (match_dup 2) - (match_operand:SI 3 "const_int_operand")] ;; model - VUNSPEC_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -- (clobber (match_scratch:SI 4 "=&r"))] -+ (clobber (match_scratch:SI 4 "=&r,&l,&l"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -354,21 +421,24 @@ - arm_split_atomic_op (<CODE>, NULL, operands[0], operands[1], - operands[2], operands[3], operands[4]); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb,v8mb")]) - -+;; Constraints of this pattern must be at least as strict as those of the non -+;; atomic NANDs in thumb1.md and aim to be as permissive. - (define_insn_and_split "atomic_nand_fetch<mode>" -- [(set (match_operand:QHSD 0 "s_register_operand" "=&r") -+ [(set (match_operand:QHSD 0 "s_register_operand" "=&r,&l") - (not:QHSD - (and:QHSD -- (match_operand:QHSD 1 "mem_noofs_operand" "+Ua") -- (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>")))) -+ (match_operand:QHSD 1 "mem_noofs_operand" "+Ua,Ua") -+ (match_operand:QHSD 2 "<atomic_op_operand>" "<atomic_op_str>,l")))) - (set (match_dup 1) - (unspec_volatile:QHSD - [(match_dup 1) (match_dup 2) - (match_operand:SI 3 "const_int_operand")] ;; model - VUNSPEC_ATOMIC_OP)) - (clobber (reg:CC CC_REGNUM)) -- (clobber (match_scratch:SI 4 "=&r"))] -+ (clobber (match_scratch:SI 4 "=&r,&l"))] - "<sync_predtab>" - "#" - "&& reload_completed" -@@ -377,48 +447,61 @@ - arm_split_atomic_op (NOT, NULL, operands[0], operands[1], - operands[2], operands[3], operands[4]); - DONE; -- }) -+ } -+ [(set_attr "arch" "32,v8mb")]) - - (define_insn "arm_load_exclusive<mode>" -- [(set (match_operand:SI 0 "s_register_operand" "=r") -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (zero_extend:SI - (unspec_volatile:NARROW -- [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] -+ [(match_operand:NARROW 1 "mem_noofs_operand" "Ua,Ua")] - VUNSPEC_LL)))] - "TARGET_HAVE_LDREXBH" -- "ldrex<sync_sfx>%?\t%0, %C1" -- [(set_attr "predicable" "yes") -+ "@ -+ ldrex<sync_sfx>%?\t%0, %C1 -+ ldrex<sync_sfx>\t%0, %C1" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) - - (define_insn "arm_load_acquire_exclusive<mode>" -- [(set (match_operand:SI 0 "s_register_operand" "=r") -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (zero_extend:SI - (unspec_volatile:NARROW -- [(match_operand:NARROW 1 "mem_noofs_operand" "Ua")] -+ [(match_operand:NARROW 1 "mem_noofs_operand" "Ua,Ua")] - VUNSPEC_LAX)))] - "TARGET_HAVE_LDACQ" -- "ldaex<sync_sfx>%?\\t%0, %C1" -- [(set_attr "predicable" "yes") -+ "@ -+ ldaex<sync_sfx>%?\\t%0, %C1 -+ ldaex<sync_sfx>\\t%0, %C1" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) - - (define_insn "arm_load_exclusivesi" -- [(set (match_operand:SI 0 "s_register_operand" "=r") -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (unspec_volatile:SI -- [(match_operand:SI 1 "mem_noofs_operand" "Ua")] -+ [(match_operand:SI 1 "mem_noofs_operand" "Ua,Ua")] - 
VUNSPEC_LL))] - "TARGET_HAVE_LDREX" -- "ldrex%?\t%0, %C1" -- [(set_attr "predicable" "yes") -+ "@ -+ ldrex%?\t%0, %C1 -+ ldrex\t%0, %C1" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) - - (define_insn "arm_load_acquire_exclusivesi" -- [(set (match_operand:SI 0 "s_register_operand" "=r") -+ [(set (match_operand:SI 0 "s_register_operand" "=r,r") - (unspec_volatile:SI -- [(match_operand:SI 1 "mem_noofs_operand" "Ua")] -+ [(match_operand:SI 1 "mem_noofs_operand" "Ua,Ua")] - VUNSPEC_LAX))] - "TARGET_HAVE_LDACQ" -- "ldaex%?\t%0, %C1" -- [(set_attr "predicable" "yes") -+ "@ -+ ldaex%?\t%0, %C1 -+ ldaex\t%0, %C1" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) - - (define_insn "arm_load_exclusivedi" -@@ -436,7 +519,7 @@ - (unspec_volatile:DI - [(match_operand:DI 1 "mem_noofs_operand" "Ua")] - VUNSPEC_LAX))] -- "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" -+ "TARGET_HAVE_LDACQEXD && ARM_DOUBLEWORD_ALIGN" - "ldaexd%?\t%0, %H0, %C1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) -@@ -452,16 +535,18 @@ - { - if (<MODE>mode == DImode) - { -- rtx value = operands[2]; - /* The restrictions on target registers in ARM mode are that the two - registers are consecutive and the first one is even; Thumb is - actually more flexible, but DI should give us this anyway. -- Note that the 1st register always gets the lowest word in memory. */ -- gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); -- operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); -- return "strexd%?\t%0, %2, %3, %C1"; -+ Note that the 1st register always gets the -+ lowest word in memory. */ -+ gcc_assert ((REGNO (operands[2]) & 1) == 0 || TARGET_THUMB2); -+ return "strexd%?\t%0, %2, %H2, %C1"; - } -- return "strex<sync_sfx>%?\t%0, %2, %C1"; -+ if (TARGET_THUMB1) -+ return "strex<sync_sfx>\t%0, %2, %C1"; -+ else -+ return "strex<sync_sfx>%?\t%0, %2, %C1"; - } - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) -@@ -473,25 +558,26 @@ - (unspec_volatile:DI - [(match_operand:DI 2 "s_register_operand" "r")] - VUNSPEC_SLX))] -- "TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN" -+ "TARGET_HAVE_LDACQEXD && ARM_DOUBLEWORD_ALIGN" - { -- rtx value = operands[2]; - /* See comment in arm_store_exclusive<mode> above. 
*/ -- gcc_assert ((REGNO (value) & 1) == 0 || TARGET_THUMB2); -- operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1); -- return "stlexd%?\t%0, %2, %3, %C1"; -+ gcc_assert ((REGNO (operands[2]) & 1) == 0 || TARGET_THUMB2); -+ return "stlexd%?\t%0, %2, %H2, %C1"; - } - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) - - (define_insn "arm_store_release_exclusive<mode>" -- [(set (match_operand:SI 0 "s_register_operand" "=&r") -+ [(set (match_operand:SI 0 "s_register_operand" "=&r,&r") - (unspec_volatile:SI [(const_int 0)] VUNSPEC_SLX)) -- (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua") -+ (set (match_operand:QHSI 1 "mem_noofs_operand" "=Ua,Ua") - (unspec_volatile:QHSI -- [(match_operand:QHSI 2 "s_register_operand" "r")] -+ [(match_operand:QHSI 2 "s_register_operand" "r,r")] - VUNSPEC_SLX))] - "TARGET_HAVE_LDACQ" -- "stlex<sync_sfx>%?\t%0, %2, %C1" -- [(set_attr "predicable" "yes") -+ "@ -+ stlex<sync_sfx>%?\t%0, %2, %C1 -+ stlex<sync_sfx>\t%0, %2, %C1" -+ [(set_attr "arch" "32,v8mb") -+ (set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no")]) ---- a/src/gcc/config/arm/t-aprofile -+++ b/src/gcc/config/arm/t-aprofile -@@ -49,38 +49,33 @@ MULTILIB_DIRNAMES += fpv3 simdv1 fpv4 simdvfpv4 simdv8 - MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard - MULTILIB_DIRNAMES += softfp hard - --# We don't build no-float libraries with an FPU. --MULTILIB_EXCEPTIONS += *mfpu=vfpv3-d16 --MULTILIB_EXCEPTIONS += *mfpu=neon --MULTILIB_EXCEPTIONS += *mfpu=vfpv4-d16 --MULTILIB_EXCEPTIONS += *mfpu=neon-vfpv4 --MULTILIB_EXCEPTIONS += *mfpu=neon-fp-armv8 -- --# We don't build libraries requiring an FPU at the CPU/Arch/ISA level. --MULTILIB_EXCEPTIONS += mfloat-abi=* --MULTILIB_EXCEPTIONS += mfpu=* --MULTILIB_EXCEPTIONS += mthumb/mfloat-abi=* --MULTILIB_EXCEPTIONS += mthumb/mfpu=* --MULTILIB_EXCEPTIONS += *march=armv7-a/mfloat-abi=* --MULTILIB_EXCEPTIONS += *march=armv7ve/mfloat-abi=* --MULTILIB_EXCEPTIONS += *march=armv8-a/mfloat-abi=* -- --# Ensure the correct FPU variants apply to the correct base architectures. 
--MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=vfpv3-d16* --MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=neon/* --MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=vfpv3-d16* --MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=neon/* --MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=vfpv4-d16* --MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=neon-vfpv4* --MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=vfpv4-d16* --MULTILIB_EXCEPTIONS += *march=armv8-a/*mfpu=neon-vfpv4* --MULTILIB_EXCEPTIONS += *march=armv7-a/*mfpu=neon-fp-armv8* --MULTILIB_EXCEPTIONS += *march=armv7ve/*mfpu=neon-fp-armv8* -+ -+# Option combinations to build library with -+ -+# Default CPU/Arch (ARM is implicitly included because it uses the default -+# multilib) -+MULTILIB_REQUIRED += mthumb -+ -+# ARMv7-A -+MULTILIB_REQUIRED += *march=armv7-a -+MULTILIB_REQUIRED += *march=armv7-a/mfpu=vfpv3-d16/mfloat-abi=* -+MULTILIB_REQUIRED += *march=armv7-a/mfpu=neon/mfloat-abi=* -+ -+# ARMv7VE -+MULTILIB_REQUIRED += *march=armv7ve -+MULTILIB_REQUIRED += *march=armv7ve/mfpu=vfpv4-d16/mfloat-abi=* -+MULTILIB_REQUIRED += *march=armv7ve/mfpu=neon-vfpv4/mfloat-abi=* -+ -+# ARMv8-A -+MULTILIB_REQUIRED += *march=armv8-a -+MULTILIB_REQUIRED += *march=armv8-a/mfpu=neon-fp-armv8/mfloat-abi=* -+ - - # CPU Matches - MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a8 - MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a9 - MULTILIB_MATCHES += march?armv7-a=mcpu?cortex-a5 -+MULTILIB_MATCHES += march?armv7ve=mcpu?cortex-a7 - MULTILIB_MATCHES += march?armv7ve=mcpu?cortex-a15 - MULTILIB_MATCHES += march?armv7ve=mcpu?cortex-a12 - MULTILIB_MATCHES += march?armv7ve=mcpu?cortex-a17 -@@ -93,6 +88,9 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57 - MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a57.cortex-a53 - MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a72 - MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a72.cortex-a53 -+MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a73 -+MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a73.cortex-a35 -+MULTILIB_MATCHES += march?armv8-a=mcpu?cortex-a73.cortex-a53 - MULTILIB_MATCHES += march?armv8-a=mcpu?exynos-m1 - MULTILIB_MATCHES += march?armv8-a=mcpu?qdf24xx - MULTILIB_MATCHES += march?armv8-a=mcpu?xgene1 -@@ -101,13 +99,20 @@ MULTILIB_MATCHES += march?armv8-a=mcpu?xgene1 - MULTILIB_MATCHES += march?armv8-a=march?armv8-a+crc - MULTILIB_MATCHES += march?armv8-a=march?armv8.1-a - MULTILIB_MATCHES += march?armv8-a=march?armv8.1-a+crc -+MULTILIB_MATCHES += march?armv8-a=march?armv8.2-a -+MULTILIB_MATCHES += march?armv8-a=march?armv8.2-a+fp16 - - # FPU matches - MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 - MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16 --MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16-d16 -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-d16-fp16 -+MULTILIB_MATCHES += mfpu?neon=mfpu?neon-fp16 - MULTILIB_MATCHES += mfpu?vfpv4-d16=mfpu?vfpv4 -+MULTILIB_MATCHES += mfpu?vfpv4-d16=mfpu?fpv5-d16 -+MULTILIB_MATCHES += mfpu?vfpv4-d16=mfpu?fp-armv8 - MULTILIB_MATCHES += mfpu?neon-fp-armv8=mfpu?crypto-neon-fp-armv8 -+MULTILIB_MATCHES += mfpu?vfp=mfpu?vfpv2 -+MULTILIB_MATCHES += mfpu?neon=mfpu?neon-vfpv3 - - - # Map all requests for vfpv3 with a later CPU to vfpv3-d16 v7-a. 
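# NOTE: each MULTILIB_MATCHES entry ("canonical?value=alias?value") folds an
# option spelling onto the multilib that was actually built, e.g. with the
# additions above -mcpu=cortex-a73 selects the march=armv8-a libraries. A toy
# resolver over a few of those pairs (resolve() is illustrative, not how the
# GCC driver implements it):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct match { const char *alias; const char *canonical; };

/* A subset of the MULTILIB_MATCHES pairs from the t-aprofile hunk above.  */
static const struct match matches[] = {
  { "mcpu=cortex-a7",  "march=armv7ve" },
  { "mcpu=cortex-a73", "march=armv8-a" },
  { "march=armv8.2-a", "march=armv8-a" },
  { "mfpu=neon-vfpv3", "mfpu=neon" },
};

static const char *resolve (const char *opt)
{
  for (size_t i = 0; i < sizeof matches / sizeof matches[0]; i++)
    if (strcmp (opt, matches[i].alias) == 0)
      return matches[i].canonical;
  return opt;   /* already canonical, or not multilib-relevant */
}

int main (void)
{
  printf ("mcpu=cortex-a73 -> %s\n", resolve ("mcpu=cortex-a73"));
  return 0;
}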
-@@ -124,10 +129,6 @@ MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv8 - MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.softfp - MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.hard - MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.softfp --MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.fp-armv8/mfloat-abi.hard --MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.fp-armv8/mfloat-abi.softfp --MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7-a/mfpu.vfpv4/mfloat-abi.hard --MULTILIB_REUSE += march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7-a/mfpu.vfpv4/mfloat-abi.softfp - - - MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7ve/mfpu.neon/mfloat-abi.hard -@@ -140,10 +141,6 @@ MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.hard=march.armv7-a/mf - MULTILIB_REUSE += march.armv7-a/mfpu.neon/mfloat-abi.softfp=march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.softfp - - --MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv7ve/mfpu.fp-armv8/mfloat-abi.hard --MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv7ve/mfpu.fp-armv8/mfloat-abi.softfp --MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv4/mfloat-abi.hard --MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv4/mfloat-abi.softfp - MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.hard - MULTILIB_REUSE += march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.softfp - -@@ -163,10 +160,6 @@ MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthu - MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv3-d16/mfloat-abi.softfp - MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.hard - MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.vfpv4-d16/mfloat-abi.softfp --MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.fp-armv8/mfloat-abi.hard --MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.fp-armv8/mfloat-abi.softfp --MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7-a/mfpu.vfpv4/mfloat-abi.hard --MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.vfpv4/mfloat-abi.softfp - - - MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.neon/mfloat-abi.hard -@@ -179,10 +172,6 @@ MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.hard=mthumb/ma - MULTILIB_REUSE += mthumb/march.armv7-a/mfpu.neon/mfloat-abi.softfp=mthumb/march.armv7-a/mfpu.neon-fp-armv8/mfloat-abi.softfp - - --MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv7ve/mfpu.fp-armv8/mfloat-abi.hard --MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv7ve/mfpu.fp-armv8/mfloat-abi.softfp --MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv4/mfloat-abi.hard 
--MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv4/mfloat-abi.softfp - MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.hard=mthumb/march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.hard - MULTILIB_REUSE += mthumb/march.armv7ve/mfpu.vfpv4-d16/mfloat-abi.softfp=mthumb/march.armv8-a/mfpu.vfpv4-d16/mfloat-abi.softfp - ---- a/src/gcc/config/arm/t-arm -+++ b/src/gcc/config/arm/t-arm -@@ -95,7 +95,8 @@ arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ - $(srcdir)/config/arm/arm-cores.def \ - $(srcdir)/config/arm/arm-arches.def $(srcdir)/config/arm/arm-fpus.def \ - $(srcdir)/config/arm/arm-protos.h \ -- $(srcdir)/config/arm/arm_neon_builtins.def -+ $(srcdir)/config/arm/arm_neon_builtins.def \ -+ $(srcdir)/config/arm/arm_vfp_builtins.def - - arm-builtins.o: $(srcdir)/config/arm/arm-builtins.c $(CONFIG_H) \ - $(SYSTEM_H) coretypes.h $(TM_H) \ -@@ -103,6 +104,7 @@ arm-builtins.o: $(srcdir)/config/arm/arm-builtins.c $(CONFIG_H) \ - $(DIAGNOSTIC_CORE_H) $(OPTABS_H) \ - $(srcdir)/config/arm/arm-protos.h \ - $(srcdir)/config/arm/arm_neon_builtins.def \ -+ $(srcdir)/config/arm/arm_vfp_builtins.def \ - $(srcdir)/config/arm/arm-simd-builtin-types.def - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/arm/arm-builtins.c ---- /dev/null -+++ b/src/gcc/config/arm/t-rmprofile -@@ -0,0 +1,176 @@ -+# Copyright (C) 2016 Free Software Foundation, Inc. -+# -+# This file is part of GCC. -+# -+# GCC is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3, or (at your option) -+# any later version. -+# -+# GCC is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. -+ -+# This is a target makefile fragment that attempts to get -+# multilibs built for the range of CPU's, FPU's and ABI's that -+# are relevant for the ARM architecture. It should not be used in -+# conjunction with another make file fragment and assumes --with-arch, -+# --with-cpu, --with-fpu, --with-float, --with-mode have their default -+# values during the configure step. We enforce this during the -+# top-level configury. -+ -+MULTILIB_OPTIONS = -+MULTILIB_DIRNAMES = -+MULTILIB_EXCEPTIONS = -+MULTILIB_MATCHES = -+MULTILIB_REUSE = -+ -+# We have the following hierarchy: -+# ISA: A32 (.) or T16/T32 (thumb). -+# Architecture: ARMv6S-M (v6-m), ARMv7-M (v7-m), ARMv7E-M (v7e-m), -+# ARMv8-M Baseline (v8-m.base) or ARMv8-M Mainline (v8-m.main). -+# FPU: VFPv3-D16 (fpv3), FPV4-SP-D16 (fpv4-sp), FPV5-SP-D16 (fpv5-sp), -+# VFPv5-D16 (fpv5), or None (.). -+# Float-abi: Soft (.), softfp (softfp), or hard (hardfp).
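# NOTE: in the tables that follow, each option in MULTILIB_OPTIONS maps
# positionally onto a directory in MULTILIB_DIRNAMES, so a MULTILIB_REQUIRED
# combination names one library directory. A sketch of that mapping for one
# v7e-m combination (the dirs[] subset and the joining code are illustrative):

#include <stddef.h>
#include <stdio.h>
#include <string.h>

struct dirmap { const char *option; const char *dirname; };

/* Option -> directory pairs taken from the MULTILIB_OPTIONS /
   MULTILIB_DIRNAMES lines of t-rmprofile (subset).  */
static const struct dirmap dirs[] = {
  { "mthumb",           "thumb" },
  { "march=armv7e-m",   "v7e-m" },
  { "mfpu=fpv4-sp-d16", "fpv4-sp" },
  { "mfloat-abi=hard",  "hard" },
};

int main (void)
{
  /* One of the MULTILIB_REQUIRED combinations below:
     mthumb/march=armv7e-m/mfpu=fpv4-sp-d16/mfloat-abi=hard  */
  const char *combo[] = { "mthumb", "march=armv7e-m",
                          "mfpu=fpv4-sp-d16", "mfloat-abi=hard" };
  char path[64] = "";

  for (size_t i = 0; i < sizeof combo / sizeof combo[0]; i++)
    for (size_t j = 0; j < sizeof dirs / sizeof dirs[0]; j++)
      if (strcmp (combo[i], dirs[j].option) == 0)
        {
          if (path[0] != '\0')
            strcat (path, "/");
          strcat (path, dirs[j].dirname);
        }

  printf ("%s\n", path);   /* prints: thumb/v7e-m/fpv4-sp/hard */
  return 0;
}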
-+ -+# Options to build libraries with -+ -+MULTILIB_OPTIONS += mthumb -+MULTILIB_DIRNAMES += thumb -+ -+MULTILIB_OPTIONS += march=armv6s-m/march=armv7-m/march=armv7e-m/march=armv7/march=armv8-m.base/march=armv8-m.main -+MULTILIB_DIRNAMES += v6-m v7-m v7e-m v7-ar v8-m.base v8-m.main -+ -+MULTILIB_OPTIONS += mfpu=vfpv3-d16/mfpu=fpv4-sp-d16/mfpu=fpv5-sp-d16/mfpu=fpv5-d16 -+MULTILIB_DIRNAMES += fpv3 fpv4-sp fpv5-sp fpv5 -+ -+MULTILIB_OPTIONS += mfloat-abi=softfp/mfloat-abi=hard -+MULTILIB_DIRNAMES += softfp hard -+ -+ -+# Option combinations to build library with -+ -+# Default CPU/Arch -+MULTILIB_REQUIRED += mthumb -+MULTILIB_REQUIRED += mfloat-abi=hard -+ -+# ARMv6-M -+MULTILIB_REQUIRED += mthumb/march=armv6s-m -+ -+# ARMv8-M Baseline -+MULTILIB_REQUIRED += mthumb/march=armv8-m.base -+ -+# ARMv7-M -+MULTILIB_REQUIRED += mthumb/march=armv7-m -+ -+# ARMv7E-M -+MULTILIB_REQUIRED += mthumb/march=armv7e-m -+MULTILIB_REQUIRED += mthumb/march=armv7e-m/mfpu=fpv4-sp-d16/mfloat-abi=softfp -+MULTILIB_REQUIRED += mthumb/march=armv7e-m/mfpu=fpv4-sp-d16/mfloat-abi=hard -+MULTILIB_REQUIRED += mthumb/march=armv7e-m/mfpu=fpv5-d16/mfloat-abi=softfp -+MULTILIB_REQUIRED += mthumb/march=armv7e-m/mfpu=fpv5-d16/mfloat-abi=hard -+MULTILIB_REQUIRED += mthumb/march=armv7e-m/mfpu=fpv5-sp-d16/mfloat-abi=softfp -+MULTILIB_REQUIRED += mthumb/march=armv7e-m/mfpu=fpv5-sp-d16/mfloat-abi=hard -+ -+# ARMv8-M Mainline -+MULTILIB_REQUIRED += mthumb/march=armv8-m.main -+MULTILIB_REQUIRED += mthumb/march=armv8-m.main/mfpu=fpv5-d16/mfloat-abi=softfp -+MULTILIB_REQUIRED += mthumb/march=armv8-m.main/mfpu=fpv5-d16/mfloat-abi=hard -+MULTILIB_REQUIRED += mthumb/march=armv8-m.main/mfpu=fpv5-sp-d16/mfloat-abi=softfp -+MULTILIB_REQUIRED += mthumb/march=armv8-m.main/mfpu=fpv5-sp-d16/mfloat-abi=hard -+ -+# ARMv7-R as well as ARMv7-A and ARMv8-A if aprofile was not specified -+MULTILIB_REQUIRED += mthumb/march=armv7 -+MULTILIB_REQUIRED += mthumb/march=armv7/mfpu=vfpv3-d16/mfloat-abi=softfp -+MULTILIB_REQUIRED += mthumb/march=armv7/mfpu=vfpv3-d16/mfloat-abi=hard -+ -+ -+# Matches -+ -+# CPU Matches -+MULTILIB_MATCHES += march?armv6s-m=mcpu?cortex-m0 -+MULTILIB_MATCHES += march?armv6s-m=mcpu?cortex-m0.small-multiply -+MULTILIB_MATCHES += march?armv6s-m=mcpu?cortex-m0plus -+MULTILIB_MATCHES += march?armv6s-m=mcpu?cortex-m0plus.small-multiply -+MULTILIB_MATCHES += march?armv6s-m=mcpu?cortex-m1 -+MULTILIB_MATCHES += march?armv6s-m=mcpu?cortex-m1.small-multiply -+MULTILIB_MATCHES += march?armv7-m=mcpu?cortex-m3 -+MULTILIB_MATCHES += march?armv7e-m=mcpu?cortex-m4 -+MULTILIB_MATCHES += march?armv7e-m=mcpu?cortex-m7 -+MULTILIB_MATCHES += march?armv8-m.base=mcpu?cortex-m23 -+MULTILIB_MATCHES += march?armv8-m.main=mcpu?cortex-m33 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-r4f -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-r5 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-r7 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-r8 -+MULTILIB_MATCHES += march?armv7=mcpu?marvell-pj4 -+MULTILIB_MATCHES += march?armv7=mcpu?generic-armv7-a -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a8 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a9 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a5 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a7 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a15 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a12 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a17 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a15.cortex-a7 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a17.cortex-a7 -+MULTILIB_MATCHES 
+= march?armv7=mcpu?cortex-a32 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a35 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a53 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a57 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a57.cortex-a53 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a72 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a72.cortex-a53 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a73 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a73.cortex-a35 -+MULTILIB_MATCHES += march?armv7=mcpu?cortex-a73.cortex-a53 -+MULTILIB_MATCHES += march?armv7=mcpu?exynos-m1 -+MULTILIB_MATCHES += march?armv7=mcpu?qdf24xx -+MULTILIB_MATCHES += march?armv7=mcpu?xgene1 -+ -+# Arch Matches -+MULTILIB_MATCHES += march?armv6s-m=march?armv6-m -+MULTILIB_MATCHES += march?armv8-m.main=march?armv8-m.main+dsp -+MULTILIB_MATCHES += march?armv7=march?armv7-r -+ifeq (,$(HAS_APROFILE)) -+MULTILIB_MATCHES += march?armv7=march?armv7-a -+MULTILIB_MATCHES += march?armv7=march?armv7ve -+MULTILIB_MATCHES += march?armv7=march?armv8-a -+MULTILIB_MATCHES += march?armv7=march?armv8-a+crc -+MULTILIB_MATCHES += march?armv7=march?armv8.1-a -+MULTILIB_MATCHES += march?armv7=march?armv8.1-a+crc -+MULTILIB_MATCHES += march?armv7=march?armv8.2-a -+MULTILIB_MATCHES += march?armv7=march?armv8.2-a+fp16 -+endif -+ -+# FPU matches -+ifeq (,$(HAS_APROFILE)) -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3 -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-fp16 -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv3-d16-fp16 -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?neon -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?neon-fp16 -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv4 -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?vfpv4-d16 -+MULTILIB_MATCHES += mfpu?vfpv3-d16=mfpu?neon-vfpv4 -+MULTILIB_MATCHES += mfpu?fpv5-d16=mfpu?fp-armv8 -+MULTILIB_MATCHES += mfpu?fpv5-d16=mfpu?neon-fp-armv8 -+MULTILIB_MATCHES += mfpu?fpv5-d16=mfpu?crypto-neon-fp-armv8 -+endif -+ -+ -+# We map all requests for ARMv7-R or ARMv7-A in ARM mode to Thumb mode and -+# any FPU to VFPv3-d16 if possible. -+MULTILIB_REUSE += mthumb/march.armv7=march.armv7 -+MULTILIB_REUSE += mthumb/march.armv7/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7/mfpu.vfpv3-d16/mfloat-abi.softfp -+MULTILIB_REUSE += mthumb/march.armv7/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7/mfpu.vfpv3-d16/mfloat-abi.hard -+MULTILIB_REUSE += mthumb/march.armv7/mfpu.vfpv3-d16/mfloat-abi.softfp=march.armv7/mfpu.fpv5-d16/mfloat-abi.softfp -+MULTILIB_REUSE += mthumb/march.armv7/mfpu.vfpv3-d16/mfloat-abi.hard=march.armv7/mfpu.fpv5-d16/mfloat-abi.hard -+MULTILIB_REUSE += mthumb/march.armv7/mfpu.vfpv3-d16/mfloat-abi.softfp=mthumb/march.armv7/mfpu.fpv5-d16/mfloat-abi.softfp -+MULTILIB_REUSE += mthumb/march.armv7/mfpu.vfpv3-d16/mfloat-abi.hard=mthumb/march.armv7/mfpu.fpv5-d16/mfloat-abi.hard ---- a/src/gcc/config/arm/thumb1.md -+++ b/src/gcc/config/arm/thumb1.md -@@ -55,6 +55,10 @@ - (set_attr "type" "multiple")] - ) - -+;; Changes to the constraints of this pattern must be propagated to those of -+;; atomic additions in sync.md and to the logic for bind_old_new in -+;; arm_split_atomic_op in arm.c. These must be at least as strict as the -+;; constraints here and aim to be as permissive. 
- (define_insn_and_split "*thumb1_addsi3" - [(set (match_operand:SI 0 "register_operand" "=l,l,l,*rk,*hk,l,k,l,l,l") - (plus:SI (match_operand:SI 1 "register_operand" "%0,0,l,*0,*0,k,k,0,l,k") -@@ -114,8 +118,8 @@ - (set (match_dup 0) - (plus:SI (match_dup 0) (reg:SI SP_REGNUM)))] - "TARGET_THUMB1 -- && (unsigned HOST_WIDE_INT) (INTVAL (operands[1])) < 1024 -- && (INTVAL (operands[1]) & 3) == 0" -+ && UINTVAL (operands[1]) < 1024 -+ && (UINTVAL (operands[1]) & 3) == 0" - [(set (match_dup 0) (plus:SI (reg:SI SP_REGNUM) (match_dup 1)))] - "" - ) -@@ -131,6 +135,10 @@ - (set_attr "type" "multiple")] - ) - -+;; Changes to the constraints of this pattern must be propagated to those of -+;; atomic subtractions in sync.md and to the logic for bind_old_new in -+;; arm_split_atomic_op in arm.c. These must be at least as strict as the -+;; constraints here and aim to be as permissive. - (define_insn "thumb1_subsi3_insn" - [(set (match_operand:SI 0 "register_operand" "=l") - (minus:SI (match_operand:SI 1 "register_operand" "l") -@@ -142,11 +150,11 @@ - (set_attr "type" "alus_sreg")] - ) - --; Unfortunately with the Thumb the '&'/'0' trick can fails when operands --; 1 and 2; are the same, because reload will make operand 0 match --; operand 1 without realizing that this conflicts with operand 2. We fix --; this by adding another alternative to match this case, and then `reload' --; it ourselves. This alternative must come first. -+;; Unfortunately on Thumb the '&'/'0' trick can fail when operands -+;; 1 and 2 are the same, because reload will make operand 0 match -+;; operand 1 without realizing that this conflicts with operand 2. We fix -+;; this by adding another alternative to match this case, and then `reload' -+;; it ourselves. This alternative must come first. - (define_insn "*thumb_mulsi3" - [(set (match_operand:SI 0 "register_operand" "=&l,&l,&l") - (mult:SI (match_operand:SI 1 "register_operand" "%l,*h,0") -@@ -173,6 +181,10 @@ - (set_attr "type" "muls")] - ) - -+;; Changes to the constraints of this pattern must be propagated to those of -+;; atomic bitwise ANDs and NANDs in sync.md and to the logic for bind_old_new -+;; in arm_split_atomic_op in arm.c. These must be at least as strict as the -+;; constraints here and aim to be as permissive. - (define_insn "*thumb1_andsi3_insn" - [(set (match_operand:SI 0 "register_operand" "=l") - (and:SI (match_operand:SI 1 "register_operand" "%0") -@@ -227,6 +239,10 @@ - (set_attr "type" "logics_reg")] - ) - -+;; Changes to the constraints of this pattern must be propagated to those of -+;; atomic inclusive ORs in sync.md and to the logic for bind_old_new in -+;; arm_split_atomic_op in arm.c. These must be at least as strict as the -+;; constraints here and aim to be as permissive. - (define_insn "*thumb1_iorsi3_insn" - [(set (match_operand:SI 0 "register_operand" "=l") - (ior:SI (match_operand:SI 1 "register_operand" "%0") -@@ -237,6 +253,10 @@ - (set_attr "conds" "set") - (set_attr "type" "logics_reg")]) - -+;; Changes to the constraints of this pattern must be propagated to those of -+;; atomic exclusive ORs in sync.md and to the logic for bind_old_new in -+;; arm_split_atomic_op in arm.c. These must be at least as strict as the -+;; constraints here and aim to be as permissive. - (define_insn "*thumb1_xorsi3_insn" - [(set (match_operand:SI 0 "register_operand" "=l") - (xor:SI (match_operand:SI 1 "register_operand" "%0") -@@ -590,8 +610,8 @@ - ;;; ??? 
The 'i' constraint looks funny, but it should always be replaced by
-;;; thumb_reorg with a memory reference.
-(define_insn "*thumb1_movdi_insn"
--  [(set (match_operand:DI 0 "nonimmediate_operand" "=l,l,l,l,>,l, m,*r")
--	(match_operand:DI 1 "general_operand" "l, I,J,>,l,mi,l,*r"))]
-+  [(set (match_operand:DI 0 "nonimmediate_operand" "=l,l,l,r,l,>,l, m,*r")
-+	(match_operand:DI 1 "general_operand" "l, I,J,j,>,l,mi,l,*r"))]
-  "TARGET_THUMB1
-  && (   register_operand (operands[0], DImode)
-      || register_operand (operands[1], DImode))"
-@@ -610,36 +630,41 @@
-       operands[1] = GEN_INT (- INTVAL (operands[1]));
-       return \"movs\\t%Q0, %1\;rsbs\\t%Q0, %Q0, #0\;asrs\\t%R0, %Q0, #31\";
-     case 3:
--      return \"ldmia\\t%1, {%0, %H0}\";
-+      gcc_assert (TARGET_HAVE_MOVT);
-+      return \"movw\\t%Q0, %L1\;movs\\t%R0, #0\";
-     case 4:
--      return \"stmia\\t%0, {%1, %H1}\";
-+      return \"ldmia\\t%1, {%0, %H0}\";
-     case 5:
--      return thumb_load_double_from_address (operands);
-+      return \"stmia\\t%0, {%1, %H1}\";
-     case 6:
-+      return thumb_load_double_from_address (operands);
-+    case 7:
-       operands[2] = gen_rtx_MEM (SImode,
-			     plus_constant (Pmode, XEXP (operands[0], 0), 4));
-       output_asm_insn (\"str\\t%1, %0\;str\\t%H1, %2\", operands);
-       return \"\";
--    case 7:
-+    case 8:
-       if (REGNO (operands[1]) == REGNO (operands[0]) + 1)
-	return \"mov\\t%0, %1\;mov\\t%H0, %H1\";
-       return \"mov\\t%H0, %H1\;mov\\t%0, %1\";
-     }
-  }"
--  [(set_attr "length" "4,4,6,2,2,6,4,4")
--   (set_attr "type" "multiple,multiple,multiple,load2,store2,load2,store2,multiple")
--   (set_attr "pool_range" "*,*,*,*,*,1018,*,*")]
-+  [(set_attr "length" "4,4,6,6,2,2,6,4,4")
-+   (set_attr "type" "multiple,multiple,multiple,multiple,load2,store2,load2,store2,multiple")
-+   (set_attr "arch" "t1,t1,t1,v8mb,t1,t1,t1,t1,t1")
-+   (set_attr "pool_range" "*,*,*,*,*,*,1018,*,*")]
- )
- 
- (define_insn "*thumb1_movsi_insn"
--  [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,l,l,l,>,l, m,*l*h*k")
--	(match_operand:SI 1 "general_operand" "l, I,J,K,>,l,mi,l,*l*h*k"))]
-+  [(set (match_operand:SI 0 "nonimmediate_operand" "=l,l,r,l,l,l,>,l, m,*l*h*k")
-+	(match_operand:SI 1 "general_operand" "l, I,j,J,K,>,l,mi,l,*l*h*k"))]
-  "TARGET_THUMB1
-  && (   register_operand (operands[0], SImode)
-      || register_operand (operands[1], SImode))"
-  "@
-   movs %0, %1
-   movs %0, %1
-+  movw %0, %1
-   #
-   #
-   ldmia\\t%1, {%0}
-@@ -647,10 +672,11 @@
-   ldr\\t%0, %1
-   str\\t%1, %0
-   mov\\t%0, %1"
--  [(set_attr "length" "2,2,4,4,2,2,2,2,2")
--   (set_attr "type" "mov_reg,mov_imm,multiple,multiple,load1,store1,load1,store1,mov_reg")
--   (set_attr "pool_range" "*,*,*,*,*,*,1018,*,*")
--   (set_attr "conds" "set,clob,*,*,nocond,nocond,nocond,nocond,nocond")])
-+  [(set_attr "length" "2,2,4,4,4,2,2,2,2,2")
-+   (set_attr "type" "mov_reg,mov_imm,mov_imm,multiple,multiple,load1,store1,load1,store1,mov_reg")
-+   (set_attr "pool_range" "*,*,*,*,*,*,*,1018,*,*")
-+   (set_attr "arch" "t1,t1,v8mb,t1,t1,t1,t1,t1,t1,t1")
-+   (set_attr "conds" "set,clob,nocond,*,*,nocond,nocond,nocond,nocond,nocond")])
- 
- ; Split the load of 64-bit constant into two loads for high and low 32-bit parts respectively
- ; to see if we can load them in fewer instructions or fewer cycles. 
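
[Editor's note: the new 'j' alternatives above are what let ARMv8-M Baseline, where TARGET_HAVE_MOVT holds, build small constants with a movw instead of a literal-pool load. A minimal C sketch of source that exercises the new movsi alternative; the build flags and the expected assembly are illustrative assumptions, not verified output.]

    /* Assumed build: arm-none-eabi-gcc -mthumb -march=armv8-m.base -O2 -S */
    unsigned int
    get_magic (void)
    {
      /* 0x1234 satisfies the 'j' constraint (a 16-bit movw immediate), so
         the pattern above can emit "movw r0, #4660" rather than
         "ldr r0, .LC0" plus a literal-pool entry.  */
      return 0x1234u;
    }
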
-@@ -687,7 +713,8 @@ - (define_split - [(set (match_operand:SI 0 "register_operand" "") - (match_operand:SI 1 "const_int_operand" ""))] -- "TARGET_THUMB1 && satisfies_constraint_K (operands[1])" -+ "TARGET_THUMB1 && satisfies_constraint_K (operands[1]) -+ && !(TARGET_HAVE_MOVT && satisfies_constraint_j (operands[1]))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (ashift:SI (match_dup 2) (match_dup 3)))] - " -@@ -714,7 +741,8 @@ - (define_split - [(set (match_operand:SI 0 "register_operand" "") - (match_operand:SI 1 "const_int_operand" ""))] -- "TARGET_THUMB1 && satisfies_constraint_Pe (operands[1])" -+ "TARGET_THUMB1 && satisfies_constraint_Pe (operands[1]) -+ && !(TARGET_HAVE_MOVT && satisfies_constraint_j (operands[1]))" - [(set (match_dup 2) (match_dup 1)) - (set (match_dup 0) (plus:SI (match_dup 2) (match_dup 3)))] - " -@@ -726,8 +754,8 @@ - ) - - (define_insn "*thumb1_movhi_insn" -- [(set (match_operand:HI 0 "nonimmediate_operand" "=l,l,m,l*r,*h,l") -- (match_operand:HI 1 "general_operand" "l,m,l,k*h,*r,I"))] -+ [(set (match_operand:HI 0 "nonimmediate_operand" "=l,l,m,l*r,*h,l,r") -+ (match_operand:HI 1 "general_operand" "l,m,l,k*h,*r,I,n"))] - "TARGET_THUMB1 - && ( register_operand (operands[0], HImode) - || register_operand (operands[1], HImode))" -@@ -739,6 +767,8 @@ - case 3: return \"mov %0, %1\"; - case 4: return \"mov %0, %1\"; - case 5: return \"movs %0, %1\"; -+ case 6: gcc_assert (TARGET_HAVE_MOVT); -+ return \"movw %0, %L1\"; - default: gcc_unreachable (); - case 1: - /* The stack pointer can end up being taken as an index register. -@@ -758,9 +788,10 @@ - } - return \"ldrh %0, %1\"; - }" -- [(set_attr "length" "2,4,2,2,2,2") -- (set_attr "type" "alus_imm,load1,store1,mov_reg,mov_reg,mov_imm") -- (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob")]) -+ [(set_attr "length" "2,4,2,2,2,2,4") -+ (set_attr "type" "alus_imm,load1,store1,mov_reg,mov_reg,mov_imm,mov_imm") -+ (set_attr "arch" "t1,t1,t1,t1,t1,t1,v8mb") -+ (set_attr "conds" "clob,nocond,nocond,nocond,nocond,clob,nocond")]) - - (define_expand "thumb_movhi_clobber" - [(set (match_operand:HI 0 "memory_operand" "") -@@ -963,6 +994,94 @@ - DONE; - }) - -+;; A pattern for the CB(N)Z instruction added in ARMv8-M Baseline profile, -+;; adapted from cbranchsi4_insn. Modifying cbranchsi4_insn instead leads to -+;; code generation difference for ARMv6-M because the minimum length of the -+;; instruction becomes 2 even for ARMv6-M due to a limitation in genattrtab's -+;; handling of PC in the length condition. 
-+(define_insn "thumb1_cbz" -+ [(set (pc) (if_then_else -+ (match_operator 0 "equality_operator" -+ [(match_operand:SI 1 "s_register_operand" "l") -+ (const_int 0)]) -+ (label_ref (match_operand 2 "" "")) -+ (pc)))] -+ "TARGET_THUMB1 && TARGET_HAVE_CBZ" -+{ -+ if (get_attr_length (insn) == 2) -+ { -+ if (GET_CODE (operands[0]) == EQ) -+ return "cbz\t%1, %l2"; -+ else -+ return "cbnz\t%1, %l2"; -+ } -+ else -+ { -+ rtx t = cfun->machine->thumb1_cc_insn; -+ if (t != NULL_RTX) -+ { -+ if (!rtx_equal_p (cfun->machine->thumb1_cc_op0, operands[1]) -+ || !rtx_equal_p (cfun->machine->thumb1_cc_op1, operands[2])) -+ t = NULL_RTX; -+ if (cfun->machine->thumb1_cc_mode == CC_NOOVmode) -+ { -+ if (!noov_comparison_operator (operands[0], VOIDmode)) -+ t = NULL_RTX; -+ } -+ else if (cfun->machine->thumb1_cc_mode != CCmode) -+ t = NULL_RTX; -+ } -+ if (t == NULL_RTX) -+ { -+ output_asm_insn ("cmp\t%1, #0", operands); -+ cfun->machine->thumb1_cc_insn = insn; -+ cfun->machine->thumb1_cc_op0 = operands[1]; -+ cfun->machine->thumb1_cc_op1 = operands[2]; -+ cfun->machine->thumb1_cc_mode = CCmode; -+ } -+ else -+ /* Ensure we emit the right type of condition code on the jump. */ -+ XEXP (operands[0], 0) = gen_rtx_REG (cfun->machine->thumb1_cc_mode, -+ CC_REGNUM); -+ -+ switch (get_attr_length (insn)) -+ { -+ case 4: return "b%d0\t%l2"; -+ case 6: return "b%D0\t.LCB%=;b\t%l2\t%@long jump\n.LCB%=:"; -+ case 8: return "b%D0\t.LCB%=;bl\t%l2\t%@far jump\n.LCB%=:"; -+ default: gcc_unreachable (); -+ } -+ } -+} -+ [(set (attr "far_jump") -+ (if_then_else -+ (eq_attr "length" "8") -+ (const_string "yes") -+ (const_string "no"))) -+ (set (attr "length") -+ (if_then_else -+ (and (ge (minus (match_dup 2) (pc)) (const_int 2)) -+ (le (minus (match_dup 2) (pc)) (const_int 128))) -+ (const_int 2) -+ (if_then_else -+ (and (ge (minus (match_dup 2) (pc)) (const_int -250)) -+ (le (minus (match_dup 2) (pc)) (const_int 256))) -+ (const_int 4) -+ (if_then_else -+ (and (ge (minus (match_dup 2) (pc)) (const_int -2040)) -+ (le (minus (match_dup 2) (pc)) (const_int 2048))) -+ (const_int 6) -+ (const_int 8))))) -+ (set (attr "type") -+ (if_then_else -+ (eq_attr "length" "2") -+ (const_string "branch") -+ (const_string "multiple")))] -+) -+ -+;; Changes to the constraints of this pattern must be propagated to those of -+;; atomic compare_and_swap splitters in sync.md. These must be at least as -+;; strict as the constraints here and aim to be as permissive. - (define_insn "cbranchsi4_insn" - [(set (pc) (if_then_else - (match_operator 0 "arm_comparison_operator" -@@ -1024,6 +1143,9 @@ - (set_attr "type" "multiple")] - ) - -+;; Changes to the constraints of this pattern must be propagated to those of -+;; atomic compare_and_swap splitters in sync.md. These must be at least as -+;; strict as the constraints here and aim to be as permissive. 
- (define_insn "cbranchsi4_scratch" - [(set (pc) (if_then_else - (match_operator 4 "arm_comparison_operator" -@@ -1609,6 +1731,19 @@ - (set_attr "type" "call")] - ) - -+(define_insn "*nonsecure_call_reg_thumb1_v5" -+ [(call (unspec:SI [(mem:SI (match_operand:SI 0 "register_operand" "l*r"))] -+ UNSPEC_NONSECURE_MEM) -+ (match_operand 1 "" "")) -+ (use (match_operand 2 "" "")) -+ (clobber (reg:SI LR_REGNUM)) -+ (clobber (match_dup 0))] -+ "TARGET_THUMB1 && use_cmse && !SIBLING_CALL_P (insn)" -+ "bl\\t__gnu_cmse_nonsecure_call" -+ [(set_attr "length" "4") -+ (set_attr "type" "call")] -+) -+ - (define_insn "*call_reg_thumb1" - [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r")) - (match_operand 1 "" "")) -@@ -1641,6 +1776,21 @@ - (set_attr "type" "call")] - ) - -+(define_insn "*nonsecure_call_value_reg_thumb1_v5" -+ [(set (match_operand 0 "" "") -+ (call (unspec:SI -+ [(mem:SI (match_operand:SI 1 "register_operand" "l*r"))] -+ UNSPEC_NONSECURE_MEM) -+ (match_operand 2 "" ""))) -+ (use (match_operand 3 "" "")) -+ (clobber (reg:SI LR_REGNUM)) -+ (clobber (match_dup 1))] -+ "TARGET_THUMB1 && use_cmse" -+ "bl\\t__gnu_cmse_nonsecure_call" -+ [(set_attr "length" "4") -+ (set_attr "type" "call")] -+) -+ - (define_insn "*call_value_reg_thumb1" - [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) -@@ -1747,8 +1897,13 @@ - "* - return thumb1_unexpanded_epilogue (); - " -- ; Length is absolute worst case -- [(set_attr "length" "44") -+ ; Length is absolute worst case, when using CMSE and if this is an entry -+ ; function an extra 4 (MSR) bytes will be added. -+ [(set (attr "length") -+ (if_then_else -+ (match_test "IS_CMSE_ENTRY (arm_current_func_type ())") -+ (const_int 48) -+ (const_int 44))) - (set_attr "type" "block") - ;; We don't clobber the conditions, but the potential length of this - ;; operation is sufficient to make conditionalizing the sequence ---- a/src/gcc/config/arm/thumb2.md -+++ b/src/gcc/config/arm/thumb2.md -@@ -125,32 +125,6 @@ - (set_attr "type" "multiple")] - ) - --;; Thumb-2 does not have rsc, so use a clever trick with shifter operands. --(define_insn_and_split "*thumb2_negdi2" -- [(set (match_operand:DI 0 "s_register_operand" "=&r,r") -- (neg:DI (match_operand:DI 1 "s_register_operand" "?r,0"))) -- (clobber (reg:CC CC_REGNUM))] -- "TARGET_THUMB2" -- "#" ; negs\\t%Q0, %Q1\;sbc\\t%R0, %R1, %R1, lsl #1 -- "&& reload_completed" -- [(parallel [(set (reg:CC CC_REGNUM) -- (compare:CC (const_int 0) (match_dup 1))) -- (set (match_dup 0) (minus:SI (const_int 0) (match_dup 1)))]) -- (set (match_dup 2) (minus:SI (minus:SI (match_dup 3) -- (ashift:SI (match_dup 3) -- (const_int 1))) -- (ltu:SI (reg:CC_C CC_REGNUM) (const_int 0))))] -- { -- operands[2] = gen_highpart (SImode, operands[0]); -- operands[0] = gen_lowpart (SImode, operands[0]); -- operands[3] = gen_highpart (SImode, operands[1]); -- operands[1] = gen_lowpart (SImode, operands[1]); -- } -- [(set_attr "conds" "clob") -- (set_attr "length" "8") -- (set_attr "type" "multiple")] --) -- - (define_insn_and_split "*thumb2_abssi2" - [(set (match_operand:SI 0 "s_register_operand" "=&r,l,r") - (abs:SI (match_operand:SI 1 "s_register_operand" "r,0,0"))) -@@ -278,8 +252,7 @@ - (define_insn "*thumb2_movsi_insn" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m") - (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))] -- "TARGET_THUMB2 && ! 
TARGET_IWMMXT -- && !(TARGET_HARD_FLOAT && TARGET_VFP) -+ "TARGET_THUMB2 && !TARGET_IWMMXT && !TARGET_HARD_FLOAT - && ( register_operand (operands[0], SImode) - || register_operand (operands[1], SImode))" - "@ -@@ -581,6 +554,19 @@ - [(set_attr "type" "call")] - ) - -+(define_insn "*nonsecure_call_reg_thumb2" -+ [(call (unspec:SI [(mem:SI (match_operand:SI 0 "s_register_operand" "r"))] -+ UNSPEC_NONSECURE_MEM) -+ (match_operand 1 "" "")) -+ (use (match_operand 2 "" "")) -+ (clobber (reg:SI LR_REGNUM)) -+ (clobber (match_dup 0))] -+ "TARGET_THUMB2 && use_cmse" -+ "bl\\t__gnu_cmse_nonsecure_call" -+ [(set_attr "length" "4") -+ (set_attr "type" "call")] -+) -+ - (define_insn "*call_value_reg_thumb2" - [(set (match_operand 0 "" "") - (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) -@@ -592,6 +578,21 @@ - [(set_attr "type" "call")] - ) - -+(define_insn "*nonsecure_call_value_reg_thumb2" -+ [(set (match_operand 0 "" "") -+ (call -+ (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "l*r"))] -+ UNSPEC_NONSECURE_MEM) -+ (match_operand 2 "" ""))) -+ (use (match_operand 3 "" "")) -+ (clobber (reg:SI LR_REGNUM)) -+ (clobber (match_dup 1))] -+ "TARGET_THUMB2 && use_cmse" -+ "bl\t__gnu_cmse_nonsecure_call" -+ [(set_attr "length" "4") -+ (set_attr "type" "call")] -+) -+ - (define_insn "*thumb2_indirect_jump" - [(set (pc) - (match_operand:SI 0 "register_operand" "l*r"))] -@@ -1115,12 +1116,31 @@ - - (define_insn "*thumb2_return" - [(simple_return)] -- "TARGET_THUMB2" -+ "TARGET_THUMB2 && !IS_CMSE_ENTRY (arm_current_func_type ())" - "* return output_return_instruction (const_true_rtx, true, false, true);" - [(set_attr "type" "branch") - (set_attr "length" "4")] - ) - -+(define_insn "*thumb2_cmse_entry_return" -+ [(simple_return)] -+ "TARGET_THUMB2 && IS_CMSE_ENTRY (arm_current_func_type ())" -+ "* return output_return_instruction (const_true_rtx, true, false, true);" -+ [(set_attr "type" "branch") -+ ; This is a return from a cmse_nonsecure_entry function so code will be -+ ; added to clear the APSR and potentially the FPSCR if VFP is available, so -+ ; we adapt the length accordingly. -+ (set (attr "length") -+ (if_then_else (match_test "TARGET_HARD_FLOAT") -+ (const_int 12) -+ (const_int 8))) -+ ; We do not support predicate execution of returns from cmse_nonsecure_entry -+ ; functions because we need to clear the APSR. Since predicable has to be -+ ; a constant, we had to duplicate the thumb2_return pattern for CMSE entry -+ ; functions. -+ (set_attr "predicable" "no")] -+) -+ - (define_insn_and_split "thumb2_eh_return" - [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")] - VUNSPEC_EH_RETURN) ---- a/src/gcc/config/arm/types.md -+++ b/src/gcc/config/arm/types.md -@@ -51,6 +51,7 @@ - ; alus_shift_imm as alu_shift_imm, setting condition flags. - ; alus_shift_reg as alu_shift_reg, setting condition flags. - ; bfm bitfield move operation. -+; bfx bitfield extract operation. - ; block blockage insn, this blocks all functional units. - ; branch branch. - ; call subroutine call. -@@ -557,6 +558,7 @@ - alus_shift_imm,\ - alus_shift_reg,\ - bfm,\ -+ bfx,\ - block,\ - branch,\ - call,\ ---- a/src/gcc/config/arm/unspecs.md -+++ b/src/gcc/config/arm/unspecs.md -@@ -84,6 +84,8 @@ - UNSPEC_VRINTA ; Represent a float to integral float rounding - ; towards nearest, ties away from zero. 
- UNSPEC_PROBE_STACK ; Probe stack memory reference -+ UNSPEC_NONSECURE_MEM ; Represent non-secure memory in ARMv8-M with -+ ; security extension - ]) - - (define_c_enum "unspec" [ -@@ -191,6 +193,8 @@ - UNSPEC_VBSL - UNSPEC_VCAGE - UNSPEC_VCAGT -+ UNSPEC_VCALE -+ UNSPEC_VCALT - UNSPEC_VCEQ - UNSPEC_VCGE - UNSPEC_VCGEU -@@ -203,6 +207,20 @@ - UNSPEC_VCVT_U - UNSPEC_VCVT_S_N - UNSPEC_VCVT_U_N -+ UNSPEC_VCVT_HF_S_N -+ UNSPEC_VCVT_HF_U_N -+ UNSPEC_VCVT_SI_S_N -+ UNSPEC_VCVT_SI_U_N -+ UNSPEC_VCVTH_S -+ UNSPEC_VCVTH_U -+ UNSPEC_VCVTA_S -+ UNSPEC_VCVTA_U -+ UNSPEC_VCVTM_S -+ UNSPEC_VCVTM_U -+ UNSPEC_VCVTN_S -+ UNSPEC_VCVTN_U -+ UNSPEC_VCVTP_S -+ UNSPEC_VCVTP_U - UNSPEC_VEXT - UNSPEC_VHADD_S - UNSPEC_VHADD_U -@@ -244,6 +262,8 @@ - UNSPEC_VMLSL_S_LANE - UNSPEC_VMLSL_U_LANE - UNSPEC_VMLSL_LANE -+ UNSPEC_VFMA_LANE -+ UNSPEC_VFMS_LANE - UNSPEC_VMOVL_S - UNSPEC_VMOVL_U - UNSPEC_VMOVN -@@ -365,5 +385,11 @@ - UNSPEC_NVRINTN - UNSPEC_VQRDMLAH - UNSPEC_VQRDMLSH -+ UNSPEC_VRND -+ UNSPEC_VRNDA -+ UNSPEC_VRNDI -+ UNSPEC_VRNDM -+ UNSPEC_VRNDN -+ UNSPEC_VRNDP -+ UNSPEC_VRNDX - ]) -- ---- a/src/gcc/config/arm/vec-common.md -+++ b/src/gcc/config/arm/vec-common.md -@@ -124,6 +124,20 @@ - FAIL; - }) - -+(define_expand "vec_perm_const<mode>" -+ [(match_operand:VH 0 "s_register_operand") -+ (match_operand:VH 1 "s_register_operand") -+ (match_operand:VH 2 "s_register_operand") -+ (match_operand:<V_cmp_result> 3)] -+ "TARGET_NEON" -+{ -+ if (arm_expand_vec_perm_const (operands[0], operands[1], -+ operands[2], operands[3])) -+ DONE; -+ else -+ FAIL; -+}) -+ - (define_expand "vec_perm<mode>" - [(match_operand:VE 0 "s_register_operand" "") - (match_operand:VE 1 "s_register_operand" "") ---- a/src/gcc/config/arm/vfp.md -+++ b/src/gcc/config/arm/vfp.md -@@ -18,13 +18,206 @@ - ;; along with GCC; see the file COPYING3. If not see - ;; <http://www.gnu.org/licenses/>. */ - -+;; Patterns for HI moves which provide more data transfer instructions when VFP -+;; support is enabled. 
-+(define_insn "*arm_movhi_vfp" -+ [(set -+ (match_operand:HI 0 "nonimmediate_operand" -+ "=rk, r, r, m, r, *t, r, *t") -+ (match_operand:HI 1 "general_operand" -+ "rIk, K, n, r, mi, r, *t, *t"))] -+ "TARGET_ARM && TARGET_HARD_FLOAT -+ && !TARGET_VFP_FP16INST -+ && (register_operand (operands[0], HImode) -+ || register_operand (operands[1], HImode))" -+{ -+ switch (which_alternative) -+ { -+ case 0: -+ return "mov%?\t%0, %1\t%@ movhi"; -+ case 1: -+ return "mvn%?\t%0, #%B1\t%@ movhi"; -+ case 2: -+ return "movw%?\t%0, %L1\t%@ movhi"; -+ case 3: -+ return "strh%?\t%1, %0\t%@ movhi"; -+ case 4: -+ return "ldrh%?\t%0, %1\t%@ movhi"; -+ case 5: -+ case 6: -+ return "vmov%?\t%0, %1\t%@ int"; -+ case 7: -+ return "vmov%?.f32\t%0, %1\t%@ int"; -+ default: -+ gcc_unreachable (); -+ } -+} -+ [(set_attr "predicable" "yes") -+ (set_attr_alternative "type" -+ [(if_then_else -+ (match_operand 1 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (const_string "mvn_imm") -+ (const_string "mov_imm") -+ (const_string "store1") -+ (const_string "load1") -+ (const_string "f_mcr") -+ (const_string "f_mrc") -+ (const_string "fmov")]) -+ (set_attr "arch" "*, *, v6t2, *, *, *, *, *") -+ (set_attr "pool_range" "*, *, *, *, 256, *, *, *") -+ (set_attr "neg_pool_range" "*, *, *, *, 244, *, *, *") -+ (set_attr "length" "4")] -+) -+ -+(define_insn "*thumb2_movhi_vfp" -+ [(set -+ (match_operand:HI 0 "nonimmediate_operand" -+ "=rk, r, l, r, m, r, *t, r, *t") -+ (match_operand:HI 1 "general_operand" -+ "rk, I, Py, n, r, m, r, *t, *t"))] -+ "TARGET_THUMB2 && TARGET_HARD_FLOAT -+ && !TARGET_VFP_FP16INST -+ && (register_operand (operands[0], HImode) -+ || register_operand (operands[1], HImode))" -+{ -+ switch (which_alternative) -+ { -+ case 0: -+ case 1: -+ case 2: -+ return "mov%?\t%0, %1\t%@ movhi"; -+ case 3: -+ return "movw%?\t%0, %L1\t%@ movhi"; -+ case 4: -+ return "strh%?\t%1, %0\t%@ movhi"; -+ case 5: -+ return "ldrh%?\t%0, %1\t%@ movhi"; -+ case 6: -+ case 7: -+ return "vmov%?\t%0, %1\t%@ int"; -+ case 8: -+ return "vmov%?.f32\t%0, %1\t%@ int"; -+ default: -+ gcc_unreachable (); -+ } -+} -+ [(set_attr "predicable" "yes") -+ (set_attr "predicable_short_it" -+ "yes, no, yes, no, no, no, no, no, no") -+ (set_attr "type" -+ "mov_reg, mov_imm, mov_imm, mov_imm, store1, load1,\ -+ f_mcr, f_mrc, fmov") -+ (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *") -+ (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *") -+ (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *") -+ (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")] -+) -+ -+;; Patterns for HI moves which provide more data transfer instructions when FP16 -+;; instructions are available. 
-+(define_insn "*arm_movhi_fp16" -+ [(set -+ (match_operand:HI 0 "nonimmediate_operand" -+ "=r, r, r, m, r, *t, r, *t") -+ (match_operand:HI 1 "general_operand" -+ "rIk, K, n, r, mi, r, *t, *t"))] -+ "TARGET_ARM && TARGET_VFP_FP16INST -+ && (register_operand (operands[0], HImode) -+ || register_operand (operands[1], HImode))" -+{ -+ switch (which_alternative) -+ { -+ case 0: -+ return "mov%?\t%0, %1\t%@ movhi"; -+ case 1: -+ return "mvn%?\t%0, #%B1\t%@ movhi"; -+ case 2: -+ return "movw%?\t%0, %L1\t%@ movhi"; -+ case 3: -+ return "strh%?\t%1, %0\t%@ movhi"; -+ case 4: -+ return "ldrh%?\t%0, %1\t%@ movhi"; -+ case 5: -+ case 6: -+ return "vmov.f16\t%0, %1\t%@ int"; -+ case 7: -+ return "vmov%?.f32\t%0, %1\t%@ int"; -+ default: -+ gcc_unreachable (); -+ } -+} -+ [(set_attr "predicable" "yes, yes, yes, yes, yes, no, no, yes") -+ (set_attr_alternative "type" -+ [(if_then_else -+ (match_operand 1 "const_int_operand" "") -+ (const_string "mov_imm") -+ (const_string "mov_reg")) -+ (const_string "mvn_imm") -+ (const_string "mov_imm") -+ (const_string "store1") -+ (const_string "load1") -+ (const_string "f_mcr") -+ (const_string "f_mrc") -+ (const_string "fmov")]) -+ (set_attr "arch" "*, *, v6t2, *, *, *, *, *") -+ (set_attr "pool_range" "*, *, *, *, 256, *, *, *") -+ (set_attr "neg_pool_range" "*, *, *, *, 244, *, *, *") -+ (set_attr "length" "4")] -+) -+ -+(define_insn "*thumb2_movhi_fp16" -+ [(set -+ (match_operand:HI 0 "nonimmediate_operand" -+ "=rk, r, l, r, m, r, *t, r, *t") -+ (match_operand:HI 1 "general_operand" -+ "rk, I, Py, n, r, m, r, *t, *t"))] -+ "TARGET_THUMB2 && TARGET_VFP_FP16INST -+ && (register_operand (operands[0], HImode) -+ || register_operand (operands[1], HImode))" -+{ -+ switch (which_alternative) -+ { -+ case 0: -+ case 1: -+ case 2: -+ return "mov%?\t%0, %1\t%@ movhi"; -+ case 3: -+ return "movw%?\t%0, %L1\t%@ movhi"; -+ case 4: -+ return "strh%?\t%1, %0\t%@ movhi"; -+ case 5: -+ return "ldrh%?\t%0, %1\t%@ movhi"; -+ case 6: -+ case 7: -+ return "vmov.f16\t%0, %1\t%@ int"; -+ case 8: -+ return "vmov%?.f32\t%0, %1\t%@ int"; -+ default: -+ gcc_unreachable (); -+ } -+} -+ [(set_attr "predicable" -+ "yes, yes, yes, yes, yes, yes, no, no, yes") -+ (set_attr "predicable_short_it" -+ "yes, no, yes, no, no, no, no, no, no") -+ (set_attr "type" -+ "mov_reg, mov_imm, mov_imm, mov_imm, store1, load1,\ -+ f_mcr, f_mrc, fmov") -+ (set_attr "arch" "*, *, *, v6t2, *, *, *, *, *") -+ (set_attr "pool_range" "*, *, *, *, *, 4094, *, *, *") -+ (set_attr "neg_pool_range" "*, *, *, *, *, 250, *, *, *") -+ (set_attr "length" "2, 4, 2, 4, 4, 4, 4, 4, 4")] -+) -+ - ;; SImode moves - ;; ??? For now do not allow loading constants into vfp regs. This causes - ;; problems because small constants get converted into adds. 
- (define_insn "*arm_movsi_vfp" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,rk,m ,*t,r,*t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,rk,r,*t,*t,*Uvi,*t"))] -- "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT -+ "TARGET_ARM && TARGET_HARD_FLOAT - && ( s_register_operand (operands[0], SImode) - || s_register_operand (operands[1], SImode))" - "* -@@ -53,7 +246,8 @@ - } - " - [(set_attr "predicable" "yes") -- (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") -+ (set_attr "type" "mov_reg,mov_reg,mvn_imm,mov_imm,load1,store1, -+ f_mcr,f_mrc,fmov,f_loads,f_stores") - (set_attr "pool_range" "*,*,*,*,4096,*,*,*,*,1020,*") - (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")] - ) -@@ -66,7 +260,7 @@ - (define_insn "*thumb2_movsi_vfp" - [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv") - (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))] -- "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT -+ "TARGET_THUMB2 && TARGET_HARD_FLOAT - && ( s_register_operand (operands[0], SImode) - || s_register_operand (operands[1], SImode))" - "* -@@ -112,7 +306,7 @@ - (define_insn "*movdi_vfp" - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,q,q,m,w,r,w,w, Uv") - (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,q,r,w,w,Uvi,w"))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune != cortexa8 -+ "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune != cortexa8 - && ( register_operand (operands[0], DImode) - || register_operand (operands[1], DImode)) - && !(TARGET_NEON && CONST_INT_P (operands[1]) -@@ -163,7 +357,7 @@ - (define_insn "*movdi_vfp_cortexa8" - [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r,r,r,r,r,r,m,w,!r,w,w, Uv") - (match_operand:DI 1 "di_operand" "r,rDa,Db,Dc,mi,mi,r,r,w,w,Uvi,w"))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && arm_tune == cortexa8 -+ "TARGET_32BIT && TARGET_HARD_FLOAT && arm_tune == cortexa8 - && ( register_operand (operands[0], DImode) - || register_operand (operands[1], DImode)) - && !(TARGET_NEON && CONST_INT_P (operands[1]) -@@ -211,10 +405,87 @@ - ) - - ;; HFmode moves -+ -+(define_insn "*movhf_vfp_fp16" -+ [(set (match_operand:HF 0 "nonimmediate_operand" -+ "= r,m,t,r,t,r,t,t,Um,r") -+ (match_operand:HF 1 "general_operand" -+ " m,r,t,r,r,t,Dv,Um,t,F"))] -+ "TARGET_32BIT -+ && TARGET_VFP_FP16INST -+ && (s_register_operand (operands[0], HFmode) -+ || s_register_operand (operands[1], HFmode))" -+ { -+ switch (which_alternative) -+ { -+ case 0: /* ARM register from memory. */ -+ return \"ldrh%?\\t%0, %1\\t%@ __fp16\"; -+ case 1: /* Memory from ARM register. */ -+ return \"strh%?\\t%1, %0\\t%@ __fp16\"; -+ case 2: /* S register from S register. */ -+ return \"vmov\\t%0, %1\t%@ __fp16\"; -+ case 3: /* ARM register from ARM register. */ -+ return \"mov%?\\t%0, %1\\t%@ __fp16\"; -+ case 4: /* S register from ARM register. */ -+ case 5: /* ARM register from S register. */ -+ case 6: /* S register from immediate. */ -+ return \"vmov.f16\\t%0, %1\t%@ __fp16\"; -+ case 7: /* S register from memory. */ -+ return \"vld1.16\\t{%z0}, %A1\"; -+ case 8: /* Memory from S register. */ -+ return \"vst1.16\\t{%z1}, %A0\"; -+ case 9: /* ARM register from constant. 
*/ -+ { -+ long bits; -+ rtx ops[4]; -+ -+ bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (operands[1]), -+ HFmode); -+ ops[0] = operands[0]; -+ ops[1] = GEN_INT (bits); -+ ops[2] = GEN_INT (bits & 0xff00); -+ ops[3] = GEN_INT (bits & 0x00ff); -+ -+ if (arm_arch_thumb2) -+ output_asm_insn (\"movw\\t%0, %1\", ops); -+ else -+ output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); -+ return \"\"; -+ } -+ default: -+ gcc_unreachable (); -+ } -+ } -+ [(set_attr "predicable" "yes, yes, no, yes, no, no, no, no, no, no") -+ (set_attr "predicable_short_it" "no, no, no, yes,\ -+ no, no, no, no,\ -+ no, no") -+ (set_attr_alternative "type" -+ [(const_string "load1") (const_string "store1") -+ (const_string "fmov") (const_string "mov_reg") -+ (const_string "f_mcr") (const_string "f_mrc") -+ (const_string "fconsts") (const_string "neon_load1_1reg") -+ (const_string "neon_store1_1reg") -+ (if_then_else (match_test "arm_arch_thumb2") -+ (const_string "mov_imm") -+ (const_string "multiple"))]) -+ (set_attr_alternative "length" -+ [(const_int 4) (const_int 4) -+ (const_int 4) (const_int 4) -+ (const_int 4) (const_int 4) -+ (const_int 4) (const_int 4) -+ (const_int 4) -+ (if_then_else (match_test "arm_arch_thumb2") -+ (const_int 4) -+ (const_int 8))])] -+) -+ - (define_insn "*movhf_vfp_neon" - [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") - (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 -+ "TARGET_32BIT -+ && TARGET_HARD_FLOAT && TARGET_NEON_FP16 -+ && !TARGET_VFP_FP16INST - && ( s_register_operand (operands[0], HFmode) - || s_register_operand (operands[1], HFmode))" - "* -@@ -268,7 +539,10 @@ - (define_insn "*movhf_vfp" - [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,t,r,t,r,r") - (match_operand:HF 1 "general_operand" " m,r,t,r,r,t,F"))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16 && !TARGET_NEON_FP16 -+ "TARGET_32BIT -+ && TARGET_HARD_FLOAT -+ && !TARGET_NEON_FP16 -+ && !TARGET_VFP_FP16INST - && ( s_register_operand (operands[0], HFmode) - || s_register_operand (operands[1], HFmode))" - "* -@@ -321,7 +595,7 @@ - (define_insn "*movsf_vfp" - [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t ,t ,Uv,r ,m,t,r") - (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] -- "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP -+ "TARGET_ARM && TARGET_HARD_FLOAT - && ( s_register_operand (operands[0], SFmode) - || s_register_operand (operands[1], SFmode))" - "* -@@ -357,7 +631,7 @@ - (define_insn "*thumb2_movsf_vfp" - [(set (match_operand:SF 0 "nonimmediate_operand" "=t,?r,t, t ,Uv,r ,m,t,r") - (match_operand:SF 1 "general_operand" " ?r,t,Dv,UvE,t, mE,r,t,r"))] -- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP -+ "TARGET_THUMB2 && TARGET_HARD_FLOAT - && ( s_register_operand (operands[0], SFmode) - || s_register_operand (operands[1], SFmode))" - "* -@@ -394,9 +668,9 @@ - ;; DFmode moves - - (define_insn "*movdf_vfp" -- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r, m,w,r") -- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w ,mF,r,w,r"))] -- "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP -+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r, m,w,r") -+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w ,mF,r,w,r"))] -+ "TARGET_ARM && TARGET_HARD_FLOAT - && ( register_operand (operands[0], DFmode) - || register_operand (operands[1], DFmode))" - "* -@@ -410,40 +684,44 @@ - case 2: - gcc_assert 
(TARGET_VFP_DOUBLE); - return \"vmov%?.f64\\t%P0, %1\"; -- case 3: case 4: -+ case 3: -+ gcc_assert (TARGET_VFP_DOUBLE); -+ return \"vmov.i64\\t%P0, #0\\t%@ float\"; -+ case 4: case 5: - return output_move_vfp (operands); -- case 5: case 6: -+ case 6: case 7: - return output_move_double (operands, true, NULL); -- case 7: -+ case 8: - if (TARGET_VFP_SINGLE) - return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\"; - else - return \"vmov%?.f64\\t%P0, %P1\"; -- case 8: -+ case 9: - return \"#\"; - default: - gcc_unreachable (); - } - } - " -- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,f_stored,\ -+ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,f_stored,\ - load2,store2,ffarithd,multiple") -- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) -- (eq_attr "alternative" "7") -+ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8) -+ (eq_attr "alternative" "8") - (if_then_else - (match_test "TARGET_VFP_SINGLE") - (const_int 8) - (const_int 4))] - (const_int 4))) -- (set_attr "predicable" "yes") -- (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*") -- (set_attr "neg_pool_range" "*,*,*,1004,*,1004,*,*,*")] -+ (set_attr "predicable" "yes,yes,yes,no,yes,yes,yes,yes,yes,yes") -+ (set_attr "pool_range" "*,*,*,*,1020,*,1020,*,*,*") -+ (set_attr "neg_pool_range" "*,*,*,*,1004,*,1004,*,*,*") -+ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")] - ) - - (define_insn "*thumb2_movdf_vfp" -- [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w ,Uv,r ,m,w,r") -- (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,UvF,w, mF,r, w,r"))] -- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP -+ [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=w,?r,w ,w,w ,Uv,r ,m,w,r") -+ (match_operand:DF 1 "soft_df_operand" " ?r,w,Dy,G,UvF,w, mF,r, w,r"))] -+ "TARGET_THUMB2 && TARGET_HARD_FLOAT - && ( register_operand (operands[0], DFmode) - || register_operand (operands[1], DFmode))" - "* -@@ -457,11 +735,14 @@ - case 2: - gcc_assert (TARGET_VFP_DOUBLE); - return \"vmov%?.f64\\t%P0, %1\"; -- case 3: case 4: -+ case 3: -+ gcc_assert (TARGET_VFP_DOUBLE); -+ return \"vmov.i64\\t%P0, #0\\t%@ float\"; -+ case 4: case 5: - return output_move_vfp (operands); -- case 5: case 6: case 8: -+ case 6: case 7: case 9: - return output_move_double (operands, true, NULL); -- case 7: -+ case 8: - if (TARGET_VFP_SINGLE) - return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\"; - else -@@ -471,17 +752,18 @@ - } - } - " -- [(set_attr "type" "f_mcrr,f_mrrc,fconstd,f_loadd,\ -+ [(set_attr "type" "f_mcrr,f_mrrc,fconstd,neon_move,f_loadd,\ - f_stored,load2,store2,ffarithd,multiple") -- (set (attr "length") (cond [(eq_attr "alternative" "5,6,8") (const_int 8) -- (eq_attr "alternative" "7") -+ (set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8) -+ (eq_attr "alternative" "8") - (if_then_else - (match_test "TARGET_VFP_SINGLE") - (const_int 8) - (const_int 4))] - (const_int 4))) -- (set_attr "pool_range" "*,*,*,1018,*,4094,*,*,*") -- (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")] -+ (set_attr "pool_range" "*,*,*,*,1018,*,4094,*,*,*") -+ (set_attr "neg_pool_range" "*,*,*,*,1008,*,0,*,*,*") -+ (set_attr "arch" "any,any,any,neon,any,any,any,any,any,any")] - ) - - -@@ -494,7 +776,7 @@ - [(match_operand 4 "cc_register" "") (const_int 0)]) - (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") - (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] -- "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_ARM && 
TARGET_HARD_FLOAT" - "@ - vmov%D3.f32\\t%0, %2 - vmov%d3.f32\\t%0, %1 -@@ -517,7 +799,7 @@ - [(match_operand 4 "cc_register" "") (const_int 0)]) - (match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t") - (match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))] -- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it" -+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && !arm_restrict_it" - "@ - it\\t%D3\;vmov%D3.f32\\t%0, %2 - it\\t%d3\;vmov%d3.f32\\t%0, %1 -@@ -585,7 +867,7 @@ - (define_insn "*abssf2_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (abs:SF (match_operand:SF 1 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vabs%?.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -605,7 +887,7 @@ - (define_insn "*negsf2_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t,?r") - (neg:SF (match_operand:SF 1 "s_register_operand" "t,r")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "@ - vneg%?.f32\\t%0, %1 - eor%?\\t%0, %1, #-2147483648" -@@ -661,14 +943,68 @@ - (set_attr "type" "ffarithd")] - ) - -+;; ABS and NEG for FP16. -+(define_insn "<absneg_str>hf2" -+ [(set (match_operand:HF 0 "s_register_operand" "=w") -+ (ABSNEG:HF (match_operand:HF 1 "s_register_operand" "w")))] -+ "TARGET_VFP_FP16INST" -+ "v<absneg_str>.f16\t%0, %1" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "ffariths")] -+) -+ -+(define_expand "neon_vabshf" -+ [(set -+ (match_operand:HF 0 "s_register_operand") -+ (abs:HF (match_operand:HF 1 "s_register_operand")))] -+ "TARGET_VFP_FP16INST" -+{ -+ emit_insn (gen_abshf2 (operands[0], operands[1])); -+ DONE; -+}) -+ -+;; VRND for FP16. 
-+(define_insn "neon_v<fp16_rnd_str>hf" -+ [(set (match_operand:HF 0 "s_register_operand" "=w") -+ (unspec:HF -+ [(match_operand:HF 1 "s_register_operand" "w")] -+ FP16_RND))] -+ "TARGET_VFP_FP16INST" -+ "<fp16_rnd_insn>.f16\t%0, %1" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "neon_fp_round_s")] -+) -+ -+(define_insn "neon_vrndihf" -+ [(set (match_operand:HF 0 "s_register_operand" "=w") -+ (unspec:HF -+ [(match_operand:HF 1 "s_register_operand" "w")] -+ UNSPEC_VRNDI))] -+ "TARGET_VFP_FP16INST" -+ "vrintr.f16\t%0, %1" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "neon_fp_round_s")] -+) - - ;; Arithmetic insns - -+(define_insn "addhf3" -+ [(set -+ (match_operand:HF 0 "s_register_operand" "=w") -+ (plus:HF -+ (match_operand:HF 1 "s_register_operand" "w") -+ (match_operand:HF 2 "s_register_operand" "w")))] -+ "TARGET_VFP_FP16INST" -+ "vadd.f16\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fadds")] -+) -+ - (define_insn "*addsf3_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (plus:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vadd%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -686,12 +1022,23 @@ - (set_attr "type" "faddd")] - ) - -+(define_insn "subhf3" -+ [(set -+ (match_operand:HF 0 "s_register_operand" "=w") -+ (minus:HF -+ (match_operand:HF 1 "s_register_operand" "w") -+ (match_operand:HF 2 "s_register_operand" "w")))] -+ "TARGET_VFP_FP16INST" -+ "vsub.f16\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fadds")] -+) - - (define_insn "*subsf3_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (minus:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vsub%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -712,6 +1059,19 @@ - - ;; Division insns - -+;; FP16 Division. -+(define_insn "divhf3" -+ [(set -+ (match_operand:HF 0 "s_register_operand" "=w") -+ (div:HF -+ (match_operand:HF 1 "s_register_operand" "w") -+ (match_operand:HF 2 "s_register_operand" "w")))] -+ "TARGET_VFP_FP16INST" -+ "vdiv.f16\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fdivs")] -+) -+ - ; VFP9 Erratum 760019: It's potentially unsafe to overwrite the input - ; operands, so mark the output as early clobber for VFPv2 on ARMv5 or - ; earlier. 
-@@ -719,7 +1079,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=&t,t") - (div:SF (match_operand:SF 1 "s_register_operand" "t,t") - (match_operand:SF 2 "s_register_operand" "t,t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vdiv%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -742,11 +1102,22 @@ - - ;; Multiplication insns - -+(define_insn "mulhf3" -+ [(set -+ (match_operand:HF 0 "s_register_operand" "=w") -+ (mult:HF (match_operand:HF 1 "s_register_operand" "w") -+ (match_operand:HF 2 "s_register_operand" "w")))] -+ "TARGET_VFP_FP16INST" -+ "vmul.f16\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fmuls")] -+) -+ - (define_insn "*mulsf3_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (mult:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vmul%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -764,11 +1135,31 @@ - (set_attr "type" "fmuld")] - ) - -+(define_insn "*mulsf3neghf_vfp" -+ [(set (match_operand:HF 0 "s_register_operand" "=t") -+ (mult:HF (neg:HF (match_operand:HF 1 "s_register_operand" "t")) -+ (match_operand:HF 2 "s_register_operand" "t")))] -+ "TARGET_VFP_FP16INST && !flag_rounding_math" -+ "vnmul.f16\\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fmuls")] -+) -+ -+(define_insn "*negmulhf3_vfp" -+ [(set (match_operand:HF 0 "s_register_operand" "=t") -+ (neg:HF (mult:HF (match_operand:HF 1 "s_register_operand" "t") -+ (match_operand:HF 2 "s_register_operand" "t"))))] -+ "TARGET_VFP_FP16INST" -+ "vnmul.f16\\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fmuls")] -+) -+ - (define_insn "*mulsf3negsf_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (mult:SF (neg:SF (match_operand:SF 1 "s_register_operand" "t")) - (match_operand:SF 2 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP && !flag_rounding_math" -+ "TARGET_32BIT && TARGET_HARD_FLOAT && !flag_rounding_math" - "vnmul%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -779,7 +1170,7 @@ - [(set (match_operand:SF 0 "s_register_operand" "=t") - (neg:SF (mult:SF (match_operand:SF 1 "s_register_operand" "t") - (match_operand:SF 2 "s_register_operand" "t"))))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vnmul%?.f32\\t%0, %1, %2" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -813,12 +1204,24 @@ - ;; Multiply-accumulate insns - - ;; 0 = 1 * 2 + 0 -+(define_insn "*mulsf3addhf_vfp" -+ [(set (match_operand:HF 0 "s_register_operand" "=t") -+ (plus:HF -+ (mult:HF (match_operand:HF 2 "s_register_operand" "t") -+ (match_operand:HF 3 "s_register_operand" "t")) -+ (match_operand:HF 1 "s_register_operand" "0")))] -+ "TARGET_VFP_FP16INST" -+ "vmla.f16\\t%0, %2, %3" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fmacs")] -+) -+ - (define_insn "*mulsf3addsf_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (plus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - 
"vmla%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -838,12 +1241,23 @@ - ) - - ;; 0 = 1 * 2 - 0 -+(define_insn "*mulhf3subhf_vfp" -+ [(set (match_operand:HF 0 "s_register_operand" "=t") -+ (minus:HF (mult:HF (match_operand:HF 2 "s_register_operand" "t") -+ (match_operand:HF 3 "s_register_operand" "t")) -+ (match_operand:HF 1 "s_register_operand" "0")))] -+ "TARGET_VFP_FP16INST" -+ "vnmls.f16\\t%0, %2, %3" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fmacs")] -+) -+ - (define_insn "*mulsf3subsf_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (minus:SF (mult:SF (match_operand:SF 2 "s_register_operand" "t") - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vnmls%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -863,12 +1277,23 @@ - ) - - ;; 0 = -(1 * 2) + 0 -+(define_insn "*mulhf3neghfaddhf_vfp" -+ [(set (match_operand:HF 0 "s_register_operand" "=t") -+ (minus:HF (match_operand:HF 1 "s_register_operand" "0") -+ (mult:HF (match_operand:HF 2 "s_register_operand" "t") -+ (match_operand:HF 3 "s_register_operand" "t"))))] -+ "TARGET_VFP_FP16INST" -+ "vmls.f16\\t%0, %2, %3" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fmacs")] -+) -+ - (define_insn "*mulsf3negsfaddsf_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (minus:SF (match_operand:SF 1 "s_register_operand" "0") - (mult:SF (match_operand:SF 2 "s_register_operand" "t") - (match_operand:SF 3 "s_register_operand" "t"))))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vmls%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -889,13 +1314,25 @@ - - - ;; 0 = -(1 * 2) - 0 -+(define_insn "*mulhf3neghfsubhf_vfp" -+ [(set (match_operand:HF 0 "s_register_operand" "=t") -+ (minus:HF (mult:HF -+ (neg:HF (match_operand:HF 2 "s_register_operand" "t")) -+ (match_operand:HF 3 "s_register_operand" "t")) -+ (match_operand:HF 1 "s_register_operand" "0")))] -+ "TARGET_VFP_FP16INST" -+ "vnmla.f16\\t%0, %2, %3" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fmacs")] -+) -+ - (define_insn "*mulsf3negsfsubsf_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (minus:SF (mult:SF - (neg:SF (match_operand:SF 2 "s_register_operand" "t")) - (match_operand:SF 3 "s_register_operand" "t")) - (match_operand:SF 1 "s_register_operand" "0")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vnmla%?.f32\\t%0, %2, %3" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -917,6 +1354,30 @@ - - ;; Fused-multiply-accumulate - -+(define_insn "fmahf4" -+ [(set (match_operand:HF 0 "register_operand" "=w") -+ (fma:HF -+ (match_operand:HF 1 "register_operand" "w") -+ (match_operand:HF 2 "register_operand" "w") -+ (match_operand:HF 3 "register_operand" "0")))] -+ "TARGET_VFP_FP16INST" -+ "vfma.f16\\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "ffmas")] -+) -+ -+(define_expand "neon_vfmahf" -+ [(match_operand:HF 0 "s_register_operand") -+ (match_operand:HF 1 "s_register_operand") -+ (match_operand:HF 2 "s_register_operand") -+ (match_operand:HF 3 "s_register_operand")] -+ "TARGET_VFP_FP16INST" -+{ -+ emit_insn (gen_fmahf4 (operands[0], operands[2], operands[3], -+ operands[1])); -+ 
DONE; -+}) -+ - (define_insn "fma<SDF:mode>4" - [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") - (fma:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>") -@@ -929,6 +1390,30 @@ - (set_attr "type" "ffma<vfp_type>")] - ) - -+(define_insn "fmsubhf4_fp16" -+ [(set (match_operand:HF 0 "register_operand" "=w") -+ (fma:HF -+ (neg:HF (match_operand:HF 1 "register_operand" "w")) -+ (match_operand:HF 2 "register_operand" "w") -+ (match_operand:HF 3 "register_operand" "0")))] -+ "TARGET_VFP_FP16INST" -+ "vfms.f16\\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "ffmas")] -+) -+ -+(define_expand "neon_vfmshf" -+ [(match_operand:HF 0 "s_register_operand") -+ (match_operand:HF 1 "s_register_operand") -+ (match_operand:HF 2 "s_register_operand") -+ (match_operand:HF 3 "s_register_operand")] -+ "TARGET_VFP_FP16INST" -+{ -+ emit_insn (gen_fmsubhf4_fp16 (operands[0], operands[2], operands[3], -+ operands[1])); -+ DONE; -+}) -+ - (define_insn "*fmsub<SDF:mode>4" - [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") - (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" -@@ -942,6 +1427,17 @@ - (set_attr "type" "ffma<vfp_type>")] - ) - -+(define_insn "*fnmsubhf4" -+ [(set (match_operand:HF 0 "register_operand" "=w") -+ (fma:HF (match_operand:HF 1 "register_operand" "w") -+ (match_operand:HF 2 "register_operand" "w") -+ (neg:HF (match_operand:HF 3 "register_operand" "0"))))] -+ "TARGET_VFP_FP16INST" -+ "vfnms.f16\\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "ffmas")] -+) -+ - (define_insn "*fnmsub<SDF:mode>4" - [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") - (fma:SDF (match_operand:SDF 1 "register_operand" "<F_constraint>") -@@ -954,6 +1450,17 @@ - (set_attr "type" "ffma<vfp_type>")] - ) - -+(define_insn "*fnmaddhf4" -+ [(set (match_operand:HF 0 "register_operand" "=w") -+ (fma:HF (neg:HF (match_operand:HF 1 "register_operand" "w")) -+ (match_operand:HF 2 "register_operand" "w") -+ (neg:HF (match_operand:HF 3 "register_operand" "0"))))] -+ "TARGET_VFP_FP16INST" -+ "vfnma.f16\\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "ffmas")] -+) -+ - (define_insn "*fnmadd<SDF:mode>4" - [(set (match_operand:SDF 0 "register_operand" "=<F_constraint>") - (fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" -@@ -993,7 +1500,7 @@ - (define_insn "extendhfsf2" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" -+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FP16 || TARGET_VFP_FP16INST)" - "vcvtb%?.f32.f16\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -1003,7 +1510,7 @@ - (define_insn "truncsfhf2" - [(set (match_operand:HF 0 "s_register_operand" "=t") - (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16" -+ "TARGET_32BIT && TARGET_HARD_FLOAT && (TARGET_FP16 || TARGET_VFP_FP16INST)" - "vcvtb%?.f16.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -1013,7 +1520,7 @@ - (define_insn "*truncsisf2_vfp" - [(set (match_operand:SI 0 "s_register_operand" "=t") - (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vcvt%?.s32.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ 
-1034,7 +1541,7 @@ - (define_insn "fixuns_truncsfsi2" - [(set (match_operand:SI 0 "s_register_operand" "=t") - (unsigned_fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vcvt%?.u32.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -1055,7 +1562,7 @@ - (define_insn "*floatsisf2_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (float:SF (match_operand:SI 1 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vcvt%?.f32.s32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -1076,7 +1583,7 @@ - (define_insn "floatunssisf2" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (unsigned_float:SF (match_operand:SI 1 "s_register_operand" "t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vcvt%?.f32.u32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -1096,13 +1603,34 @@ - - ;; Sqrt insns. - -+(define_insn "neon_vsqrthf" -+ [(set (match_operand:HF 0 "s_register_operand" "=w") -+ (sqrt:HF (match_operand:HF 1 "s_register_operand" "w")))] -+ "TARGET_VFP_FP16INST" -+ "vsqrt.f16\t%0, %1" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fsqrts")] -+) -+ -+(define_insn "neon_vrsqrtshf" -+ [(set -+ (match_operand:HF 0 "s_register_operand" "=w") -+ (unspec:HF [(match_operand:HF 1 "s_register_operand" "w") -+ (match_operand:HF 2 "s_register_operand" "w")] -+ UNSPEC_VRSQRTS))] -+ "TARGET_VFP_FP16INST" -+ "vrsqrts.f16\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "fsqrts")] -+) -+ - ; VFP9 Erratum 760019: It's potentially unsafe to overwrite the input - ; operands, so mark the output as early clobber for VFPv2 on ARMv5 or - ; earlier. 
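
[Editor's note: the conversion patterns above are now conditional on TARGET_HARD_FLOAT only; fixuns_truncsfsi2 and floatunssisf2 in particular map C's float/unsigned casts straight onto vcvt. A short sketch; the mnemonics are read off the templates above.]

    /* float -> unsigned conversion truncates toward zero, which is exactly
       what vcvt.u32.f32 implements, so no libgcc helper is needed on a
       hard-float target.  */
    unsigned int
    to_u32 (float f)
    {
      return (unsigned int) f;   /* fixuns_truncsfsi2 */
    }

    float
    from_u32 (unsigned int u)
    {
      return (float) u;          /* floatunssisf2 */
    }
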
- (define_insn "*sqrtsf2_vfp" - [(set (match_operand:SF 0 "s_register_operand" "=&t,t") - (sqrt:SF (match_operand:SF 1 "s_register_operand" "t,t")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vsqrt%?.f32\\t%0, %1" - [(set_attr "predicable" "yes") - (set_attr "predicable_short_it" "no") -@@ -1127,7 +1655,7 @@ - (define_insn "*movcc_vfp" - [(set (reg CC_REGNUM) - (reg VFPCC_REGNUM))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "vmrs%?\\tAPSR_nzcv, FPSCR" - [(set_attr "conds" "set") - (set_attr "type" "f_flag")] -@@ -1137,9 +1665,9 @@ - [(set (reg:CCFP CC_REGNUM) - (compare:CCFP (match_operand:SF 0 "s_register_operand" "t") - (match_operand:SF 1 "vfp_compare_operand" "tG")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "#" -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - [(set (reg:CCFP VFPCC_REGNUM) - (compare:CCFP (match_dup 0) - (match_dup 1))) -@@ -1152,9 +1680,9 @@ - [(set (reg:CCFPE CC_REGNUM) - (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t") - (match_operand:SF 1 "vfp_compare_operand" "tG")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "#" -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - [(set (reg:CCFPE VFPCC_REGNUM) - (compare:CCFPE (match_dup 0) - (match_dup 1))) -@@ -1203,7 +1731,7 @@ - [(set (reg:CCFP VFPCC_REGNUM) - (compare:CCFP (match_operand:SF 0 "s_register_operand" "t,t") - (match_operand:SF 1 "vfp_compare_operand" "t,G")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "@ - vcmp%?.f32\\t%0, %1 - vcmp%?.f32\\t%0, #0" -@@ -1216,7 +1744,7 @@ - [(set (reg:CCFPE VFPCC_REGNUM) - (compare:CCFPE (match_operand:SF 0 "s_register_operand" "t,t") - (match_operand:SF 1 "vfp_compare_operand" "t,G")))] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "@ - vcmpe%?.f32\\t%0, %1 - vcmpe%?.f32\\t%0, #0" -@@ -1252,9 +1780,6 @@ - ) - - ;; Fixed point to floating point conversions. --(define_code_iterator FCVT [unsigned_float float]) --(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")]) -- - (define_insn "*combine_vcvt_f32_<FCVTI32typename>" - [(set (match_operand:SF 0 "s_register_operand" "=t") - (mult:SF (FCVT:SF (match_operand:SI 1 "s_register_operand" "0")) -@@ -1299,13 +1824,132 @@ - (set_attr "type" "f_cvtf2i")] - ) - -+;; FP16 conversions. -+(define_insn "neon_vcvth<sup>hf" -+ [(set (match_operand:HF 0 "s_register_operand" "=w") -+ (unspec:HF -+ [(match_operand:SI 1 "s_register_operand" "w")] -+ VCVTH_US))] -+ "TARGET_VFP_FP16INST" -+ "vcvt.f16.<sup>%#32\t%0, %1" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "f_cvti2f")] -+) -+ -+(define_insn "neon_vcvth<sup>si" -+ [(set (match_operand:SI 0 "s_register_operand" "=w") -+ (unspec:SI -+ [(match_operand:HF 1 "s_register_operand" "w")] -+ VCVTH_US))] -+ "TARGET_VFP_FP16INST" -+ "vcvt.<sup>%#32.f16\t%0, %1" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "f_cvtf2i")] -+) -+ -+;; The neon_vcvth<sup>_nhf patterns are used to generate the instruction for the -+;; vcvth_n_f16_<sup>32 arm_fp16 intrinsics. They are complicated by the -+;; hardware requirement that the source and destination registers are the same -+;; despite having different machine modes. 
The approach is to use a temporary -+;; register for the conversion and move that to the correct destination. -+ -+;; Generate an unspec pattern for the intrinsic. -+(define_insn "neon_vcvth<sup>_nhf_unspec" -+ [(set -+ (match_operand:SI 0 "s_register_operand" "=w") -+ (unspec:SI -+ [(match_operand:SI 1 "s_register_operand" "0") -+ (match_operand:SI 2 "immediate_operand" "i")] -+ VCVT_HF_US_N)) -+ (set -+ (match_operand:HF 3 "s_register_operand" "=w") -+ (float_truncate:HF (float:SF (match_dup 0))))] -+ "TARGET_VFP_FP16INST" -+{ -+ neon_const_bounds (operands[2], 1, 33); -+ return "vcvt.f16.<sup>32\t%0, %0, %2\;vmov.f32\t%3, %0"; -+} -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "f_cvti2f")] -+) -+ -+;; Generate the instruction patterns needed for vcvth_n_f16_s32 neon intrinsics. -+(define_expand "neon_vcvth<sup>_nhf" -+ [(match_operand:HF 0 "s_register_operand") -+ (unspec:HF [(match_operand:SI 1 "s_register_operand") -+ (match_operand:SI 2 "immediate_operand")] -+ VCVT_HF_US_N)] -+"TARGET_VFP_FP16INST" -+{ -+ rtx op1 = gen_reg_rtx (SImode); -+ -+ neon_const_bounds (operands[2], 1, 33); -+ -+ emit_move_insn (op1, operands[1]); -+ emit_insn (gen_neon_vcvth<sup>_nhf_unspec (op1, op1, operands[2], -+ operands[0])); -+ DONE; -+}) -+ -+;; The neon_vcvth<sup>_nsi patterns are used to generate the instruction for the -+;; vcvth_n_<sup>32_f16 arm_fp16 intrinsics. They have the same restrictions and -+;; are implemented in the same way as the neon_vcvth<sup>_nhf patterns. -+ -+;; Generate an unspec pattern, constraining the registers. -+(define_insn "neon_vcvth<sup>_nsi_unspec" -+ [(set (match_operand:SI 0 "s_register_operand" "=w") -+ (unspec:SI -+ [(fix:SI -+ (fix:SF -+ (float_extend:SF -+ (match_operand:HF 1 "s_register_operand" "w")))) -+ (match_operand:SI 2 "immediate_operand" "i")] -+ VCVT_SI_US_N))] -+ "TARGET_VFP_FP16INST" -+{ -+ neon_const_bounds (operands[2], 1, 33); -+ return "vmov.f32\t%0, %1\;vcvt.<sup>%#32.f16\t%0, %0, %2"; -+} -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "f_cvtf2i")] -+) -+ -+;; Generate the instruction patterns needed for vcvth_n_f16_s32 neon intrinsics. -+(define_expand "neon_vcvth<sup>_nsi" -+ [(match_operand:SI 0 "s_register_operand") -+ (unspec:SI -+ [(match_operand:HF 1 "s_register_operand") -+ (match_operand:SI 2 "immediate_operand")] -+ VCVT_SI_US_N)] -+ "TARGET_VFP_FP16INST" -+{ -+ rtx op1 = gen_reg_rtx (SImode); -+ -+ neon_const_bounds (operands[2], 1, 33); -+ emit_insn (gen_neon_vcvth<sup>_nsi_unspec (op1, operands[1], operands[2])); -+ emit_move_insn (operands[0], op1); -+ DONE; -+}) -+ -+(define_insn "neon_vcvt<vcvth_op>h<sup>si" -+ [(set -+ (match_operand:SI 0 "s_register_operand" "=w") -+ (unspec:SI -+ [(match_operand:HF 1 "s_register_operand" "w")] -+ VCVT_HF_US))] -+ "TARGET_VFP_FP16INST" -+ "vcvt<vcvth_op>.<sup>%#32.f16\t%0, %1" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "f_cvtf2i")] -+) -+ - ;; Store multiple insn used in function prologue. 
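
Aside, not part of the patch: the neon_vcvth<sup>_nhf expander above is
the pattern behind the fixed-point-to-half conversion intrinsics.  A
minimal sketch, assuming the ACLE vcvth_n_f16_s32 intrinsic from
arm_fp16.h; the fraction-bit count must be a constant accepted by the
neon_const_bounds (operands[2], 1, 33) check above, i.e. 1 to 32:

    #include <arm_fp16.h>
    #include <stdint.h>

    float16_t q16_16_to_half (int32_t fx)
    {
      /* Converts Q16.16 fixed point to __fp16.  Lowers to vcvt.f16.s32
         plus the extra vmov described in the comment above, because the
         instruction requires source and destination in the same
         register despite the SImode/HFmode mismatch.  */
      return vcvth_n_f16_s32 (fx, 16);
    }
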
- (define_insn "*push_multi_vfp" - [(match_parallel 2 "multi_register_push" - [(set (match_operand:BLK 0 "memory_operand" "=m") - (unspec:BLK [(match_operand:DF 1 "vfp_register_operand" "")] - UNSPEC_PUSH_MULT))])] -- "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP" -+ "TARGET_32BIT && TARGET_HARD_FLOAT" - "* return vfp_output_vstmd (operands);" - [(set_attr "type" "f_stored")] - ) -@@ -1368,6 +2012,20 @@ - ) - - ;; Scalar forms for the IEEE-754 fmax()/fmin() functions -+ -+(define_insn "neon_<fmaxmin_op>hf" -+ [(set -+ (match_operand:HF 0 "s_register_operand" "=w") -+ (unspec:HF -+ [(match_operand:HF 1 "s_register_operand" "w") -+ (match_operand:HF 2 "s_register_operand" "w")] -+ VMAXMINFNM))] -+ "TARGET_VFP_FP16INST" -+ "<fmaxmin_op>.f16\t%0, %1, %2" -+ [(set_attr "conds" "unconditional") -+ (set_attr "type" "f_minmaxs")] -+) -+ - (define_insn "<fmaxmin><mode>3" - [(set (match_operand:SDF 0 "s_register_operand" "=<F_constraint>") - (unspec:SDF [(match_operand:SDF 1 "s_register_operand" "<F_constraint>") -@@ -1382,7 +2040,7 @@ - ;; Write Floating-point Status and Control Register. - (define_insn "set_fpscr" - [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)] -- "TARGET_VFP && TARGET_HARD_FLOAT" -+ "TARGET_HARD_FLOAT" - "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR" - [(set_attr "type" "mrs")]) - -@@ -1390,7 +2048,7 @@ - (define_insn "get_fpscr" - [(set (match_operand:SI 0 "register_operand" "=r") - (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))] -- "TARGET_VFP && TARGET_HARD_FLOAT" -+ "TARGET_HARD_FLOAT" - "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR" - [(set_attr "type" "mrs")]) - ---- a/src/gcc/config/arm/xgene1.md -+++ b/src/gcc/config/arm/xgene1.md -@@ -164,7 +164,7 @@ - - (define_insn_reservation "xgene1_bfm" 2 - (and (eq_attr "tune" "xgene1") -- (eq_attr "type" "bfm")) -+ (eq_attr "type" "bfm,bfx")) - "xgene1_decode1op,xgene1_fsu") - - (define_insn_reservation "xgene1_f_rint" 5 ---- a/src/gcc/config/i386/i386.c -+++ b/src/gcc/config/i386/i386.c -@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see - #include "backend.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "gimple.h" - #include "cfghooks.h" - #include "cfgloop.h" ---- a/src/gcc/config/ia64/ia64.c -+++ b/src/gcc/config/ia64/ia64.c -@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3. If not see - #include "target.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "cfghooks.h" - #include "df.h" - #include "tm_p.h" ---- a/src/gcc/config/linux.c -+++ b/src/gcc/config/linux.c -@@ -26,7 +26,7 @@ along with GCC; see the file COPYING3. If not see - bool - linux_libc_has_function (enum function_class fn_class) - { -- if (OPTION_GLIBC) -+ if (OPTION_GLIBC || OPTION_MUSL) - return true; - if (OPTION_BIONIC) - if (fn_class == function_c94 ---- a/src/gcc/config/mips/mips.c -+++ b/src/gcc/config/mips/mips.c -@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see - #include "target.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "gimple.h" - #include "cfghooks.h" - #include "df.h" ---- a/src/gcc/config/rs6000/rs6000.c -+++ b/src/gcc/config/rs6000/rs6000.c -@@ -24,6 +24,7 @@ - #include "backend.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "gimple.h" - #include "cfghooks.h" - #include "cfgloop.h" ---- a/src/gcc/config/sparc/sparc.c -+++ b/src/gcc/config/sparc/sparc.c -@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3. 
If not see - #include "target.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "gimple.h" - #include "df.h" - #include "tm_p.h" ---- a/src/gcc/configure -+++ b/src/gcc/configure -@@ -1711,7 +1711,8 @@ Optional Packages: - --with-stabs arrange to use stabs instead of host debug format - --with-dwarf2 force the default debug format to be DWARF 2 - --with-specs=SPECS add SPECS to driver command-line processing -- --with-pkgversion=PKG Use PKG in the version string in place of "GCC" -+ --with-pkgversion=PKG Use PKG in the version string in place of "Linaro -+ GCC `cat $srcdir/LINARO-VERSION`" - --with-bugurl=URL Direct users to URL to report a bug - --with-multilib-list select multilibs (AArch64, SH and x86-64 only) - --with-gnu-ld assume the C compiler uses GNU ld default=no -@@ -7658,7 +7659,7 @@ if test "${with_pkgversion+set}" = set; then : - *) PKGVERSION="($withval) " ;; - esac - else -- PKGVERSION="(GCC) " -+ PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " - - fi - -@@ -18460,7 +18461,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 18463 "configure" -+#line 18464 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H -@@ -18566,7 +18567,7 @@ else - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext <<_LT_EOF --#line 18569 "configure" -+#line 18570 "configure" - #include "confdefs.h" - - #if HAVE_DLFCN_H ---- a/src/gcc/configure.ac -+++ b/src/gcc/configure.ac -@@ -910,7 +910,7 @@ AC_ARG_WITH(specs, - ) - AC_SUBST(CONFIGURE_SPECS) - --ACX_PKGVERSION([GCC]) -+ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) - ACX_BUGURL([http://gcc.gnu.org/bugs.html]) - - # Sanity check enable_languages in case someone does not run the toplevel ---- a/src/gcc/cppbuiltin.c -+++ b/src/gcc/cppbuiltin.c -@@ -52,18 +52,41 @@ parse_basever (int *major, int *minor, int *patchlevel) - *patchlevel = s_patchlevel; - } - -+/* Parse a LINAROVER version string of the format "M.m-year.month[-spin][~dev]" -+ to create Linaro release number YYYYMM and spin version. */ -+static void -+parse_linarover (int *release, int *spin) -+{ -+ static int s_year = -1, s_month, s_spin; -+ -+ if (s_year == -1) -+ if (sscanf (LINAROVER, "%*[^-]-%d.%d-%d", &s_year, &s_month, &s_spin) != 3) -+ { -+ sscanf (LINAROVER, "%*[^-]-%d.%d", &s_year, &s_month); -+ s_spin = 0; -+ } -+ -+ if (release) -+ *release = s_year * 100 + s_month; -+ -+ if (spin) -+ *spin = s_spin; -+} - - /* Define __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__ and __VERSION__. 
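
Aside, added for this excerpt: given the format string in parse_linarover
above, a LINAROVER of "6.3-2017.03-1" produces release 201703 and spin 1,
while "6.3-2017.03" or "6.3-2017.03~dev" match only two fields of the
first sscanf, take the fallback path, and get spin 0.  A minimal consumer
of the macros defined just below, assuming a Linaro-patched compiler:

    #if defined (__LINARO_RELEASE__) && __LINARO_RELEASE__ >= 201703
    /* Building with the Linaro 2017.03 release or a later snapshot.  */
    #endif
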
*/ - static void - define__GNUC__ (cpp_reader *pfile) - { -- int major, minor, patchlevel; -+ int major, minor, patchlevel, linaro_release, linaro_spin; - - parse_basever (&major, &minor, &patchlevel); -+ parse_linarover (&linaro_release, &linaro_spin); - cpp_define_formatted (pfile, "__GNUC__=%d", major); - cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor); - cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel); - cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string); -+ cpp_define_formatted (pfile, "__LINARO_RELEASE__=%d", linaro_release); -+ cpp_define_formatted (pfile, "__LINARO_SPIN__=%d", linaro_spin); - cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED); - cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST); - cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE); ---- a/src/gcc/defaults.h -+++ b/src/gcc/defaults.h -@@ -971,11 +971,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - #define REG_WORDS_BIG_ENDIAN WORDS_BIG_ENDIAN - #endif - --#ifdef TARGET_FLT_EVAL_METHOD --#define TARGET_FLT_EVAL_METHOD_NON_DEFAULT 1 --#else -+#ifndef TARGET_FLT_EVAL_METHOD - #define TARGET_FLT_EVAL_METHOD 0 --#define TARGET_FLT_EVAL_METHOD_NON_DEFAULT 0 - #endif - - #ifndef TARGET_DEC_EVAL_METHOD ---- a/src/gcc/expmed.c -+++ b/src/gcc/expmed.c -@@ -2522,16 +2522,8 @@ expand_variable_shift (enum tree_code code, machine_mode mode, rtx shifted, - } - - --/* Indicates the type of fixup needed after a constant multiplication. -- BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that -- the result should be negated, and ADD_VARIANT means that the -- multiplicand should be added to the result. */ --enum mult_variant {basic_variant, negate_variant, add_variant}; -- - static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, - const struct mult_cost *, machine_mode mode); --static bool choose_mult_variant (machine_mode, HOST_WIDE_INT, -- struct algorithm *, enum mult_variant *, int); - static rtx expand_mult_const (machine_mode, rtx, HOST_WIDE_INT, rtx, - const struct algorithm *, enum mult_variant); - static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); -@@ -3021,7 +3013,7 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, - Return true if the cheapest of these cost less than MULT_COST, - describing the algorithm in *ALG and final fixup in *VARIANT. */ - --static bool -+bool - choose_mult_variant (machine_mode mode, HOST_WIDE_INT val, - struct algorithm *alg, enum mult_variant *variant, - int mult_cost) ---- a/src/gcc/expmed.h -+++ b/src/gcc/expmed.h -@@ -35,6 +35,15 @@ enum alg_code { - alg_impossible - }; - -+/* Indicates the type of fixup needed after a constant multiplication. -+ BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that -+ the result should be negated, and ADD_VARIANT means that the -+ multiplicand should be added to the result. */ -+enum mult_variant {basic_variant, negate_variant, add_variant}; -+ -+bool choose_mult_variant (machine_mode, HOST_WIDE_INT, -+ struct algorithm *, enum mult_variant *, int); -+ - /* This structure holds the "cost" of a multiply sequence. 
The - "cost" field holds the total rtx_cost of every operator in the - synthetic multiplication sequence, hence cost(a op b) is defined ---- a/src/gcc/fold-const.c -+++ b/src/gcc/fold-const.c -@@ -7230,7 +7230,16 @@ native_encode_real (const_tree expr, unsigned char *ptr, int len, int off) - offset += byte % UNITS_PER_WORD; - } - else -- offset = BYTES_BIG_ENDIAN ? 3 - byte : byte; -+ { -+ offset = byte; -+ if (BYTES_BIG_ENDIAN) -+ { -+ /* Reverse bytes within each long, or within the entire float -+ if it's smaller than a long (for HFmode). */ -+ offset = MIN (3, total_bytes - 1) - offset; -+ gcc_assert (offset >= 0); -+ } -+ } - offset = offset + ((bitpos / BITS_PER_UNIT) & ~3); - if (offset >= off - && offset - off < len) ---- a/src/gcc/fortran/options.c -+++ b/src/gcc/fortran/options.c -@@ -208,8 +208,7 @@ gfc_post_options (const char **pfilename) - - /* Excess precision other than "fast" requires front-end - support. */ -- if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD -- && TARGET_FLT_EVAL_METHOD_NON_DEFAULT) -+ if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD) - sorry ("-fexcess-precision=standard for Fortran"); - flag_excess_precision_cmdline = EXCESS_PRECISION_FAST; - ---- a/src/gcc/genconditions.c -+++ b/src/gcc/genconditions.c -@@ -94,6 +94,7 @@ write_header (void) - #include \"resource.h\"\n\ - #include \"diagnostic-core.h\"\n\ - #include \"reload.h\"\n\ -+#include \"memmodel.h\"\n\ - #include \"tm-constrs.h\"\n"); - - if (saw_eh_return) ---- a/src/gcc/genemit.c -+++ b/src/gcc/genemit.c -@@ -792,6 +792,7 @@ from the machine description file `md'. */\n\n"); - printf ("#include \"reload.h\"\n"); - printf ("#include \"diagnostic-core.h\"\n"); - printf ("#include \"regs.h\"\n"); -+ printf ("#include \"memmodel.h\"\n"); - printf ("#include \"tm-constrs.h\"\n"); - printf ("#include \"ggc.h\"\n"); - printf ("#include \"dumpfile.h\"\n"); ---- a/src/gcc/genmultilib -+++ b/src/gcc/genmultilib -@@ -186,7 +186,8 @@ fi - EOF - chmod +x tmpmultilib - --combinations=`initial=/ ./tmpmultilib ${options}` -+combination_space=`initial=/ ./tmpmultilib ${options}` -+combinations="$combination_space" - - # If there exceptions, weed them out now - if [ -n "${exceptions}" ]; then -@@ -472,14 +473,19 @@ for rrule in ${multilib_reuse}; do - # in this variable, it means no multilib will be built for current reuse - # rule. Thus the reuse purpose specified by current rule is meaningless. - if expr "${combinations} " : ".*/${combo}/.*" > /dev/null; then -- combo="/${combo}/" -- dirout=`./tmpmultilib3 "${combo}" "${todirnames}" "${toosdirnames}" "${enable_multilib}"` -- copts="/${copts}/" -- optout=`./tmpmultilib4 "${copts}" "${options}"` -- # Output the line with all appropriate matches. -- dirout="${dirout}" optout="${optout}" ./tmpmultilib2 -+ if expr "${combination_space} " : ".*/${copts}/.*" > /dev/null; then -+ combo="/${combo}/" -+ dirout=`./tmpmultilib3 "${combo}" "${todirnames}" "${toosdirnames}" "${enable_multilib}"` -+ copts="/${copts}/" -+ optout=`./tmpmultilib4 "${copts}" "${options}"` -+ # Output the line with all appropriate matches. -+ dirout="${dirout}" optout="${optout}" ./tmpmultilib2 -+ else -+ echo "The rule ${rrule} contains an option absent from MULTILIB_OPTIONS." >&2 -+ exit 1 -+ fi - else -- echo "The rule ${rrule} is trying to reuse nonexistent multilib." -+ echo "The rule ${rrule} is trying to reuse nonexistent multilib." 
>&2 - exit 1 - fi - done ---- a/src/gcc/genoutput.c -+++ b/src/gcc/genoutput.c -@@ -231,6 +231,7 @@ output_prologue (void) - printf ("#include \"diagnostic-core.h\"\n"); - printf ("#include \"output.h\"\n"); - printf ("#include \"target.h\"\n"); -+ printf ("#include \"memmodel.h\"\n"); - printf ("#include \"tm-constrs.h\"\n"); - } - ---- a/src/gcc/genpeep.c -+++ b/src/gcc/genpeep.c -@@ -373,6 +373,7 @@ from the machine description file `md'. */\n\n"); - printf ("#include \"except.h\"\n"); - printf ("#include \"diagnostic-core.h\"\n"); - printf ("#include \"flags.h\"\n"); -+ printf ("#include \"memmodel.h\"\n"); - printf ("#include \"tm-constrs.h\"\n\n"); - - printf ("extern rtx peep_operand[];\n\n"); ---- a/src/gcc/genpreds.c -+++ b/src/gcc/genpreds.c -@@ -1577,6 +1577,7 @@ write_insn_preds_c (void) - #include \"reload.h\"\n\ - #include \"regs.h\"\n\ - #include \"emit-rtl.h\"\n\ -+#include \"memmodel.h\"\n\ - #include \"tm-constrs.h\"\n"); - - FOR_ALL_PREDICATES (p) ---- a/src/gcc/genrecog.c -+++ b/src/gcc/genrecog.c -@@ -4172,6 +4172,7 @@ write_header (void) - #include \"diagnostic-core.h\"\n\ - #include \"reload.h\"\n\ - #include \"regs.h\"\n\ -+#include \"memmodel.h\"\n\ - #include \"tm-constrs.h\"\n\ - \n"); - ---- a/src/gcc/gimple-fold.c -+++ b/src/gcc/gimple-fold.c -@@ -1379,6 +1379,55 @@ gimple_fold_builtin_strncpy (gimple_stmt_iterator *gsi, - return true; - } - -+/* Simplify strchr (str, 0) into str + strlen (str). -+ In general strlen is significantly faster than strchr -+ due to being a simpler operation. */ -+static bool -+gimple_fold_builtin_strchr (gimple_stmt_iterator *gsi) -+{ -+ gimple *stmt = gsi_stmt (*gsi); -+ tree str = gimple_call_arg (stmt, 0); -+ tree c = gimple_call_arg (stmt, 1); -+ location_t loc = gimple_location (stmt); -+ -+ if (optimize_function_for_size_p (cfun)) -+ return false; -+ -+ if (!integer_zerop (c) || !gimple_call_lhs (stmt)) -+ return false; -+ -+ tree len; -+ tree strlen_fn = builtin_decl_implicit (BUILT_IN_STRLEN); -+ -+ if (!strlen_fn) -+ return false; -+ -+ /* Create newstr = strlen (str). */ -+ gimple_seq stmts = NULL; -+ gimple *new_stmt = gimple_build_call (strlen_fn, 1, str); -+ gimple_set_location (new_stmt, loc); -+ if (gimple_in_ssa_p (cfun)) -+ len = make_ssa_name (size_type_node); -+ else -+ len = create_tmp_reg (size_type_node); -+ gimple_call_set_lhs (new_stmt, len); -+ gimple_seq_add_stmt_without_update (&stmts, new_stmt); -+ -+ /* Create (str p+ strlen (str)). */ -+ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), -+ POINTER_PLUS_EXPR, str, len); -+ gimple_seq_add_stmt_without_update (&stmts, new_stmt); -+ gsi_replace_with_seq_vops (gsi, stmts); -+ /* gsi now points at the assignment to the lhs, get a -+ stmt iterator to the strlen. -+ ??? We can't use gsi_for_stmt as that doesn't work when the -+ CFG isn't built yet. */ -+ gimple_stmt_iterator gsi2 = *gsi; -+ gsi_prev (&gsi2); -+ fold_stmt (&gsi2); -+ return true; -+} -+ - /* Simplify a call to the strcat builtin. DST and SRC are the arguments - to the call. - -@@ -2820,6 +2869,11 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi) - gimple_call_arg (stmt, 1)); - case BUILT_IN_STRNCAT: - return gimple_fold_builtin_strncat (gsi); -+ case BUILT_IN_STRCHR: -+ if (gimple_fold_builtin_strchr (gsi)) -+ return true; -+ /* Perform additional folding in builtin.c. 
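
Aside, not part of the patch: at the source level, the
gimple_fold_builtin_strchr transform above performs the rewrite sketched
below whenever the call's result is used and the function is not being
optimized for size:

    #include <string.h>

    char *end_of (char *s)
    {
      return strchr (s, 0);   /* folded to: s + strlen (s) */
    }

The function comment gives the rationale: strlen is generally faster than
strchr because it is the simpler operation.
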
*/ -+ break; - case BUILT_IN_FPUTS: - return gimple_fold_builtin_fputs (gsi, gimple_call_arg (stmt, 0), - gimple_call_arg (stmt, 1), false); ---- a/src/gcc/ifcvt.c -+++ b/src/gcc/ifcvt.c -@@ -813,10 +813,15 @@ struct noce_if_info - - /* Estimated cost of the particular branch instruction. */ - unsigned int branch_cost; -+ -+ /* The name of the noce transform that succeeded in if-converting -+ this structure. Used for debugging. */ -+ const char *transform_name; - }; - - static rtx noce_emit_store_flag (struct noce_if_info *, rtx, int, int); - static int noce_try_move (struct noce_if_info *); -+static int noce_try_ifelse_collapse (struct noce_if_info *); - static int noce_try_store_flag (struct noce_if_info *); - static int noce_try_addcc (struct noce_if_info *); - static int noce_try_store_flag_constants (struct noce_if_info *); -@@ -1115,11 +1120,45 @@ noce_try_move (struct noce_if_info *if_info) - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); - } -+ if_info->transform_name = "noce_try_move"; - return TRUE; - } - return FALSE; - } - -+/* Try forming an IF_THEN_ELSE (cond, b, a) and collapsing that -+ through simplify_rtx. Sometimes that can eliminate the IF_THEN_ELSE. -+ If that is the case, emit the result into x. */ -+ -+static int -+noce_try_ifelse_collapse (struct noce_if_info * if_info) -+{ -+ if (!noce_simple_bbs (if_info)) -+ return FALSE; -+ -+ machine_mode mode = GET_MODE (if_info->x); -+ rtx if_then_else = simplify_gen_ternary (IF_THEN_ELSE, mode, mode, -+ if_info->cond, if_info->b, -+ if_info->a); -+ -+ if (GET_CODE (if_then_else) == IF_THEN_ELSE) -+ return FALSE; -+ -+ rtx_insn *seq; -+ start_sequence (); -+ noce_emit_move_insn (if_info->x, if_then_else); -+ seq = end_ifcvt_sequence (if_info); -+ if (!seq) -+ return FALSE; -+ -+ emit_insn_before_setloc (seq, if_info->jump, -+ INSN_LOCATION (if_info->insn_a)); -+ -+ if_info->transform_name = "noce_try_ifelse_collapse"; -+ return TRUE; -+} -+ -+ - /* Convert "if (test) x = 1; else x = 0". - - Only try 0 and STORE_FLAG_VALUE here. 
Other combinations will be -@@ -1163,6 +1202,7 @@ noce_try_store_flag (struct noce_if_info *if_info) - - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_store_flag"; - return TRUE; - } - else -@@ -1241,6 +1281,7 @@ noce_try_inverse_constants (struct noce_if_info *if_info) - - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_inverse_constants"; - return true; - } - -@@ -1461,6 +1502,8 @@ noce_try_store_flag_constants (struct noce_if_info *if_info) - - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_store_flag_constants"; -+ - return TRUE; - } - -@@ -1513,6 +1556,8 @@ noce_try_addcc (struct noce_if_info *if_info) - - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_addcc"; -+ - return TRUE; - } - end_sequence (); -@@ -1553,6 +1598,7 @@ noce_try_addcc (struct noce_if_info *if_info) - - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_addcc"; - return TRUE; - } - end_sequence (); -@@ -1617,6 +1663,8 @@ noce_try_store_flag_mask (struct noce_if_info *if_info) - - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_store_flag_mask"; -+ - return TRUE; - } - -@@ -1767,6 +1815,8 @@ noce_try_cmove (struct noce_if_info *if_info) - - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_cmove"; -+ - return TRUE; - } - /* If both a and b are constants try a last-ditch transformation: -@@ -1820,6 +1870,7 @@ noce_try_cmove (struct noce_if_info *if_info) - - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_cmove"; - return TRUE; - } - else -@@ -2273,6 +2324,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info) - - emit_insn_before_setloc (ifcvt_seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_cmove_arith"; - return TRUE; - - end_seq_and_fail: -@@ -2364,28 +2416,32 @@ noce_get_alt_condition (struct noce_if_info *if_info, rtx target, - switch (code) - { - case LT: -- if (actual_val == desired_val + 1) -+ if (desired_val != HOST_WIDE_INT_MAX -+ && actual_val == desired_val + 1) - { - code = LE; - op_b = GEN_INT (desired_val); - } - break; - case LE: -- if (actual_val == desired_val - 1) -+ if (desired_val != HOST_WIDE_INT_MIN -+ && actual_val == desired_val - 1) - { - code = LT; - op_b = GEN_INT (desired_val); - } - break; - case GT: -- if (actual_val == desired_val - 1) -+ if (desired_val != HOST_WIDE_INT_MIN -+ && actual_val == desired_val - 1) - { - code = GE; - op_b = GEN_INT (desired_val); - } - break; - case GE: -- if (actual_val == desired_val + 1) -+ if (desired_val != HOST_WIDE_INT_MAX -+ && actual_val == desired_val + 1) - { - code = GT; - op_b = GEN_INT (desired_val); -@@ -2525,6 +2581,7 @@ noce_try_minmax (struct noce_if_info *if_info) - emit_insn_before_setloc (seq, if_info->jump, INSN_LOCATION (if_info->insn_a)); - if_info->cond = cond; - if_info->cond_earliest = earliest; -+ if_info->transform_name = "noce_try_minmax"; - - return TRUE; - } -@@ -2691,6 +2748,7 @@ noce_try_abs (struct noce_if_info *if_info) - emit_insn_before_setloc (seq, if_info->jump, INSN_LOCATION (if_info->insn_a)); - 
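
/* Aside for this excerpt, not in the patch: the HOST_WIDE_INT_MAX and
   HOST_WIDE_INT_MIN guards added to noce_get_alt_condition above protect
   the strict/non-strict comparison rewrites, e.g. "x < C + 1" becoming
   "x <= C" and "x > C - 1" becoming "x >= C"; computing desired_val + 1
   or desired_val - 1 at the extreme values would otherwise overflow.  */
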
if_info->cond = cond; - if_info->cond_earliest = earliest; -+ if_info->transform_name = "noce_try_abs"; - - return TRUE; - } -@@ -2772,6 +2830,8 @@ noce_try_sign_mask (struct noce_if_info *if_info) - return FALSE; - - emit_insn_before_setloc (seq, if_info->jump, INSN_LOCATION (if_info->insn_a)); -+ if_info->transform_name = "noce_try_sign_mask"; -+ - return TRUE; - } - -@@ -2877,6 +2937,7 @@ noce_try_bitop (struct noce_if_info *if_info) - emit_insn_before_setloc (seq, if_info->jump, - INSN_LOCATION (if_info->insn_a)); - } -+ if_info->transform_name = "noce_try_bitop"; - return TRUE; - } - -@@ -3167,6 +3228,41 @@ noce_convert_multiple_sets (struct noce_if_info *if_info) - if (if_info->then_else_reversed) - std::swap (old_val, new_val); - -+ -+ /* We allow simple lowpart register subreg SET sources in -+ bb_ok_for_noce_convert_multiple_sets. Be careful when processing -+ sequences like: -+ (set (reg:SI r1) (reg:SI r2)) -+ (set (reg:HI r3) (subreg:HI (r1))) -+ For the second insn new_val or old_val (r1 in this example) will be -+ taken from the temporaries and have the wider mode which will not -+ match with the mode of the other source of the conditional move, so -+ we'll end up trying to emit r4:HI = cond ? (r1:SI) : (r3:HI). -+ Wrap the two cmove operands into subregs if appropriate to prevent -+ that. */ -+ if (GET_MODE (new_val) != GET_MODE (temp)) -+ { -+ machine_mode src_mode = GET_MODE (new_val); -+ machine_mode dst_mode = GET_MODE (temp); -+ if (GET_MODE_SIZE (src_mode) <= GET_MODE_SIZE (dst_mode)) -+ { -+ end_sequence (); -+ return FALSE; -+ } -+ new_val = lowpart_subreg (dst_mode, new_val, src_mode); -+ } -+ if (GET_MODE (old_val) != GET_MODE (temp)) -+ { -+ machine_mode src_mode = GET_MODE (old_val); -+ machine_mode dst_mode = GET_MODE (temp); -+ if (GET_MODE_SIZE (src_mode) <= GET_MODE_SIZE (dst_mode)) -+ { -+ end_sequence (); -+ return FALSE; -+ } -+ old_val = lowpart_subreg (dst_mode, old_val, src_mode); -+ } -+ - /* Actually emit the conditional move. */ - rtx temp_dest = noce_emit_cmove (if_info, temp, cond_code, - x, y, new_val, old_val); -@@ -3240,6 +3336,7 @@ noce_convert_multiple_sets (struct noce_if_info *if_info) - } - - num_updated_if_blocks++; -+ if_info->transform_name = "noce_convert_multiple_sets"; - return TRUE; - } - -@@ -3277,9 +3374,15 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, - rtx src = SET_SRC (set); - - /* We can possibly relax this, but for now only handle REG to REG -- moves. This avoids any issues that might come from introducing -- loads/stores that might violate data-race-freedom guarantees. */ -- if (!(REG_P (src) && REG_P (dest))) -+ (including subreg) moves. This avoids any issues that might come -+ from introducing loads/stores that might violate data-race-freedom -+ guarantees. */ -+ if (!REG_P (dest)) -+ return false; -+ -+ if (!(REG_P (src) -+ || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src)) -+ && subreg_lowpart_p (src)))) - return false; - - /* Destination must be appropriate for a conditional write. */ -@@ -3336,7 +3439,12 @@ noce_process_if_block (struct noce_if_info *if_info) - && bb_ok_for_noce_convert_multiple_sets (then_bb, if_info)) - { - if (noce_convert_multiple_sets (if_info)) -- return TRUE; -+ { -+ if (dump_file && if_info->transform_name) -+ fprintf (dump_file, "if-conversion succeeded through %s\n", -+ if_info->transform_name); -+ return TRUE; -+ } - } - - if (! 
bb_valid_for_noce_process_p (then_bb, cond, &if_info->then_cost, -@@ -3493,6 +3601,8 @@ noce_process_if_block (struct noce_if_info *if_info) - - if (noce_try_move (if_info)) - goto success; -+ if (noce_try_ifelse_collapse (if_info)) -+ goto success; - if (noce_try_store_flag (if_info)) - goto success; - if (noce_try_bitop (if_info)) -@@ -3533,6 +3643,9 @@ noce_process_if_block (struct noce_if_info *if_info) - return FALSE; - - success: -+ if (dump_file && if_info->transform_name) -+ fprintf (dump_file, "if-conversion succeeded through %s\n", -+ if_info->transform_name); - - /* If we used a temporary, fix it up now. */ - if (orig_x != x) ---- a/src/gcc/internal-fn.c -+++ b/src/gcc/internal-fn.c -@@ -1812,11 +1812,7 @@ expand_arith_overflow (enum tree_code code, gimple *stmt) - /* For sub-word operations, retry with a wider type first. */ - if (orig_precres == precres && precop <= BITS_PER_WORD) - { --#if WORD_REGISTER_OPERATIONS -- int p = BITS_PER_WORD; --#else -- int p = precop; --#endif -+ int p = WORD_REGISTER_OPERATIONS ? BITS_PER_WORD : precop; - enum machine_mode m = smallest_mode_for_size (p, MODE_INT); - tree optype = build_nonstandard_integer_type (GET_MODE_PRECISION (m), - uns0_p && uns1_p ---- a/src/gcc/java/lang.c -+++ b/src/gcc/java/lang.c -@@ -569,8 +569,7 @@ java_post_options (const char **pfilename) - - /* Excess precision other than "fast" requires front-end - support. */ -- if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD -- && TARGET_FLT_EVAL_METHOD_NON_DEFAULT) -+ if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD) - sorry ("-fexcess-precision=standard for Java"); - flag_excess_precision_cmdline = EXCESS_PRECISION_FAST; - ---- a/src/gcc/lra-constraints.c -+++ b/src/gcc/lra-constraints.c -@@ -1326,7 +1326,22 @@ process_addr_reg (rtx *loc, bool check_only_p, rtx_insn **before, rtx_insn **aft - - subreg_p = GET_CODE (*loc) == SUBREG; - if (subreg_p) -- loc = &SUBREG_REG (*loc); -+ { -+ reg = SUBREG_REG (*loc); -+ mode = GET_MODE (reg); -+ -+ /* For mode with size bigger than ptr_mode, there unlikely to be "mov" -+ between two registers with different classes, but there normally will -+ be "mov" which transfers element of vector register into the general -+ register, and this normally will be a subreg which should be reloaded -+ as a whole. This is particularly likely to be triggered when -+ -fno-split-wide-types specified. */ -+ if (!REG_P (reg) -+ || in_class_p (reg, cl, &new_class) -+ || GET_MODE_SIZE (mode) <= GET_MODE_SIZE (ptr_mode)) -+ loc = &SUBREG_REG (*loc); -+ } -+ - reg = *loc; - mode = GET_MODE (reg); - if (! REG_P (reg)) -@@ -2475,14 +2490,29 @@ process_alt_operands (int only_alternative) - /* We are trying to spill pseudo into memory. It is - usually more costly than moving to a hard register - although it might takes the same number of -- reloads. */ -- if (no_regs_p && REG_P (op) && hard_regno[nop] >= 0) -+ reloads. -+ -+ Non-pseudo spill may happen also. Suppose a target allows both -+ register and memory in the operand constraint alternatives, -+ then it's typical that an eliminable register has a substition -+ of "base + offset" which can either be reloaded by a simple -+ "new_reg <= base + offset" which will match the register -+ constraint, or a similar reg addition followed by further spill -+ to and reload from memory which will match the memory -+ constraint, but this memory spill will be much more costly -+ usually. -+ -+ Code below increases the reject for both pseudo and non-pseudo -+ spill. 
*/ -+ if (no_regs_p -+ && !(MEM_P (op) && offmemok) -+ && !(REG_P (op) && hard_regno[nop] < 0)) - { - if (lra_dump_file != NULL) - fprintf - (lra_dump_file, -- " %d Spill pseudo into memory: reject+=3\n", -- nop); -+ " %d Spill %spseudo into memory: reject+=3\n", -+ nop, REG_P (op) ? "" : "Non-"); - reject += 3; - if (VECTOR_MODE_P (mode)) - { ---- a/src/gcc/lto/lto-partition.c -+++ b/src/gcc/lto/lto-partition.c -@@ -447,7 +447,7 @@ add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition) - and in-partition calls was reached. */ - - void --lto_balanced_map (int n_lto_partitions) -+lto_balanced_map (int n_lto_partitions, int max_partition_size) - { - int n_nodes = 0; - int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0; -@@ -511,6 +511,9 @@ lto_balanced_map (int n_lto_partitions) - varpool_order.qsort (varpool_node_cmp); - - /* Compute partition size and create the first partition. */ -+ if (PARAM_VALUE (MIN_PARTITION_SIZE) > max_partition_size) -+ fatal_error (input_location, "min partition size cannot be greater than max partition size"); -+ - partition_size = total_size / n_lto_partitions; - if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) - partition_size = PARAM_VALUE (MIN_PARTITION_SIZE); -@@ -719,7 +722,8 @@ lto_balanced_map (int n_lto_partitions) - best_cost, best_internal, best_i); - /* Partition is too large, unwind into step when best cost was reached and - start new partition. */ -- if (partition->insns > 2 * partition_size) -+ if (partition->insns > 2 * partition_size -+ || partition->insns > max_partition_size) - { - if (best_i != i) - { ---- a/src/gcc/lto/lto-partition.h -+++ b/src/gcc/lto/lto-partition.h -@@ -35,7 +35,7 @@ extern vec<ltrans_partition> ltrans_partitions; - - void lto_1_to_1_map (void); - void lto_max_map (void); --void lto_balanced_map (int); -+void lto_balanced_map (int, int); - void lto_promote_cross_file_statics (void); - void free_ltrans_partitions (void); - void lto_promote_statics_nonwpa (void); ---- a/src/gcc/lto/lto.c -+++ b/src/gcc/lto/lto.c -@@ -3123,9 +3123,10 @@ do_whole_program_analysis (void) - else if (flag_lto_partition == LTO_PARTITION_MAX) - lto_max_map (); - else if (flag_lto_partition == LTO_PARTITION_ONE) -- lto_balanced_map (1); -+ lto_balanced_map (1, INT_MAX); - else if (flag_lto_partition == LTO_PARTITION_BALANCED) -- lto_balanced_map (PARAM_VALUE (PARAM_LTO_PARTITIONS)); -+ lto_balanced_map (PARAM_VALUE (PARAM_LTO_PARTITIONS), -+ PARAM_VALUE (MAX_PARTITION_SIZE)); - else - gcc_unreachable (); - ---- a/src/gcc/match.pd -+++ b/src/gcc/match.pd -@@ -468,6 +468,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) - (bit_and:c (convert? @0) (convert? (bit_not @0))) - { build_zero_cst (type); }) - -+/* PR71636: Transform x & ((1U << b) - 1) -> x & ~(~0U << b); */ -+(simplify -+ (bit_and:c @0 (plus:s (lshift:s integer_onep @1) integer_minus_onep)) -+ (if (TYPE_UNSIGNED (type)) -+ (bit_and @0 (bit_not (lshift { build_all_ones_cst (type); } @1))))) -+ - /* Fold (A & ~B) - (A & B) into (A ^ B) - B. */ - (simplify - (minus (bit_and:cs @0 (bit_not @1)) (bit_and:cs @0 @1)) ---- /dev/null -+++ b/src/gcc/memmodel.h -@@ -0,0 +1,86 @@ -+/* Prototypes of memory model helper functions. -+ Copyright (C) 2015-2016 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it under -+the terms of the GNU General Public License as published by the Free -+Software Foundation; either version 3, or (at your option) any later -+version. 
-+ -+GCC is distributed in the hope that it will be useful, but WITHOUT ANY -+WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+<http://www.gnu.org/licenses/>. */ -+ -+#ifndef GCC_MEMMODEL_H -+#define GCC_MEMMODEL_H -+ -+/* Return the memory model from a host integer. */ -+static inline enum memmodel -+memmodel_from_int (unsigned HOST_WIDE_INT val) -+{ -+ return (enum memmodel) (val & MEMMODEL_MASK); -+} -+ -+/* Return the base memory model from a host integer. */ -+static inline enum memmodel -+memmodel_base (unsigned HOST_WIDE_INT val) -+{ -+ return (enum memmodel) (val & MEMMODEL_BASE_MASK); -+} -+ -+/* Return TRUE if the memory model is RELAXED. */ -+static inline bool -+is_mm_relaxed (enum memmodel model) -+{ -+ return (model & MEMMODEL_BASE_MASK) == MEMMODEL_RELAXED; -+} -+ -+/* Return TRUE if the memory model is CONSUME. */ -+static inline bool -+is_mm_consume (enum memmodel model) -+{ -+ return (model & MEMMODEL_BASE_MASK) == MEMMODEL_CONSUME; -+} -+ -+/* Return TRUE if the memory model is ACQUIRE. */ -+static inline bool -+is_mm_acquire (enum memmodel model) -+{ -+ return (model & MEMMODEL_BASE_MASK) == MEMMODEL_ACQUIRE; -+} -+ -+/* Return TRUE if the memory model is RELEASE. */ -+static inline bool -+is_mm_release (enum memmodel model) -+{ -+ return (model & MEMMODEL_BASE_MASK) == MEMMODEL_RELEASE; -+} -+ -+/* Return TRUE if the memory model is ACQ_REL. */ -+static inline bool -+is_mm_acq_rel (enum memmodel model) -+{ -+ return (model & MEMMODEL_BASE_MASK) == MEMMODEL_ACQ_REL; -+} -+ -+/* Return TRUE if the memory model is SEQ_CST. */ -+static inline bool -+is_mm_seq_cst (enum memmodel model) -+{ -+ return (model & MEMMODEL_BASE_MASK) == MEMMODEL_SEQ_CST; -+} -+ -+/* Return TRUE if the memory model is a SYNC variant. */ -+static inline bool -+is_mm_sync (enum memmodel model) -+{ -+ return (model & MEMMODEL_SYNC); -+} -+ -+#endif /* GCC_MEMMODEL_H */ ---- a/src/gcc/optabs.c -+++ b/src/gcc/optabs.c -@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see - #include "target.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "predict.h" - #include "tm_p.h" - #include "expmed.h" ---- a/src/gcc/params.def -+++ b/src/gcc/params.def -@@ -1027,7 +1027,12 @@ DEFPARAM (PARAM_LTO_PARTITIONS, - DEFPARAM (MIN_PARTITION_SIZE, - "lto-min-partition", - "Minimal size of a partition for LTO (in estimated instructions).", -- 1000, 0, 0) -+ 10000, 0, 0) -+ -+DEFPARAM (MAX_PARTITION_SIZE, -+ "lto-max-partition", -+ "Maximal size of a partition for LTO (in estimated instructions).", -+ 1000000, 0, INT_MAX) - - /* Diagnostic parameters. */ - ---- a/src/gcc/rtlanal.c -+++ b/src/gcc/rtlanal.c -@@ -3663,6 +3663,16 @@ subreg_get_info (unsigned int xregno, machine_mode xmode, - info->offset = offset / regsize_xmode; - return; - } -+ /* It's not valid to extract a subreg of mode YMODE at OFFSET that -+ would go outside of XMODE. */ -+ if (!rknown -+ && GET_MODE_SIZE (ymode) + offset > GET_MODE_SIZE (xmode)) -+ { -+ info->representable_p = false; -+ info->nregs = nregs_ymode; -+ info->offset = offset / regsize_xmode; -+ return; -+ } - /* Quick exit for the simple and common case of extracting whole - subregisters from a multiregister value. */ - /* ??? 
It would be better to integrate this into the code below, -@@ -4590,13 +4600,14 @@ nonzero_bits1 (const_rtx x, machine_mode mode, const_rtx known_x, - nonzero &= cached_nonzero_bits (SUBREG_REG (x), mode, - known_x, known_mode, known_ret); - --#if WORD_REGISTER_OPERATIONS && defined (LOAD_EXTEND_OP) -+#ifdef LOAD_EXTEND_OP - /* If this is a typical RISC machine, we only have to worry - about the way loads are extended. */ -- if ((LOAD_EXTEND_OP (inner_mode) == SIGN_EXTEND -- ? val_signbit_known_set_p (inner_mode, nonzero) -- : LOAD_EXTEND_OP (inner_mode) != ZERO_EXTEND) -- || !MEM_P (SUBREG_REG (x))) -+ if (WORD_REGISTER_OPERATIONS -+ && ((LOAD_EXTEND_OP (inner_mode) == SIGN_EXTEND -+ ? val_signbit_known_set_p (inner_mode, nonzero) -+ : LOAD_EXTEND_OP (inner_mode) != ZERO_EXTEND) -+ || !MEM_P (SUBREG_REG (x)))) - #endif - { - /* On many CISC machines, accessing an object in a wider mode ---- a/src/gcc/simplify-rtx.c -+++ b/src/gcc/simplify-rtx.c -@@ -5274,6 +5274,50 @@ simplify_const_relational_operation (enum rtx_code code, - - return 0; - } -+ -+/* Recognize expressions of the form (X CMP 0) ? VAL : OP (X) -+ where OP is CLZ or CTZ and VAL is the value from CLZ_DEFINED_VALUE_AT_ZERO -+ or CTZ_DEFINED_VALUE_AT_ZERO respectively and return OP (X) if the expression -+ can be simplified to that or NULL_RTX if not. -+ Assume X is compared against zero with CMP_CODE and the true -+ arm is TRUE_VAL and the false arm is FALSE_VAL. */ -+ -+static rtx -+simplify_cond_clz_ctz (rtx x, rtx_code cmp_code, rtx true_val, rtx false_val) -+{ -+ if (cmp_code != EQ && cmp_code != NE) -+ return NULL_RTX; -+ -+ /* Result on X == 0 and X !=0 respectively. */ -+ rtx on_zero, on_nonzero; -+ if (cmp_code == EQ) -+ { -+ on_zero = true_val; -+ on_nonzero = false_val; -+ } -+ else -+ { -+ on_zero = false_val; -+ on_nonzero = true_val; -+ } -+ -+ rtx_code op_code = GET_CODE (on_nonzero); -+ if ((op_code != CLZ && op_code != CTZ) -+ || !rtx_equal_p (XEXP (on_nonzero, 0), x) -+ || !CONST_INT_P (on_zero)) -+ return NULL_RTX; -+ -+ HOST_WIDE_INT op_val; -+ if (((op_code == CLZ -+ && CLZ_DEFINED_VALUE_AT_ZERO (GET_MODE (on_nonzero), op_val)) -+ || (op_code == CTZ -+ && CTZ_DEFINED_VALUE_AT_ZERO (GET_MODE (on_nonzero), op_val))) -+ && op_val == INTVAL (on_zero)) -+ return on_nonzero; -+ -+ return NULL_RTX; -+} -+ - - /* Simplify CODE, an operation with result mode MODE and three operands, - OP0, OP1, and OP2. OP0_MODE was the mode of OP0 before it became -@@ -5407,6 +5451,19 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode, - } - } - -+ /* Convert x == 0 ? N : clz (x) into clz (x) when -+ CLZ_DEFINED_VALUE_AT_ZERO is defined to N for the mode of x. -+ Similarly for ctz (x). */ -+ if (COMPARISON_P (op0) && !side_effects_p (op0) -+ && XEXP (op0, 1) == const0_rtx) -+ { -+ rtx simplified -+ = simplify_cond_clz_ctz (XEXP (op0, 0), GET_CODE (op0), -+ op1, op2); -+ if (simplified) -+ return simplified; -+ } -+ - if (COMPARISON_P (op0) && ! 
side_effects_p (op0)) - { - machine_mode cmp_mode = (GET_MODE (XEXP (op0, 0)) == VOIDmode ---- a/src/gcc/system.h -+++ b/src/gcc/system.h -@@ -971,7 +971,8 @@ extern void fancy_abort (const char *, int, const char *) ATTRIBUTE_NORETURN; - EXTRA_ADDRESS_CONSTRAINT CONST_DOUBLE_OK_FOR_CONSTRAINT_P \ - CALLER_SAVE_PROFITABLE LARGEST_EXPONENT_IS_NORMAL \ - ROUND_TOWARDS_ZERO SF_SIZE DF_SIZE XF_SIZE TF_SIZE LIBGCC2_TF_CEXT \ -- LIBGCC2_LONG_DOUBLE_TYPE_SIZE STRUCT_VALUE EH_FRAME_IN_DATA_SECTION -+ LIBGCC2_LONG_DOUBLE_TYPE_SIZE STRUCT_VALUE \ -+ EH_FRAME_IN_DATA_SECTION TARGET_FLT_EVAL_METHOD_NON_DEFAULT - - /* Hooks that are no longer used. */ - #pragma GCC poison LANG_HOOKS_FUNCTION_MARK LANG_HOOKS_FUNCTION_FREE \ ---- a/src/gcc/testsuite/c-c++-common/asan/clone-test-1.c -+++ b/src/gcc/testsuite/c-c++-common/asan/clone-test-1.c -@@ -29,6 +29,10 @@ int main(int argc, char **argv) { - char *sp = child_stack + kStackSize; /* Stack grows down. */ - printf("Parent: %p\n", sp); - pid_t clone_pid = clone(Child, sp, CLONE_FILES | CLONE_VM, NULL, 0, 0, 0); -+ if (clone_pid == -1) { -+ perror("clone"); -+ return 1; -+ } - int status; - pid_t wait_result = waitpid(clone_pid, &status, __WCLONE); - if (wait_result < 0) { ---- a/src/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-3.C -+++ b/src/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-3.C -@@ -1,5 +1,6 @@ - /* Test various operators on __fp16 and mixed __fp16/float operands. */ - /* { dg-do run { target arm*-*-* } } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - #include "arm-fp16-ops.h" ---- a/src/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-4.C -+++ b/src/gcc/testsuite/g++.dg/ext/arm-fp16/arm-fp16-ops-4.C -@@ -1,5 +1,6 @@ - /* Test various operators on __fp16 and mixed __fp16/float operands. */ - /* { dg-do run { target arm*-*-* } } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative -ffast-math" } */ - - #include "arm-fp16-ops.h" ---- a/src/gcc/testsuite/g++.dg/ext/arm-fp16/fp16-param-1.C -+++ b/src/gcc/testsuite/g++.dg/ext/arm-fp16/fp16-param-1.C -@@ -1,10 +1,14 @@ - /* { dg-do compile { target arm*-*-* } } */ - /* { dg-options "-mfp16-format=ieee" } */ - --/* Functions cannot have parameters of type __fp16. */ --extern void f (__fp16); /* { dg-error "parameters cannot have __fp16 type" } */ --extern void (*pf) (__fp16); /* { dg-error "parameters cannot have __fp16 type" } */ -+/* Test that the ACLE macro is defined. */ -+#if __ARM_FP16_ARGS != 1 -+#error Unexpected value for __ARM_FP16_ARGS -+#endif -+ -+/* Test that __fp16 is supported as a parameter type. */ -+extern void f (__fp16); -+extern void (*pf) (__fp16); - --/* These should be OK. */ - extern void g (__fp16 *); - extern void (*pg) (__fp16 *); ---- a/src/gcc/testsuite/g++.dg/ext/arm-fp16/fp16-return-1.C -+++ b/src/gcc/testsuite/g++.dg/ext/arm-fp16/fp16-return-1.C -@@ -1,10 +1,9 @@ - /* { dg-do compile { target arm*-*-* } } */ - /* { dg-options "-mfp16-format=ieee" } */ - --/* Functions cannot return type __fp16. */ --extern __fp16 f (void); /* { dg-error "cannot return __fp16" } */ --extern __fp16 (*pf) (void); /* { dg-error "cannot return __fp16" } */ -+/* Test that __fp16 is supported as a return type. */ -+extern __fp16 f (void); -+extern __fp16 (*pf) (void); - --/* These should be OK. 
*/ - extern __fp16 *g (void); - extern __fp16 *(*pg) (void); ---- a/src/gcc/testsuite/g++.dg/inherit/thunk1.C -+++ b/src/gcc/testsuite/g++.dg/inherit/thunk1.C -@@ -1,4 +1,5 @@ --// { dg-do run { target i?86-*-* x86_64-*-* s390*-*-* alpha*-*-* ia64-*-* sparc*-*-* } } -+// { dg-do run { target arm*-*-* aarch64*-*-* i?86-*-* x86_64-*-* s390*-*-* alpha*-*-* ia64-*-* sparc*-*-* } } -+// { dg-skip-if "" { arm_thumb1_ok } } - - #include <stdarg.h> - ---- a/src/gcc/testsuite/g++.dg/lto/pr69589_0.C -+++ b/src/gcc/testsuite/g++.dg/lto/pr69589_0.C -@@ -1,6 +1,8 @@ - // { dg-lto-do link } --// { dg-lto-options "-O2 -rdynamic" } -+// { dg-lto-options "-O2 -rdynamic" } - // { dg-extra-ld-options "-r -nostdlib" } -+// { dg-skip-if "Skip targets without -rdynamic support" { arm*-none-eabi aarch64*-*-elf } { "*" } { "" } } -+ - #pragma GCC visibility push(hidden) - struct A { int &operator[] (long); }; - template <typename> struct B; ---- /dev/null -+++ b/src/gcc/testsuite/g++.dg/opt/pr78201.C -@@ -0,0 +1,13 @@ -+// PR middle-end/78201 -+// { dg-do compile } -+// { dg-options "-O2" } -+ -+struct B { long d (); } *c; -+long e; -+ -+void -+foo () -+{ -+ char a[e] = ""; -+ c && c->d(); -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/compile/pr71112.c -@@ -0,0 +1,10 @@ -+/* PR target/71112. */ -+/* { dg-additional-options "-fpie" { target pie } } */ -+ -+extern int dbs[100]; -+void f (int *); -+int nscd_init (void) -+{ -+ f (dbs); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/compile/pr71295.c -@@ -0,0 +1,12 @@ -+extern void fn2 (long long); -+int a; -+ -+void -+fn1 () -+{ -+ long long b[3]; -+ a = 0; -+ for (; a < 3; a++) -+ b[a] = 1; -+ fn2 (b[1]); -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/compile/pr78362.c -@@ -0,0 +1,11 @@ -+/* PR target/78362. */ -+ -+long a; -+ -+void -+foo (void) -+{ -+ for (;; a--) -+ if ((int) a) -+ break; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/compile/pr78694.c -@@ -0,0 +1,118 @@ -+/* PR target/78694. 
*/ -+ -+enum -+{ -+ MEMMODEL_RELAXED, -+ MEMMODEL_ACQUIRE, -+ PRIORITY_INSERT_END -+}; -+enum -+{ -+ PQ_CHILDREN, -+ PQ_TASKGROUP -+}; -+struct gomp_team_state -+{ -+ struct gomp_team *team; -+}; -+enum gomp_task_kind -+{ -+ GOMP_TASK_UNDEFERRED, -+ GOMP_TASK_WAITING -+}; -+struct gomp_taskwait -+{ -+ _Bool in_taskwait; -+}; -+struct gomp_task -+{ -+ struct gomp_task *parent; -+ int children_queue; -+ struct gomp_taskgroup *taskgroup; -+ int dependers; -+ struct gomp_taskwait taskwait; -+ enum gomp_task_kind kind; -+ _Bool in_tied_task; -+} j, q, *n; -+struct gomp_taskgroup -+{ -+ _Bool in_taskgroup_wait; -+ int num_children; -+} l; -+struct gomp_team -+{ -+ int task_queue; -+ int task_running_count; -+}; -+struct gomp_thread -+{ -+ struct gomp_team_state ts; -+ struct gomp_task task; -+} extern __thread a; -+ -+int b, c, d, e, f, g, h, i, k, m, o, p, r; -+ -+void priority_queue_next_task (struct gomp_task *, int, int); -+int gomp_task_run_pre (struct gomp_task *, struct gomp_task, struct gomp_team); -+void priority_queue_insert (int, struct gomp_task); -+void priority_queue_insert2 (int, struct gomp_task, int, int, int); -+void priority_queue_insert3 (int, struct gomp_task, int, int, int); -+void gomp_sem_post (int); -+void free (void *); -+ -+_Bool s; -+int -+GOMP_taskgroup_end () -+{ -+ struct gomp_thread *t = &a; -+ struct gomp_team u = *t->ts.team; -+ struct gomp_task *v = &t->task, *w; -+ if (__atomic_load_n (&l.num_children, MEMMODEL_ACQUIRE)) -+ while (1) -+ { -+ if (l.num_children) -+ priority_queue_next_task (v, u.task_queue, r); -+ else if (w) -+ free (w); -+ if (n->kind == GOMP_TASK_WAITING) -+ { -+ s = gomp_task_run_pre (n, q, u); -+ if (__builtin_expect (s, 0)) -+ { -+ if (w) -+ free (w); -+ goto finish_cancelled; -+ } -+ n = 0; -+ l.in_taskgroup_wait = 1; -+ } -+ if (w) -+ { -+ t->task = *n; -+ if (__builtin_expect (p, 0)) -+ if (o) -+ t->task = *v; -+ } -+ if (n) -+ { -+ struct gomp_task x = x; -+ for (; i; b++) -+ { -+ struct gomp_task y = j; -+ if (g) -+ continue; -+ priority_queue_insert (PQ_CHILDREN, x); -+ if (x.taskwait.in_taskwait) -+ priority_queue_insert2 (PQ_TASKGROUP, y, e, 0, d); -+ if (h) -+ gomp_sem_post (f); -+ priority_queue_insert3 (k, y, PRIORITY_INSERT_END, 0, d); -+ ++c; -+ } -+ } -+ finish_cancelled: -+ w = (struct gomp_task *) (n - u.task_running_count - v); -+ } -+ v->taskgroup = (struct gomp_taskgroup *) m; -+ return 1; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr37780.c -@@ -0,0 +1,49 @@ -+/* PR middle-end/37780. */ -+ -+#define VAL (8 * sizeof (int)) -+ -+int __attribute__ ((noinline, noclone)) -+fooctz (int i) -+{ -+ return (i == 0) ? VAL : __builtin_ctz (i); -+} -+ -+int __attribute__ ((noinline, noclone)) -+fooctz2 (int i) -+{ -+ return (i != 0) ? __builtin_ctz (i) : VAL; -+} -+ -+unsigned int __attribute__ ((noinline, noclone)) -+fooctz3 (unsigned int i) -+{ -+ return (i > 0) ? __builtin_ctz (i) : VAL; -+} -+ -+int __attribute__ ((noinline, noclone)) -+fooclz (int i) -+{ -+ return (i == 0) ? VAL : __builtin_clz (i); -+} -+ -+int __attribute__ ((noinline, noclone)) -+fooclz2 (int i) -+{ -+ return (i != 0) ? __builtin_clz (i) : VAL; -+} -+ -+unsigned int __attribute__ ((noinline, noclone)) -+fooclz3 (unsigned int i) -+{ -+ return (i > 0) ? 
__builtin_clz (i) : VAL; -+} -+ -+int -+main (void) -+{ -+ if (fooctz (0) != VAL || fooctz2 (0) != VAL || fooctz3 (0) != VAL -+ || fooclz (0) != VAL || fooclz2 (0) != VAL || fooclz3 (0) != VAL) -+ __builtin_abort (); -+ -+ return 0; -+} -\ No newline at end of file ---- /dev/null -+++ b/src/gcc/testsuite/gcc.c-torture/execute/pr66940.c -@@ -0,0 +1,20 @@ -+long long __attribute__ ((noinline, noclone)) -+foo (long long ival) -+{ -+ if (ival <= 0) -+ return -0x7fffffffffffffffL - 1; -+ -+ return 0x7fffffffffffffffL; -+} -+ -+int -+main (void) -+{ -+ if (foo (-1) != (-0x7fffffffffffffffL - 1)) -+ __builtin_abort (); -+ -+ if (foo (1) != 0x7fffffffffffffffL) -+ __builtin_abort (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.dg/asr_div1.c -+++ b/src/gcc/testsuite/gcc.dg/asr_div1.c -@@ -1,6 +1,7 @@ - /* Test division by const int generates only one shift. */ - /* { dg-do run } */ - /* { dg-options "-O2 -fdump-rtl-combine-all" } */ -+/* { dg-options "-O2 -fdump-rtl-combine-all -mtune=cortex-a53" { target aarch64*-*-* } } */ - - extern void abort (void); - ---- a/src/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c -+++ b/src/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c -@@ -24,7 +24,7 @@ - | FE_OVERFLOW \ - | FE_UNDERFLOW) - --#if defined __alpha__ -+#if defined __alpha__ || defined __aarch64__ - #define ITER_COUNT 100 - #else - #define ITER_COUNT 10000 ---- a/src/gcc/testsuite/gcc.dg/cpp/trad/include.c -+++ b/src/gcc/testsuite/gcc.dg/cpp/trad/include.c -@@ -2,11 +2,5 @@ - - /* Test that macros are not expanded in the <> quotes of #inlcude. */ - --/* vxWorksCommon.h uses the "#" operator to construct the name of an -- include file, thus making the file incompatible with -traditional-cpp. -- Newlib uses ## when including stdlib.h as of 2007-09-07. */ --/* { dg-do preprocess { target { { ! vxworks_kernel } && { ! newlib } } } } */ -- --#define __STDC__ 1 /* Stop complaints about non-ISO compilers. */ --#define stdlib 1 --#include <stdlib.h> /* { dg-bogus "o such file or directory" } */ -+#define builtins 1 -+#include <builtins.h> /* { dg-bogus "o such file or directory" } */ ---- a/src/gcc/testsuite/gcc.dg/cpp/trad/trad.exp -+++ b/src/gcc/testsuite/gcc.dg/cpp/trad/trad.exp -@@ -29,7 +29,7 @@ load_lib gcc-dg.exp - # If a testcase doesn't have special options, use these. - global DEFAULT_TRADCPPFLAGS - if ![info exists DEFAULT_TRADCPPFLAGS] then { -- set DEFAULT_TRADCPPFLAGS " -traditional-cpp" -+ set DEFAULT_TRADCPPFLAGS " -traditional-cpp -I$srcdir/$subdir/" - } - - # Initialize `dg'. ---- a/src/gcc/testsuite/gcc.dg/cpp/warn-undef-2.c -+++ b/src/gcc/testsuite/gcc.dg/cpp/warn-undef-2.c -@@ -1,5 +1,5 @@ - // { dg-do preprocess } - // { dg-options "-std=gnu99 -fdiagnostics-show-option -Werror=undef" } - /* { dg-message "some warnings being treated as errors" "" {target "*-*-*"} 0 } */ --#if x // { dg-error "\"x\" is not defined .-Werror=undef." } -+#if x // { dg-error "\"x\" is not defined, evaluates to 0 .-Werror=undef." } - #endif ---- a/src/gcc/testsuite/gcc.dg/cpp/warn-undef.c -+++ b/src/gcc/testsuite/gcc.dg/cpp/warn-undef.c -@@ -1,5 +1,5 @@ - // { dg-do preprocess } - // { dg-options "-std=gnu99 -fdiagnostics-show-option -Wundef" } - --#if x // { dg-warning "\"x\" is not defined .-Wundef." } -+#if x // { dg-warning "\"x\" is not defined, evaluates to 0 .-Wundef." 
} - #endif ---- a/src/gcc/testsuite/gcc.dg/lto/pr54709_0.c -+++ b/src/gcc/testsuite/gcc.dg/lto/pr54709_0.c -@@ -1,6 +1,7 @@ - /* { dg-lto-do link } */ - /* { dg-require-visibility "hidden" } */ - /* { dg-require-effective-target fpic } */ -+/* { dg-require-effective-target shared } */ - /* { dg-extra-ld-options { -shared } } */ - /* { dg-lto-options { { -fPIC -fvisibility=hidden -flto } } } */ - ---- a/src/gcc/testsuite/gcc.dg/lto/pr61526_0.c -+++ b/src/gcc/testsuite/gcc.dg/lto/pr61526_0.c -@@ -1,4 +1,5 @@ - /* { dg-require-effective-target fpic } */ -+/* { dg-require-effective-target shared } */ - /* { dg-lto-do link } */ - /* { dg-lto-options { { -fPIC -flto -flto-partition=1to1 } } } */ - /* { dg-extra-ld-options { -shared } } */ ---- a/src/gcc/testsuite/gcc.dg/lto/pr64415_0.c -+++ b/src/gcc/testsuite/gcc.dg/lto/pr64415_0.c -@@ -1,5 +1,6 @@ - /* { dg-lto-do link } */ - /* { dg-require-effective-target fpic } */ -+/* { dg-require-effective-target shared } */ - /* { dg-lto-options { { -O -flto -fpic } } } */ - /* { dg-extra-ld-options { -shared } } */ - /* { dg-extra-ld-options "-Wl,-undefined,dynamic_lookup" { target *-*-darwin* } } */ ---- a/src/gcc/testsuite/gcc.dg/plugin/plugin.exp -+++ b/src/gcc/testsuite/gcc.dg/plugin/plugin.exp -@@ -87,6 +87,12 @@ foreach plugin_test $plugin_test_list { - if ![runtest_file_p $runtests $plugin_src] then { - continue - } -+ # Skip tail call tests on targets that do not have sibcall_epilogue. -+ if {[regexp ".*must_tail_call_plugin.c" $plugin_src] -+ && [istarget arm*-*-*] -+ && [check_effective_target_arm_thumb1]} then { -+ continue -+ } - set plugin_input_tests [lreplace $plugin_test 0 0] - plugin-test-execute $plugin_src $plugin_input_tests - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/pr59833.c -@@ -0,0 +1,18 @@ -+/* { dg-do run { target { *-*-linux* *-*-gnu* } } } */ -+/* { dg-options "-O0 -lm" } */ -+/* { dg-require-effective-target issignaling } */ -+ -+#define _GNU_SOURCE -+#include <math.h> -+ -+int main (void) -+{ -+ float sNaN = __builtin_nansf (""); -+ double x = (double) sNaN; -+ if (issignaling(x)) -+ { -+ __builtin_abort(); -+ } -+ -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/pr68217.c -@@ -0,0 +1,14 @@ -+ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-vrp1" } */ -+ -+int foo (void) -+{ -+ volatile int a = -1; -+ long long b = (1LL << (sizeof (b) * 8 - 1)); // LLONG_MIN -+ long long x = (a & b); // x == 0x8000000000000000 -+ if (x < 1LL) { ; } else { __builtin_abort(); } -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump "\\\[-INF, 0\\\]" "vrp1" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/pr71636-1.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-fdump-tree-gimple" } */ -+ -+unsigned f(unsigned x, unsigned b) -+{ -+ return x & ((1U << b) - 1); -+} -+ -+/* { dg-final { scan-tree-dump-not "1 <<" "gimple" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/pr71636-2.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-forwprop-details" } */ -+ -+unsigned f(unsigned x, unsigned b) -+{ -+ unsigned t1 = 1U << b; -+ unsigned t2 = t1 - 1; -+ unsigned t3 = x & t2; -+ return t3; -+} -+ -+/* { dg-final { scan-tree-dump "_\[0-9\] = ~_\[0-9\]" "forwprop1" } } */ ---- a/src/gcc/testsuite/gcc.dg/strlenopt-20.c -+++ b/src/gcc/testsuite/gcc.dg/strlenopt-20.c -@@ -86,9 +86,9 @@ main () - return 0; - } - --/* { dg-final { scan-tree-dump-times "strlen \\(" 1 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen" } } */ - /* { dg-final { 
scan-tree-dump-times "memcpy \\(" 4 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */ --/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */ ---- a/src/gcc/testsuite/gcc.dg/strlenopt-21.c -+++ b/src/gcc/testsuite/gcc.dg/strlenopt-21.c -@@ -57,9 +57,9 @@ main () - return 0; - } - --/* { dg-final { scan-tree-dump-times "strlen \\(" 1 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "memcpy \\(" 3 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */ --/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */ ---- a/src/gcc/testsuite/gcc.dg/strlenopt-22.c -+++ b/src/gcc/testsuite/gcc.dg/strlenopt-22.c -@@ -31,9 +31,9 @@ main () - return 0; - } - --/* { dg-final { scan-tree-dump-times "strlen \\(" 3 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strlen \\(" 4 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "memcpy \\(" 1 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcpy \\(" 1 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */ --/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */ ---- a/src/gcc/testsuite/gcc.dg/strlenopt-22g.c -+++ b/src/gcc/testsuite/gcc.dg/strlenopt-22g.c -@@ -5,9 +5,9 @@ - #define USE_GNU - #include "strlenopt-22.c" - --/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strlen \\(" 1 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "memcpy \\(" 1 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */ --/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "stpcpy \\(" 1 "strlen" } } */ ---- a/src/gcc/testsuite/gcc.dg/strlenopt-26.c -+++ b/src/gcc/testsuite/gcc.dg/strlenopt-26.c -@@ -21,4 +21,5 @@ main (void) - return 0; - } - --/* { dg-final { scan-tree-dump-times "strlen \\(" 1 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */ ---- a/src/gcc/testsuite/gcc.dg/strlenopt-5.c -+++ b/src/gcc/testsuite/gcc.dg/strlenopt-5.c -@@ -48,9 +48,9 @@ main () - return 0; - } - --/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strlen \\(" 2 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "memcpy \\(" 2 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcpy \\(" 1 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */ --/* { dg-final { scan-tree-dump-times "strchr \\(" 2 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 
"strlen" } } */ ---- a/src/gcc/testsuite/gcc.dg/strlenopt-7.c -+++ b/src/gcc/testsuite/gcc.dg/strlenopt-7.c -@@ -40,11 +40,11 @@ main () - return 0; - } - --/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strlen \\(" 1 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "memcpy \\(" 2 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcpy \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */ --/* { dg-final { scan-tree-dump-times "strchr \\(" 1 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "\\*r_\[0-9\]* = 0;" 1 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "return 3;" 1 "optimized" } } */ ---- a/src/gcc/testsuite/gcc.dg/strlenopt-9.c -+++ b/src/gcc/testsuite/gcc.dg/strlenopt-9.c -@@ -98,10 +98,10 @@ main () - return 0; - } - --/* { dg-final { scan-tree-dump-times "strlen \\(" 3 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strlen \\(" 5 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "memcpy \\(" 6 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcpy \\(" 1 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "strcat \\(" 0 "strlen" } } */ --/* { dg-final { scan-tree-dump-times "strchr \\(" 3 "strlen" } } */ -+/* { dg-final { scan-tree-dump-times "strchr \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "stpcpy \\(" 0 "strlen" } } */ - /* { dg-final { scan-tree-dump-times "return 4;" 1 "optimized" } } */ ---- a/src/gcc/testsuite/gcc.dg/torture/arm-fp16-int-convert-alt.c -+++ b/src/gcc/testsuite/gcc.dg/torture/arm-fp16-int-convert-alt.c -@@ -1,5 +1,6 @@ - /* Test floating-point conversions. Standard types and __fp16. */ - /* { dg-do run { target arm*-*-* } } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } - /* { dg-options "-mfp16-format=alternative" } */ - - #include "fp-int-convert.h" ---- a/src/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-3.c -+++ b/src/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-3.c -@@ -1,5 +1,6 @@ - /* Test various operators on __fp16 and mixed __fp16/float operands. */ - /* { dg-do run { target arm*-*-* } } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } - /* { dg-options "-mfp16-format=alternative" } */ - - #include "arm-fp16-ops.h" ---- a/src/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-4.c -+++ b/src/gcc/testsuite/gcc.dg/torture/arm-fp16-ops-4.c -@@ -1,5 +1,6 @@ - /* Test various operators on __fp16 and mixed __fp16/float operands. */ - /* { dg-do run { target arm*-*-* } } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } - /* { dg-options "-mfp16-format=alternative -ffast-math" } */ - - #include "arm-fp16-ops.h" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/torture/pr71594.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile } */ -+/* { dg-options "--param max-rtl-if-conversion-insns=2" } */ -+ -+unsigned short a; -+int b, c; -+int *d; -+void fn1() { -+ *d = 24; -+ for (; *d <= 65;) { -+ unsigned short *e = &a; -+ b = (a &= 0 <= 0) < (c ?: (*e %= *d)); -+ for (; *d <= 83;) -+ ; -+ } -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr61839_1.c -@@ -0,0 +1,44 @@ -+/* PR tree-optimization/61839. 
*/ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-optimized" } */ -+/* { dg-require-effective-target int32plus } */ -+ -+__attribute__ ((noinline)) -+int foo () -+{ -+ int a = -1; -+ volatile unsigned b = 1U; -+ int c = 1; -+ c = (a + 972195718) >> (1LU <= b); -+ if (c == 486097858) -+ ; -+ else -+ __builtin_abort (); -+ return 0; -+} -+ -+__attribute__ ((noinline)) -+int bar () -+{ -+ int a = -1; -+ volatile unsigned b = 1U; -+ int c = 1; -+ c = (a + 972195718) >> (b ? 2 : 3); -+ if (c == 243048929) -+ ; -+ else -+ __builtin_abort (); -+ return 0; -+} -+ -+int main () -+{ -+ foo (); -+ bar (); -+} -+ -+/* Scan for c = 972195717) >> [0, 1] in function foo. */ -+/* { dg-final { scan-tree-dump-times "486097858 : 972195717" 1 "vrp1" } } */ -+/* Scan for c = 972195717) >> [2, 3] in function bar. */ -+/* { dg-final { scan-tree-dump-times "243048929 : 121524464" 2 "vrp1" } } */ -+/* { dg-final { scan-tree-dump-times "486097858" 0 "optimized" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr61839_2.c -@@ -0,0 +1,54 @@ -+/* PR tree-optimization/61839. */ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-vrp1" } */ -+/* { dg-require-effective-target int32plus } */ -+ -+__attribute__ ((noinline)) -+int foo () -+{ -+ int a = -1; -+ volatile unsigned b = 1U; -+ int c = 1; -+ c = (a + 972195718) / (b ? 1 : 0); -+ if (c == 972195717) -+ ; -+ else -+ __builtin_abort (); -+ return 0; -+} -+ -+__attribute__ ((noinline)) -+int bar () -+{ -+ int a = -1; -+ volatile unsigned b = 1U; -+ int c = 1; -+ c = (a + 972195718) % (b ? 1 : 0); -+ if (c == 972195717) -+ ; -+ else -+ __builtin_abort (); -+ return 0; -+} -+ -+__attribute__ ((noinline)) -+int bar2 () -+{ -+ int a = -1; -+ volatile unsigned b = 1U; -+ int c = 1; -+ c = (a + 972195716) % (b ? 1 : 2); -+ if (c == 972195715) -+ ; -+ else -+ __builtin_abort (); -+ return 0; -+} -+ -+ -+/* Dont optimize 972195717 / 0 in function foo. */ -+/* { dg-final { scan-tree-dump-times "972195717 / _" 1 "vrp1" } } */ -+/* Dont optimize 972195717 % 0 in function bar. */ -+/* { dg-final { scan-tree-dump-times "972195717 % _" 1 "vrp1" } } */ -+/* Optimize in function bar2. */ -+/* { dg-final { scan-tree-dump-times "972195715 % _" 0 "vrp1" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr61839_3.c -@@ -0,0 +1,26 @@ -+/* PR tree-optimization/61839. */ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-optimized" } */ -+ -+__attribute__ ((noinline)) -+int foo (int a, unsigned b) -+{ -+ int c = 1; -+ b = a ? 12 : 13; -+ c = b << 8; -+ if (c == 3072) -+ ; -+ else -+ __builtin_abort (); -+ return 0; -+} -+ -+int main () -+{ -+ volatile unsigned b = 1U; -+ foo (-1, b); -+} -+ -+/* Scan for c [12, 13] << 8 in function foo. */ -+/* { dg-final { scan-tree-dump-times "3072 : 3328" 2 "vrp1" } } */ -+/* { dg-final { scan-tree-dump-times "3072" 0 "optimized" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr61839_4.c -@@ -0,0 +1,28 @@ -+/* PR tree-optimization/61839. */ -+/* { dg-do run } */ -+/* { dg-options "-O2 -fdump-tree-vrp1 -fdump-tree-optimized" } */ -+/* { dg-require-effective-target int32plus } */ -+ -+__attribute__ ((noinline)) -+int foo (int a, unsigned b) -+{ -+ unsigned c = 1; -+ if (b >= 1 && b <= ((unsigned)(-1) - 1)) -+ return 0; -+ c = b >> 4; -+ if (c == 268435455) -+ ; -+ else -+ __builtin_abort (); -+ return 0; -+} -+ -+int main () -+{ -+ volatile unsigned b = (unsigned)(-1); -+ foo (-1, b); -+} -+ -+/* Scan for ~[1, 4294967294] >> 4 in function foo. 
*/ -+/* { dg-final { scan-tree-dump-times "0 : 268435455" 1 "vrp1" } } */ -+/* { dg-final { scan-tree-dump-times "268435455" 0 "optimized" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/scev-11.c -@@ -0,0 +1,28 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ -+ -+int a[128]; -+extern int b[]; -+ -+int bar (int *); -+ -+int -+foo (int n) -+{ -+ int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ unsigned char uc = (unsigned char)i; -+ a[i] = i; -+ b[uc] = 0; -+ } -+ -+ bar (a); -+ return 0; -+} -+ -+/* Address of array reference to b is scev. */ -+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n address" 2 "ivopts" } } */ -+ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/scev-12.c -@@ -0,0 +1,30 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-tree-ivopts-details" } */ -+ -+int a[128]; -+extern int b[]; -+ -+int bar (int *); -+ -+int -+foo (int x, int n) -+{ -+ int i; -+ -+ for (i = 0; i < n; i++) -+ { -+ unsigned char uc = (unsigned char)i; -+ if (x) -+ a[i] = i; -+ b[uc] = 0; -+ } -+ -+ bar (a); -+ return 0; -+} -+ -+/* Address of array reference to b is not scev. */ -+/* { dg-final { scan-tree-dump-times "use \[0-9\]\n address" 1 "ivopts" } } */ -+ -+ -+ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c -@@ -25,6 +25,7 @@ f1 (int i, ...) - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -45,6 +46,7 @@ f2 (int i, ...) - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -60,6 +62,7 @@ f3 (int i, ...) 
- /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */ -@@ -78,6 +81,7 @@ f4 (int i, ...) - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -96,6 +100,7 @@ f5 (int i, ...) - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -116,6 +121,7 @@ f6 (int i, ...) 
- /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -133,6 +139,7 @@ f7 (int i, ...) - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -152,6 +159,7 @@ f8 (int i, ...) - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -169,6 +177,7 @@ f9 (int i, ...) 
- /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -188,6 +197,7 @@ f10 (int i, ...) - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -208,6 +218,7 @@ f11 (int i, ...) - /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -228,6 +239,7 @@ f12 (int i, ...) 
- /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -248,6 +260,7 @@ f13 (int i, ...) - /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ - /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -268,6 +281,7 @@ f14 (int i, ...) - /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ - /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -291,6 +305,7 @@ f15 (int i, ...) - /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ - /* { dg-final { scan-tree-dump "f15: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - - /* We may be able to improve upon this after fixing PR66010/PR66013. */ - /* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c -@@ -24,6 +24,7 @@ f1 (int i, ...) - /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -39,6 +40,7 @@ f2 (int i, ...) - /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -57,6 +59,7 @@ f3 (int i, ...) 
- /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -73,6 +76,7 @@ f4 (int i, ...) - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -89,6 +93,7 @@ f5 (int i, ...) - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -107,6 +112,7 @@ f6 (int i, ...) 
- /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -123,6 +129,7 @@ f7 (int i, ...) - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -139,6 +146,7 @@ f8 (int i, ...) - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -155,6 +163,7 @@ f10 (int i, ...) 
- /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -171,6 +180,7 @@ f11 (int i, ...) - /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -187,6 +197,7 @@ f12 (int i, ...) - /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c -@@ -27,6 +27,7 @@ f1 (int i, ...) 
- /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -44,6 +45,7 @@ f2 (int i, ...) - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -67,6 +69,7 @@ f3 (int i, ...) - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ -@@ -88,6 +91,7 @@ f4 (int i, ...) 
- /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c -@@ -25,6 +25,7 @@ f1 (int i, ...) - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - - void - f2 (int i, ...) -@@ -38,6 +39,7 @@ f2 (int i, ...) - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - - /* Here va_arg can be executed at most as many times as va_start. */ - void -@@ -56,6 +58,7 @@ f3 (int i, ...) - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - - void - f4 (int i, ...) -@@ -74,6 +77,7 @@ f4 (int i, ...) - /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 16 GPR units and 16 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - - void - f5 (int i, ...) -@@ -88,6 +92,7 @@ f5 (int i, ...) - /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save (4|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ - - void - f6 (int i, ...) -@@ -102,6 +107,7 @@ f6 (int i, ...) - /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */ - - void - f7 (int i, ...) -@@ -116,3 +122,4 @@ f7 (int i, ...) - /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target aarch64*-*-* } } } */ ---- a/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c -+++ b/src/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c -@@ -30,6 +30,7 @@ bar (int x, char const *y, ...) 
- /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ - /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ - /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ -+/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ - /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ - /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target ia64-*-* } } } */ - /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ ---- a/src/gcc/testsuite/gcc.dg/uninit-pred-8_a.c -+++ b/src/gcc/testsuite/gcc.dg/uninit-pred-8_a.c -@@ -1,6 +1,8 @@ - - /* { dg-do compile } */ - /* { dg-options "-Wuninitialized -O2" } */ -+/* Pick a particular tuning to pin down BRANCH_COST. */ -+/* { dg-additional-options "-mtune=cortex-a15" { target arm*-*-* } } */ - - int g; - void bar(); ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-70.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target section_anchors } */ -+/* { dg-require-effective-target vect_int } */ -+ -+#define N 32 -+ -+/* Increase alignment of struct if an array's offset is multiple of alignment of -+ vector type corresponding to it's scalar type. -+ For the below test-case: -+ offsetof(e) == 8 bytes. -+ i) For arm: let x = alignment of vector type corresponding to int, -+ x == 8 bytes. -+ Since offsetof(e) % x == 0, set DECL_ALIGN(a, b, c) to x. -+ ii) For aarch64, ppc: x == 16 bytes. -+ Since offsetof(e) % x != 0, don't increase alignment of a, b, c. -+*/ -+ -+static struct A { -+ int p1, p2; -+ int e[N]; -+} a, b, c; -+ -+int foo(void) -+{ -+ for (int i = 0; i < N; i++) -+ a.e[i] = b.e[i] + c.e[i]; -+ -+ return a.e[0]; -+} -+ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target aarch64*-*-* } } } */ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target powerpc64*-*-* } } } */ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 3 "increase_alignment" { target arm*-*-* } } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-71.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target section_anchors } */ -+/* { dg-require-effective-target vect_int } */ -+ -+/* Should not increase alignment of the struct because -+ sizeof (A.e) < sizeof(corresponding vector type). 
*/ -+ -+#define N 3 -+ -+static struct A { -+ int p1, p2; -+ int e[N]; -+} a, b, c; -+ -+int foo(void) -+{ -+ for (int i = 0; i < N; i++) -+ a.e[i] = b.e[i] + c.e[i]; -+ -+ return a.e[0]; -+} -+ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target aarch64*-*-* } } } */ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target powerpc64*-*-* } } } */ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target arm*-*-* } } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-72.c -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target section_anchors } */ -+/* { dg-require-effective-target vect_int } */ -+ -+#define N 32 -+ -+/* Clone of section-anchors-vect-70.c having nested struct. */ -+ -+struct S -+{ -+ int e[N]; -+}; -+ -+static struct A { -+ int p1, p2; -+ struct S s; -+} a, b, c; -+ -+int foo(void) -+{ -+ for (int i = 0; i < N; i++) -+ a.s.e[i] = b.s.e[i] + c.s.e[i]; -+ -+ return a.s.e[0]; -+} -+ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target aarch64*-*-* } } } */ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target powerpc64*-*-* } } } */ -+/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 3 "increase_alignment" { target arm*-*-* } } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/pr57206.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target vect_float } */ -+ -+void bad0(float * d, unsigned int n) -+{ -+ unsigned int i; -+ for (i=n; i>0; --i) -+ d[n-i] = 0.0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/pr65951.c -@@ -0,0 +1,63 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#include <stdarg.h> -+#include "tree-vect.h" -+ -+#define N 512 -+ -+/* These multiplications should be vectorizable with additions when -+ no vector shift is available. 
*/ -+ -+__attribute__ ((noinline)) void -+foo (int *arr) -+{ -+ for (int i = 0; i < N; i++) -+ arr[i] *= 2; -+} -+ -+__attribute__ ((noinline)) void -+foo2 (int *arr) -+{ -+ for (int i = 0; i < N; i++) -+ arr[i] *= 4; -+} -+ -+int -+main (void) -+{ -+ check_vect (); -+ int data[N]; -+ int i; -+ -+ for (i = 0; i < N; i++) -+ { -+ data[i] = i; -+ __asm__ volatile (""); -+ } -+ -+ foo (data); -+ for (i = 0; i < N; i++) -+ { -+ if (data[i] / 2 != i) -+ __builtin_abort (); -+ __asm__ volatile (""); -+ } -+ -+ for (i = 0; i < N; i++) -+ { -+ data[i] = i; -+ __asm__ volatile (""); -+ } -+ -+ foo2 (data); -+ for (i = 0; i < N; i++) -+ { -+ if (data[i] / 4 != i) -+ __builtin_abort (); -+ __asm__ volatile (""); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/pr71818.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+ -+char a; -+short b; -+int c, d; -+void fn1() { -+ char e = 75, g; -+ unsigned char *f = &e; -+ a = 21; -+ for (; a <= 48; a++) { -+ for (; e <= 6;) -+ ; -+ g -= e -= b || g <= c; -+ } -+ d = *f; -+} ---- a/src/gcc/testsuite/gcc.dg/vect/vect-iv-9.c -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-iv-9.c -@@ -33,5 +33,4 @@ int main (void) - return 0; - } - --/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_int_mult } } } */ --/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target {! vect_int_mult } } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-load-lanes-peeling-1.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target vect_int } */ -+/* { dg-require-effective-target vect_load_lanes } */ -+ -+void -+f (int *__restrict a, int *__restrict b) -+{ -+ for (int i = 0; i < 96; ++i) -+ a[i] = b[i * 3] + b[i * 3 + 1] + b[i * 3 + 2]; -+} -+ -+/* { dg-final { scan-tree-dump-not "Data access with gaps" "vect" } } */ -+/* { dg-final { scan-tree-dump-not "epilog loop required" "vect" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-1.c -@@ -0,0 +1,41 @@ -+/* { dg-require-effective-target vect_int } */ -+/* { dg-require-effective-target vect_shift } */ -+ -+#include <stdarg.h> -+#include "tree-vect.h" -+ -+#define N 256 -+ -+__attribute__ ((noinline)) void -+foo (long long *arr) -+{ -+ for (int i = 0; i < N; i++) -+ arr[i] *= 123; -+} -+ -+int -+main (void) -+{ -+ check_vect (); -+ long long data[N]; -+ int i; -+ -+ for (i = 0; i < N; i++) -+ { -+ data[i] = i; -+ __asm__ volatile (""); -+ } -+ -+ foo (data); -+ for (i = 0; i < N; i++) -+ { -+ if (data[i] / 123 != i) -+ __builtin_abort (); -+ __asm__ volatile (""); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" { target aarch64*-*-* } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target aarch64*-*-* } } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.dg/vect/vect-mult-const-pattern-2.c -@@ -0,0 +1,40 @@ -+/* { dg-require-effective-target vect_int } */ -+ -+#include <stdarg.h> -+#include "tree-vect.h" -+ -+#define N 256 -+ -+__attribute__ ((noinline)) void -+foo (long long *arr) -+{ -+ for (int i = 0; i < N; i++) -+ arr[i] *= -19594LL; -+} -+ -+int -+main (void) -+{ -+ check_vect (); -+ long long data[N]; -+ int i; -+ -+ for (i = 0; i < N; i++) -+ { -+ data[i] = i; -+ __asm__ volatile (""); -+ } -+ -+ foo (data); -+ for (i = 0; i < N; i++) -+ { -+ if 
(data[i] / -19594LL != i) -+ __builtin_abort (); -+ __asm__ volatile (""); -+ } -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" { target aarch64*-*-* } } } */ -+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target aarch64*-*-* } } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/advsimd-intrinsics.exp -@@ -53,7 +53,10 @@ torture-init - set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS - - # Make sure Neon flags are provided, if necessary. Use fp16 if we can. --if {[check_effective_target_arm_neon_fp16_ok]} then { -+# Use fp16 arithmetic operations if the hardware supports it. -+if {[check_effective_target_arm_v8_2a_fp16_neon_hw]} then { -+ set additional_flags [add_options_for_arm_v8_2a_fp16_neon ""] -+} elseif {[check_effective_target_arm_neon_fp16_ok]} then { - set additional_flags [add_options_for_arm_neon_fp16 ""] - } else { - set additional_flags [add_options_for_arm_neon ""] ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h -@@ -16,6 +16,14 @@ extern void *memset(void *, int, size_t); - extern void *memcpy(void *, const void *, size_t); - extern size_t strlen(const char *); - -+/* Helper macro to select FP16 tests. */ -+#if (defined (__ARM_FP16_FORMAT_IEEE) \ -+ || defined (__ARM_FP16_FORMAT_ALTERNATIVE)) -+#define FP16_SUPPORTED (1) -+#else -+#undef FP16_SUPPORTED -+#endif -+ - /* Various string construction helpers. */ - - /* -@@ -24,6 +32,13 @@ extern size_t strlen(const char *); - VECT_VAR(expected, int, 16, 4) -> expected_int16x4 - VECT_VAR_DECL(expected, int, 16, 4) -> int16x4_t expected_int16x4 - */ -+/* Some instructions don't exist on ARM. -+ Use this macro to guard against them. */ -+#ifdef __aarch64__ -+#define AARCH64_ONLY(X) X -+#else -+#define AARCH64_ONLY(X) -+#endif - - #define xSTR(X) #X - #define STR(X) xSTR(X) -@@ -81,7 +96,7 @@ extern size_t strlen(const char *); - abort(); \ - } \ - } \ -- fprintf(stderr, "CHECKED %s\n", MSG); \ -+ fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \ - } - - /* Floating-point variant. */ -@@ -110,7 +125,36 @@ extern size_t strlen(const char *); - abort(); \ - } \ - } \ -- fprintf(stderr, "CHECKED %s\n", MSG); \ -+ fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \ -+ } -+ -+/* poly variant. */ -+#define CHECK_POLY(MSG,T,W,N,FMT,EXPECTED,COMMENT) \ -+ { \ -+ int i; \ -+ for(i=0; i<N ; i++) \ -+ { \ -+ union poly_operand { \ -+ uint##W##_t i; \ -+ poly##W##_t p; \ -+ } tmp_res, tmp_exp; \ -+ tmp_res.p = VECT_VAR(result, T, W, N)[i]; \ -+ tmp_exp.i = VECT_VAR(EXPECTED, T, W, N)[i]; \ -+ if (tmp_res.i != tmp_exp.i) { \ -+ fprintf(stderr, \ -+ "ERROR in %s (%s line %d in buffer '%s') at type %s " \ -+ "index %d: got 0x%" FMT " != 0x%" FMT " %s\n", \ -+ MSG, __FILE__, __LINE__, \ -+ STR(EXPECTED), \ -+ STR(VECT_NAME(T, W, N)), \ -+ i, \ -+ tmp_res.i, \ -+ tmp_exp.i, \ -+ strlen(COMMENT) > 0 ? 
COMMENT : ""); \ -+ abort(); \ -+ } \ -+ } \ -+ fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG); \ - } - - /* Clean buffer with a non-zero pattern to help diagnose buffer -@@ -133,10 +177,16 @@ static ARRAY(result, uint, 32, 2); - static ARRAY(result, uint, 64, 1); - static ARRAY(result, poly, 8, 8); - static ARRAY(result, poly, 16, 4); -+#if defined (__ARM_FEATURE_CRYPTO) -+static ARRAY(result, poly, 64, 1); -+#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - static ARRAY(result, float, 16, 4); - #endif - static ARRAY(result, float, 32, 2); -+#ifdef __aarch64__ -+static ARRAY(result, float, 64, 1); -+#endif - static ARRAY(result, int, 8, 16); - static ARRAY(result, int, 16, 8); - static ARRAY(result, int, 32, 4); -@@ -147,6 +197,9 @@ static ARRAY(result, uint, 32, 4); - static ARRAY(result, uint, 64, 2); - static ARRAY(result, poly, 8, 16); - static ARRAY(result, poly, 16, 8); -+#if defined (__ARM_FEATURE_CRYPTO) -+static ARRAY(result, poly, 64, 2); -+#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - static ARRAY(result, float, 16, 8); - #endif -@@ -169,6 +222,7 @@ extern ARRAY(expected, poly, 8, 8); - extern ARRAY(expected, poly, 16, 4); - extern ARRAY(expected, hfloat, 16, 4); - extern ARRAY(expected, hfloat, 32, 2); -+extern ARRAY(expected, hfloat, 64, 1); - extern ARRAY(expected, int, 8, 16); - extern ARRAY(expected, int, 16, 8); - extern ARRAY(expected, int, 32, 4); -@@ -193,8 +247,8 @@ extern ARRAY(expected, hfloat, 64, 2); - CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ - CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ -- CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ - \ - CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ -@@ -205,8 +259,8 @@ extern ARRAY(expected, hfloat, 64, 2); - CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ - CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment); \ -- CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ - } \ - -@@ -335,7 +389,8 @@ extern int VECT_VAR(expected_cumulative_sat, uint, 64, 2); - strlen(COMMENT) > 0 ? 
" " COMMENT : ""); \ - abort(); \ - } \ -- fprintf(stderr, "CHECKED CUMULATIVE SAT %s\n", MSG); \ -+ fprintf(stderr, "CHECKED CUMULATIVE SAT %s %s\n", \ -+ STR(VECT_TYPE(T, W, N)), MSG); \ - } - - #define CHECK_CUMULATIVE_SAT_NAMED(test_name,EXPECTED,comment) \ -@@ -379,6 +434,9 @@ static void clean_results (void) - CLEAN(result, uint, 64, 1); - CLEAN(result, poly, 8, 8); - CLEAN(result, poly, 16, 4); -+#if defined (__ARM_FEATURE_CRYPTO) -+ CLEAN(result, poly, 64, 1); -+#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - CLEAN(result, float, 16, 4); - #endif -@@ -394,6 +452,9 @@ static void clean_results (void) - CLEAN(result, uint, 64, 2); - CLEAN(result, poly, 8, 16); - CLEAN(result, poly, 16, 8); -+#if defined (__ARM_FEATURE_CRYPTO) -+ CLEAN(result, poly, 64, 2); -+#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - CLEAN(result, float, 16, 8); - #endif -@@ -419,6 +480,13 @@ static void clean_results (void) - #define DECL_VARIABLE(VAR, T1, W, N) \ - VECT_TYPE(T1, W, N) VECT_VAR(VAR, T1, W, N) - -+#if defined (__ARM_FEATURE_CRYPTO) -+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) \ -+ DECL_VARIABLE(VAR, T1, W, N) -+#else -+#define DECL_VARIABLE_CRYPTO(VAR, T1, W, N) -+#endif -+ - /* Declare only 64 bits signed variants. */ - #define DECL_VARIABLE_64BITS_SIGNED_VARIANTS(VAR) \ - DECL_VARIABLE(VAR, int, 8, 8); \ -@@ -454,6 +522,7 @@ static void clean_results (void) - DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ - DECL_VARIABLE(VAR, poly, 8, 8); \ - DECL_VARIABLE(VAR, poly, 16, 4); \ -+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \ - DECL_VARIABLE(VAR, float, 16, 4); \ - DECL_VARIABLE(VAR, float, 32, 2) - #else -@@ -462,6 +531,7 @@ static void clean_results (void) - DECL_VARIABLE_64BITS_UNSIGNED_VARIANTS(VAR); \ - DECL_VARIABLE(VAR, poly, 8, 8); \ - DECL_VARIABLE(VAR, poly, 16, 4); \ -+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 1); \ - DECL_VARIABLE(VAR, float, 32, 2) - #endif - -@@ -472,6 +542,7 @@ static void clean_results (void) - DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \ - DECL_VARIABLE(VAR, poly, 8, 16); \ - DECL_VARIABLE(VAR, poly, 16, 8); \ -+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \ - DECL_VARIABLE(VAR, float, 16, 8); \ - DECL_VARIABLE(VAR, float, 32, 4) - #else -@@ -480,6 +551,7 @@ static void clean_results (void) - DECL_VARIABLE_128BITS_UNSIGNED_VARIANTS(VAR); \ - DECL_VARIABLE(VAR, poly, 8, 16); \ - DECL_VARIABLE(VAR, poly, 16, 8); \ -+ DECL_VARIABLE_CRYPTO(VAR, poly, 64, 2); \ - DECL_VARIABLE(VAR, float, 32, 4) - #endif - /* Declare all variants. */ -@@ -500,15 +572,6 @@ static void clean_results (void) - /* Helpers to initialize vectors. */ - #define VDUP(VAR, Q, T1, T2, W, N, V) \ - VECT_VAR(VAR, T1, W, N) = vdup##Q##_n_##T2##W(V) --#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) --/* Work around that there is no vdup_n_f16 intrinsic. */ --#define vdup_n_f16(VAL) \ -- __extension__ \ -- ({ \ -- float16_t f = VAL; \ -- vld1_dup_f16(&f); \ -- }) --#endif - - #define VSET_LANE(VAR, Q, T1, T2, W, N, L, V) \ - VECT_VAR(VAR, T1, W, N) = vset##Q##_lane_##T2##W(V, \ -@@ -521,6 +584,13 @@ static void clean_results (void) - - /* Helpers to call macros with 1 constant and 5 variable - arguments. 
*/ -+#if defined (__ARM_FEATURE_CRYPTO) -+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) \ -+ MACRO(VAR1, VAR2, T1, T2, T3, W, N) -+#else -+#define MACRO_CRYPTO(MACRO, VAR1, VAR2, T1, T2, T3, W, N) -+#endif -+ - #define TEST_MACRO_64BITS_SIGNED_VARIANTS_1_5(MACRO, VAR) \ - MACRO(VAR, , int, s, 8, 8); \ - MACRO(VAR, , int, s, 16, 4); \ -@@ -591,13 +661,15 @@ static void clean_results (void) - TEST_MACRO_64BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ - TEST_MACRO_64BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ - MACRO(VAR1, VAR2, , poly, p, 8, 8); \ -- MACRO(VAR1, VAR2, , poly, p, 16, 4) -+ MACRO(VAR1, VAR2, , poly, p, 16, 4); \ -+ MACRO_CRYPTO(MACRO, VAR1, VAR2, , poly, p, 64, 1) - - #define TEST_MACRO_128BITS_VARIANTS_2_5(MACRO, VAR1, VAR2) \ - TEST_MACRO_128BITS_SIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ - TEST_MACRO_128BITS_UNSIGNED_VARIANTS_2_5(MACRO, VAR1, VAR2); \ - MACRO(VAR1, VAR2, q, poly, p, 8, 16); \ -- MACRO(VAR1, VAR2, q, poly, p, 16, 8) -+ MACRO(VAR1, VAR2, q, poly, p, 16, 8); \ -+ MACRO_CRYPTO(MACRO, VAR1, VAR2, q, poly, p, 64, 2) - - #define TEST_MACRO_ALL_VARIANTS_2_5(MACRO, VAR1, VAR2) \ - TEST_MACRO_64BITS_VARIANTS_2_5(MACRO, VAR1, VAR2); \ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_float.inc -@@ -0,0 +1,170 @@ -+/* Floating-point only version of binary_op_no64.inc template. Currently only -+ float16_t is used. */ -+ -+#include <math.h> -+ -+#define FNNAME1(NAME) exec_ ## NAME -+#define FNNAME(NAME) FNNAME1(NAME) -+ -+void FNNAME (INSN_NAME) (void) -+{ -+ int i; -+ -+ /* Basic test: z = INSN (x, y), then store the result. */ -+#define TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector2, T1, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) -+ -+#define TEST_BINARY_OP(INSN, Q, T1, T2, W, N) \ -+ TEST_BINARY_OP1(INSN, Q, T1, T2, W, N) \ -+ -+#ifdef HAS_FLOAT16_VARIANT -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ -+ DECL_VARIABLE(vector, float, 16, 8); -+ DECL_VARIABLE(vector2, float, 16, 8); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif -+ -+#ifdef HAS_FLOAT_VARIANT -+ DECL_VARIABLE(vector, float, 32, 2); -+ DECL_VARIABLE(vector2, float, 32, 2); -+ DECL_VARIABLE(vector_res, float, 32, 2); -+ -+ DECL_VARIABLE(vector, float, 32, 4); -+ DECL_VARIABLE(vector2, float, 32, 4); -+ DECL_VARIABLE(vector_res, float, 32, 4); -+#endif -+ -+ clean_results (); -+ -+ /* Initialize input "vector" from "buffer". */ -+#ifdef HAS_FLOAT16_VARIANT -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif -+#ifdef HAS_FLOAT_VARIANT -+ VLOAD(vector, buffer, , float, f, 32, 2); -+ VLOAD(vector, buffer, q, float, f, 32, 4); -+#endif -+ -+ /* Choose init value arbitrarily, will be used as comparison value. 
*/ -+#ifdef HAS_FLOAT16_VARIANT -+ VDUP(vector2, , float, f, 16, 4, -15.5f); -+ VDUP(vector2, q, float, f, 16, 8, -14.5f); -+#endif -+#ifdef HAS_FLOAT_VARIANT -+ VDUP(vector2, , float, f, 32, 2, -15.5f); -+ VDUP(vector2, q, float, f, 32, 4, -14.5f); -+#endif -+ -+#ifdef HAS_FLOAT16_VARIANT -+#define FLOAT16_VARIANT(MACRO, VAR) \ -+ MACRO(VAR, , float, f, 16, 4); \ -+ MACRO(VAR, q, float, f, 16, 8); -+#else -+#define FLOAT16_VARIANT(MACRO, VAR) -+#endif -+ -+#ifdef HAS_FLOAT_VARIANT -+#define FLOAT_VARIANT(MACRO, VAR) \ -+ MACRO(VAR, , float, f, 32, 2); \ -+ MACRO(VAR, q, float, f, 32, 4); -+#else -+#define FLOAT_VARIANT(MACRO, VAR) -+#endif -+ -+#define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR) \ -+ -+ /* Apply a binary operator named INSN_NAME. */ -+ FLOAT16_VARIANT(TEST_BINARY_OP, INSN_NAME); -+ FLOAT_VARIANT(TEST_BINARY_OP, INSN_NAME); -+ -+#ifdef HAS_FLOAT16_VARIANT -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); -+ -+ /* Extra FP tests with special values (NaN, ....) */ -+ VDUP(vector, q, float, f, 16, 8, 1.0f); -+ VDUP(vector2, q, float, f, 16, 8, NAN); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_nan, -+ " FP special (NaN)"); -+ -+ VDUP(vector, q, float, f, 16, 8, -NAN); -+ VDUP(vector2, q, float, f, 16, 8, 1.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_mnan, -+ " FP special (-NaN)"); -+ -+ VDUP(vector, q, float, f, 16, 8, 1.0f); -+ VDUP(vector2, q, float, f, 16, 8, HUGE_VALF); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_inf, -+ " FP special (inf)"); -+ -+ VDUP(vector, q, float, f, 16, 8, -HUGE_VALF); -+ VDUP(vector2, q, float, f, 16, 8, 1.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_minf, -+ " FP special (-inf)"); -+ -+ VDUP(vector, q, float, f, 16, 8, 0.0f); -+ VDUP(vector2, q, float, f, 16, 8, -0.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_zero1, -+ " FP special (-0.0)"); -+ -+ VDUP(vector, q, float, f, 16, 8, -0.0f); -+ VDUP(vector2, q, float, f, 16, 8, 0.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_zero2, -+ " FP special (-0.0)"); -+#endif -+ -+#ifdef HAS_FLOAT_VARIANT -+ CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); -+ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); -+ -+ /* Extra FP tests with special values (NaN, ....) 
*/ -+ VDUP(vector, q, float, f, 32, 4, 1.0f); -+ VDUP(vector2, q, float, f, 32, 4, NAN); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); -+ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, " FP special (NaN)"); -+ -+ VDUP(vector, q, float, f, 32, 4, -NAN); -+ VDUP(vector2, q, float, f, 32, 4, 1.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); -+ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_mnan, " FP special (-NaN)"); -+ -+ VDUP(vector, q, float, f, 32, 4, 1.0f); -+ VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); -+ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_inf, " FP special (inf)"); -+ -+ VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); -+ VDUP(vector2, q, float, f, 32, 4, 1.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); -+ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_minf, " FP special (-inf)"); -+ -+ VDUP(vector, q, float, f, 32, 4, 0.0f); -+ VDUP(vector2, q, float, f, 32, 4, -0.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); -+ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero1, " FP special (-0.0)"); -+ -+ VDUP(vector, q, float, f, 32, 4, -0.0f); -+ VDUP(vector2, q, float, f, 32, 4, 0.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 32, 4); -+ CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_zero2, " FP special (-0.0)"); -+#endif -+} -+ -+int main (void) -+{ -+ FNNAME (INSN_NAME) (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_op_no64.inc -@@ -28,6 +28,10 @@ void FNNAME (INSN_NAME) (void) - - /* Initialize input "vector" from "buffer". */ - TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#ifdef HAS_FLOAT16_VARIANT -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif - #ifdef HAS_FLOAT_VARIANT - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, float, f, 32, 4); -@@ -46,15 +50,27 @@ void FNNAME (INSN_NAME) (void) - VDUP(vector2, q, uint, u, 8, 16, 0xf9); - VDUP(vector2, q, uint, u, 16, 8, 0xfff2); - VDUP(vector2, q, uint, u, 32, 4, 0xfffffff1); -+#ifdef HAS_FLOAT16_VARIANT -+ VDUP(vector2, , float, f, 16, 4, -15.5f); -+ VDUP(vector2, q, float, f, 16, 8, -14.5f); -+#endif - #ifdef HAS_FLOAT_VARIANT - VDUP(vector2, , float, f, 32, 2, -15.5f); - VDUP(vector2, q, float, f, 32, 4, -14.5f); - #endif - -+#ifdef HAS_FLOAT16_VARIANT -+#define FLOAT16_VARIANT(MACRO, VAR) \ -+ MACRO(VAR, , float, f, 16, 4); \ -+ MACRO(VAR, q, float, f, 16, 8); -+#else -+#define FLOAT16_VARIANT(MACRO, VAR) -+#endif -+ - #ifdef HAS_FLOAT_VARIANT - #define FLOAT_VARIANT(MACRO, VAR) \ - MACRO(VAR, , float, f, 32, 2); \ -- MACRO(VAR, q, float, f, 32, 4) -+ MACRO(VAR, q, float, f, 32, 4); - #else - #define FLOAT_VARIANT(MACRO, VAR) - #endif -@@ -72,7 +88,8 @@ void FNNAME (INSN_NAME) (void) - MACRO(VAR, q, uint, u, 8, 16); \ - MACRO(VAR, q, uint, u, 16, 8); \ - MACRO(VAR, q, uint, u, 32, 4); \ -- FLOAT_VARIANT(MACRO, VAR) -+ FLOAT_VARIANT(MACRO, VAR); \ -+ FLOAT16_VARIANT(MACRO, VAR); - - /* Apply a binary operator named INSN_NAME. 
*/ - TEST_MACRO_NO64BIT_VARIANT_1_5(TEST_BINARY_OP, INSN_NAME); -@@ -90,6 +107,42 @@ void FNNAME (INSN_NAME) (void) - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); - -+#ifdef HAS_FLOAT16_VARIANT -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); -+ -+ /* Extra FP tests with special values (NaN, ....) */ -+ VDUP(vector, q, float, f, 16, 8, 1.0f); -+ VDUP(vector2, q, float, f, 16, 8, NAN); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_nan, " FP special (NaN)"); -+ -+ VDUP(vector, q, float, f, 16, 8, -NAN); -+ VDUP(vector2, q, float, f, 16, 8, 1.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_mnan, " FP special (-NaN)"); -+ -+ VDUP(vector, q, float, f, 16, 8, 1.0f); -+ VDUP(vector2, q, float, f, 16, 8, HUGE_VALF); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_inf, " FP special (inf)"); -+ -+ VDUP(vector, q, float, f, 16, 8, -HUGE_VALF); -+ VDUP(vector2, q, float, f, 16, 8, 1.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_minf, " FP special (-inf)"); -+ -+ VDUP(vector, q, float, f, 16, 8, 0.0f); -+ VDUP(vector2, q, float, f, 16, 8, -0.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_zero1, " FP special (-0.0)"); -+ -+ VDUP(vector, q, float, f, 16, 8, -0.0f); -+ VDUP(vector2, q, float, f, 16, 8, 0.0f); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_zero2, " FP special (-0.0)"); -+#endif -+ - #ifdef HAS_FLOAT_VARIANT - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/binary_scalar_op.inc -@@ -0,0 +1,160 @@ -+/* Template file for binary scalar operator validation. -+ -+ This file is meant to be included by test files for binary scalar -+ operations. */ -+ -+/* Check for required settings. */ -+ -+#ifndef INSN_NAME -+#error INSN_NAME (the intrinsic to test) must be defined. -+#endif -+ -+#ifndef INPUT_TYPE -+#error INPUT_TYPE (basic type of an input value) must be defined. -+#endif -+ -+#ifndef OUTPUT_TYPE -+#error OUTPUT_TYPE (basic type of an output value) must be defined. -+#endif -+ -+#ifndef OUTPUT_TYPE_SIZE -+#error OUTPUT_TYPE_SIZE (size in bits of an output value) must be defined. -+#endif -+ -+/* Optional settings: -+ -+ INPUT_1: Input values for the first parameter. Must be of type INPUT_TYPE. -+ INPUT_2: Input values for the second parameter. Must be of type -+ INPUT_TYPE. */ -+ -+#ifndef TEST_MSG -+#define TEST_MSG "unnamed test" -+#endif -+ -+/* The test framework. */ -+ -+#include <stdio.h> -+ -+extern void abort (); -+ -+#define INFF __builtin_inf () -+ -+/* Stringify a macro. */ -+#define STR0(A) #A -+#define STR(A) STR0 (A) -+ -+/* Macro concatenation. */ -+#define CAT0(A, B) A##B -+#define CAT(A, B) CAT0 (A, B) -+ -+/* Format strings for error reporting. */ -+#define FMT16 "0x%04x" -+#define FMT32 "0x%08x" -+#define FMT CAT (FMT,OUTPUT_TYPE_SIZE) -+ -+/* Type construction: forms TS_t, where T is the base type and S the size in -+ bits. */ -+#define MK_TYPE0(T, S) T##S##_t -+#define MK_TYPE(T, S) MK_TYPE0 (T, S) -+ -+/* Convenience types for input and output data. 
*/ -+typedef MK_TYPE (uint, OUTPUT_TYPE_SIZE) output_hex_type; -+ -+/* Conversion between typed values and their hexadecimal representation. */ -+typedef union -+{ -+ OUTPUT_TYPE value; -+ output_hex_type hex; -+} output_conv_type; -+ -+/* Default input values. */ -+ -+float16_t input_1_float16_t[] = -+{ -+ 0.0, -0.0, -+ 2.0, 3.1, -+ 20.0, 0.40, -+ -2.3, 1.33, -+ -7.6, 0.31, -+ 0.3353, 0.5, -+ 1.0, 13.13, -+ -6.3, 20.0, -+ (float16_t)INFF, (float16_t)-INFF, -+}; -+ -+float16_t input_2_float16_t[] = -+{ -+ 1.0, 1.0, -+ -4.33, 100.0, -+ 30.0, -0.02, -+ 0.5, -7.231, -+ -6.3, 20.0, -+ -7.231, 2.3, -+ -7.6, 5.1, -+ 0.31, 0.33353, -+ (float16_t)-INFF, (float16_t)INFF, -+}; -+ -+#ifndef INPUT_1 -+#define INPUT_1 CAT (input_1_,INPUT_TYPE) -+#endif -+ -+#ifndef INPUT_2 -+#define INPUT_2 CAT (input_2_,INPUT_TYPE) -+#endif -+ -+/* Support macros and routines for the test function. */ -+ -+#define CHECK() \ -+ { \ -+ output_conv_type actual; \ -+ output_conv_type expect; \ -+ \ -+ expect.hex = ((output_hex_type*)EXPECTED)[index]; \ -+ actual.value = INSN_NAME ((INPUT_1)[index], \ -+ (INPUT_2)[index]); \ -+ \ -+ if (actual.hex != expect.hex) \ -+ { \ -+ fprintf (stderr, \ -+ "ERROR in %s (%s line %d), buffer %s, " \ -+ "index %d: got " \ -+ FMT " != " FMT "\n", \ -+ TEST_MSG, __FILE__, __LINE__, \ -+ STR (EXPECTED), index, \ -+ actual.hex, expect.hex); \ -+ abort (); \ -+ } \ -+ fprintf (stderr, "CHECKED %s %s\n", \ -+ STR (EXPECTED), TEST_MSG); \ -+ } -+ -+#define FNNAME1(NAME) exec_ ## NAME -+#define FNNAME(NAME) FNNAME1 (NAME) -+ -+/* The test function. */ -+ -+void -+FNNAME (INSN_NAME) (void) -+{ -+ /* Basic test: y[i] = OP (x1[i], x2[i]), for each INPUT_1[i] and -+ INPUT_2[i], then compare the result against EXPECTED[i]. */ -+ -+ const int num_tests = sizeof (INPUT_1) / sizeof (INPUT_1[0]); -+ int index; -+ -+ for (index = 0; index < num_tests; index++) -+ CHECK (); -+ -+#ifdef EXTRA_TESTS -+ EXTRA_TESTS (); -+#endif -+} -+ -+int -+main (void) -+{ -+ FNNAME (INSN_NAME) (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_fp_op.inc -@@ -15,6 +15,10 @@ - each test file. */ - extern ARRAY(expected2, uint, 32, 2); - extern ARRAY(expected2, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+extern ARRAY(expected2, uint, 16, 4); -+extern ARRAY(expected2, uint, 16, 8); -+#endif - - #define FNNAME1(NAME) exec_ ## NAME - #define FNNAME(NAME) FNNAME1(NAME) -@@ -37,17 +41,33 @@ void FNNAME (INSN_NAME) (void) - DECL_VARIABLE(vector2, float, 32, 4); - DECL_VARIABLE(vector_res, uint, 32, 2); - DECL_VARIABLE(vector_res, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 8); -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 16, 8); -+#endif - - clean_results (); - - /* Initialize input "vector" from "buffer". */ - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, float, f, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif - - /* Choose init value arbitrarily, will be used for vector - comparison. 
*/ - VDUP(vector2, , float, f, 32, 2, -16.0f); - VDUP(vector2, q, float, f, 32, 4, -14.0f); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, -16.0f); -+ VDUP(vector2, q, float, f, 16, 8, -14.0f); -+#endif - - /* Apply operator named INSN_NAME. */ - TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); -@@ -56,15 +76,36 @@ void FNNAME (INSN_NAME) (void) - TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VCOMP(INSN_NAME, , float, f, uint, 16, 4); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); -+ -+ TEST_VCOMP(INSN_NAME, q, float, f, uint, 16, 8); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); -+#endif -+ - /* Test again, with different input values. */ - VDUP(vector2, , float, f, 32, 2, -10.0f); - VDUP(vector2, q, float, f, 32, 4, 10.0f); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, -10.0f); -+ VDUP(vector2, q, float, f, 16, 8, 10.0f); -+#endif -+ - TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected2, ""); - - TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected2,""); -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VCOMP(INSN_NAME, , float, f, uint, 16, 4); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected2, ""); -+ -+ TEST_VCOMP(INSN_NAME, q, float, f, uint, 16, 8); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected2,""); -+#endif - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_op.inc -@@ -11,6 +11,17 @@ extern ARRAY(expected_uint, uint, 32, 2); - extern ARRAY(expected_q_uint, uint, 8, 16); - extern ARRAY(expected_q_uint, uint, 16, 8); - extern ARRAY(expected_q_uint, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+extern ARRAY(expected_float, uint, 16, 4); -+extern ARRAY(expected_q_float, uint, 16, 8); -+extern ARRAY(expected_nan, uint, 16, 4); -+extern ARRAY(expected_mnan, uint, 16, 4); -+extern ARRAY(expected_nan2, uint, 16, 4); -+extern ARRAY(expected_inf, uint, 16, 4); -+extern ARRAY(expected_minf, uint, 16, 4); -+extern ARRAY(expected_inf2, uint, 16, 4); -+extern ARRAY(expected_mzero, uint, 16, 4); -+#endif - extern ARRAY(expected_float, uint, 32, 2); - extern ARRAY(expected_q_float, uint, 32, 4); - extern ARRAY(expected_uint2, uint, 32, 2); -@@ -48,6 +59,9 @@ void FNNAME (INSN_NAME) (void) - DECL_VARIABLE(vector, uint, 8, 8); - DECL_VARIABLE(vector, uint, 16, 4); - DECL_VARIABLE(vector, uint, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE (vector, float, 16, 4); -+#endif - DECL_VARIABLE(vector, float, 32, 2); - DECL_VARIABLE(vector, int, 8, 16); - DECL_VARIABLE(vector, int, 16, 8); -@@ -55,6 +69,9 @@ void FNNAME (INSN_NAME) (void) - DECL_VARIABLE(vector, uint, 8, 16); - DECL_VARIABLE(vector, uint, 16, 8); - DECL_VARIABLE(vector, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE (vector, float, 16, 8); -+#endif - DECL_VARIABLE(vector, float, 32, 4); - - DECL_VARIABLE(vector2, int, 8, 8); -@@ -63,6 +80,9 @@ void FNNAME (INSN_NAME) (void) - DECL_VARIABLE(vector2, uint, 8, 8); - DECL_VARIABLE(vector2, uint, 16, 4); - DECL_VARIABLE(vector2, uint, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE (vector2, float, 16, 4); -+#endif - DECL_VARIABLE(vector2, 
float, 32, 2); - DECL_VARIABLE(vector2, int, 8, 16); - DECL_VARIABLE(vector2, int, 16, 8); -@@ -70,6 +90,9 @@ void FNNAME (INSN_NAME) (void) - DECL_VARIABLE(vector2, uint, 8, 16); - DECL_VARIABLE(vector2, uint, 16, 8); - DECL_VARIABLE(vector2, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE (vector2, float, 16, 8); -+#endif - DECL_VARIABLE(vector2, float, 32, 4); - - DECL_VARIABLE(vector_res, uint, 8, 8); -@@ -88,6 +111,9 @@ void FNNAME (INSN_NAME) (void) - VLOAD(vector, buffer, , uint, u, 8, 8); - VLOAD(vector, buffer, , uint, u, 16, 4); - VLOAD(vector, buffer, , uint, u, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD (vector, buffer, , float, f, 16, 4); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - - VLOAD(vector, buffer, q, int, s, 8, 16); -@@ -96,6 +122,9 @@ void FNNAME (INSN_NAME) (void) - VLOAD(vector, buffer, q, uint, u, 8, 16); - VLOAD(vector, buffer, q, uint, u, 16, 8); - VLOAD(vector, buffer, q, uint, u, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD (vector, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector, buffer, q, float, f, 32, 4); - - /* Choose init value arbitrarily, will be used for vector -@@ -106,6 +135,9 @@ void FNNAME (INSN_NAME) (void) - VDUP(vector2, , uint, u, 8, 8, 0xF3); - VDUP(vector2, , uint, u, 16, 4, 0xFFF2); - VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF1); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP (vector2, , float, f, 16, 4, -15.0f); -+#endif - VDUP(vector2, , float, f, 32, 2, -15.0f); - - VDUP(vector2, q, int, s, 8, 16, -4); -@@ -114,6 +146,9 @@ void FNNAME (INSN_NAME) (void) - VDUP(vector2, q, uint, u, 8, 16, 0xF4); - VDUP(vector2, q, uint, u, 16, 8, 0xFFF6); - VDUP(vector2, q, uint, u, 32, 4, 0xFFFFFFF2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP (vector2, q, float, f, 16, 8, -14.0f); -+#endif - VDUP(vector2, q, float, f, 32, 4, -14.0f); - - /* The comparison operators produce only unsigned results, which -@@ -154,9 +189,17 @@ void FNNAME (INSN_NAME) (void) - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_q_uint, ""); - - /* The float variants. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_float, ""); -+#endif - TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_float, ""); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VCOMP (INSN_NAME, q, float, f, uint, 16, 8); -+ CHECK (TEST_MSG, uint, 16, 8, PRIx16, expected_q_float, ""); -+#endif - TEST_VCOMP(INSN_NAME, q, float, f, uint, 32, 4); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_q_float, ""); - -@@ -176,6 +219,43 @@ void FNNAME (INSN_NAME) (void) - - - /* Extra FP tests with special values (NaN, ....). 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP (vector, , float, f, 16, 4, 1.0); -+ VDUP (vector2, , float, f, 16, 4, NAN); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_nan, "FP special (NaN)"); -+ -+ VDUP (vector, , float, f, 16, 4, 1.0); -+ VDUP (vector2, , float, f, 16, 4, -NAN); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_mnan, " FP special (-NaN)"); -+ -+ VDUP (vector, , float, f, 16, 4, NAN); -+ VDUP (vector2, , float, f, 16, 4, 1.0); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_nan2, " FP special (NaN)"); -+ -+ VDUP (vector, , float, f, 16, 4, 1.0); -+ VDUP (vector2, , float, f, 16, 4, HUGE_VALF); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_inf, " FP special (inf)"); -+ -+ VDUP (vector, , float, f, 16, 4, 1.0); -+ VDUP (vector2, , float, f, 16, 4, -HUGE_VALF); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_minf, " FP special (-inf)"); -+ -+ VDUP (vector, , float, f, 16, 4, HUGE_VALF); -+ VDUP (vector2, , float, f, 16, 4, 1.0); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_inf2, " FP special (inf)"); -+ -+ VDUP (vector, , float, f, 16, 4, -0.0); -+ VDUP (vector2, , float, f, 16, 4, 0.0); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_mzero, " FP special (-0.0)"); -+#endif -+ - VDUP(vector, , float, f, 32, 2, 1.0); - VDUP(vector2, , float, f, 32, 2, NAN); - TEST_VCOMP(INSN_NAME, , float, f, uint, 32, 2); ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/cmp_zero_op.inc -@@ -0,0 +1,111 @@ -+/* Template file for the validation of compare against zero operators. -+ -+ This file is based on cmp_op.inc. It is meant to be included by the relevant -+ test files, which have to define the intrinsic family to test. If a given -+ intrinsic supports variants which are not supported by all the other -+ operators, these can be tested by providing a definition for EXTRA_TESTS. */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+#include <math.h> -+ -+/* Additional expected results declaration, they are initialized in -+ each test file. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+extern ARRAY(expected_float, uint, 16, 4); -+extern ARRAY(expected_q_float, uint, 16, 8); -+extern ARRAY(expected_uint2, uint, 16, 4); -+extern ARRAY(expected_uint3, uint, 16, 4); -+extern ARRAY(expected_uint4, uint, 16, 4); -+extern ARRAY(expected_nan, uint, 16, 4); -+extern ARRAY(expected_mnan, uint, 16, 4); -+extern ARRAY(expected_inf, uint, 16, 4); -+extern ARRAY(expected_minf, uint, 16, 4); -+extern ARRAY(expected_zero, uint, 16, 4); -+extern ARRAY(expected_mzero, uint, 16, 4); -+#endif -+ -+#define FNNAME1(NAME) exec_ ## NAME -+#define FNNAME(NAME) FNNAME1(NAME) -+ -+void FNNAME (INSN_NAME) (void) -+{ -+ /* Basic test: y=vcomp(x1), then store the result. */ -+#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ -+ VECT_VAR(vector_res, T3, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vector, T1, W, N)); \ -+ vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vector_res, T3, W, N)) -+ -+#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ -+ TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) -+ -+ /* No need for 64 bits elements. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE (vector, float, 16, 4); -+ DECL_VARIABLE (vector, float, 16, 8); -+#endif -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, uint, 16, 4); -+ DECL_VARIABLE(vector_res, uint, 16, 8); -+#endif -+ -+ clean_results (); -+ -+ /* Choose init value arbitrarily, will be used for vector -+ comparison. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP (vector, , float, f, 16, 4, -15.0f); -+ VDUP (vector, q, float, f, 16, 8, 14.0f); -+#endif -+ -+ /* Float variants. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ TEST_VCOMP (INSN_NAME, q, float, f, uint, 16, 8); -+#endif -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_float, ""); -+ CHECK (TEST_MSG, uint, 16, 8, PRIx16, expected_q_float, ""); -+#endif -+ -+ /* Extra FP tests with special values (NaN, ....). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP (vector, , float, f, 16, 4, NAN); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_nan, "FP special (NaN)"); -+ -+ VDUP (vector, , float, f, 16, 4, -NAN); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_mnan, " FP special (-NaN)"); -+ -+ VDUP (vector, , float, f, 16, 4, HUGE_VALF); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_inf, " FP special (inf)"); -+ -+ VDUP (vector, , float, f, 16, 4, -HUGE_VALF); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_minf, " FP special (-inf)"); -+ -+ VDUP (vector, , float, f, 16, 4, 0.0); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_zero, " FP special (0.0)"); -+ -+ VDUP (vector, , float, f, 16, 4, -0.0); -+ TEST_VCOMP (INSN_NAME, , float, f, uint, 16, 4); -+ CHECK (TEST_MSG, uint, 16, 4, PRIx16, expected_mzero, " FP special (-0.0)"); -+#endif -+ -+#ifdef EXTRA_TESTS -+ EXTRA_TESTS(); -+#endif -+} -+ -+int main (void) -+{ -+ FNNAME (INSN_NAME) (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/compute-ref-data.h -@@ -118,6 +118,10 @@ VECT_VAR_DECL_INIT(buffer, uint, 32, 2); - PAD(buffer_pad, uint, 32, 2); - VECT_VAR_DECL_INIT(buffer, uint, 64, 1); - PAD(buffer_pad, uint, 64, 1); -+#if defined (__ARM_FEATURE_CRYPTO) -+VECT_VAR_DECL_INIT(buffer, poly, 64, 1); -+PAD(buffer_pad, poly, 64, 1); -+#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - VECT_VAR_DECL_INIT(buffer, float, 16, 4); - PAD(buffer_pad, float, 16, 4); -@@ -144,6 +148,10 @@ VECT_VAR_DECL_INIT(buffer, poly, 8, 16); - PAD(buffer_pad, poly, 8, 16); - VECT_VAR_DECL_INIT(buffer, poly, 16, 8); - PAD(buffer_pad, poly, 16, 8); -+#if defined (__ARM_FEATURE_CRYPTO) -+VECT_VAR_DECL_INIT(buffer, poly, 64, 2); -+PAD(buffer_pad, poly, 64, 2); -+#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - VECT_VAR_DECL_INIT(buffer, float, 16, 8); - PAD(buffer_pad, float, 16, 8); -@@ -178,6 +186,10 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8); - VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8); - VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4); - VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4); -+#if defined 
(__ARM_FEATURE_CRYPTO) -+VECT_VAR_DECL_INIT4(buffer_dup, poly, 64, 1); -+VECT_VAR_DECL(buffer_dup_pad, poly, 64, 1); -+#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - VECT_VAR_DECL_INIT4(buffer_dup, float, 16, 4); - VECT_VAR_DECL(buffer_dup_pad, float, 16, 4); -@@ -205,6 +217,10 @@ VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16); - VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16); - VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8); - VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8); -+#if defined (__ARM_FEATURE_CRYPTO) -+VECT_VAR_DECL_INIT4(buffer_dup, poly, 64, 2); -+VECT_VAR_DECL(buffer_dup_pad, poly, 64, 2); -+#endif - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - VECT_VAR_DECL_INIT(buffer_dup, float, 16, 8); - VECT_VAR_DECL(buffer_dup_pad, float, 16, 8); ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/p64_p128.c -@@ -0,0 +1,1024 @@ -+/* This file contains tests for all the *p64 intrinsics, except for -+ vreinterpret which have their own testcase. */ -+ -+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */ -+/* { dg-add-options arm_crypto } */ -+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected results: vbsl. */ -+VECT_VAR_DECL(vbsl_expected,poly,64,1) [] = { 0xfffffff1 }; -+VECT_VAR_DECL(vbsl_expected,poly,64,2) [] = { 0xfffffff1, -+ 0xfffffff1 }; -+ -+/* Expected results: vceq. */ -+VECT_VAR_DECL(vceq_expected,uint,64,1) [] = { 0x0 }; -+ -+/* Expected results: vcombine. */ -+VECT_VAR_DECL(vcombine_expected,poly,64,2) [] = { 0xfffffffffffffff0, 0x88 }; -+ -+/* Expected results: vcreate. */ -+VECT_VAR_DECL(vcreate_expected,poly,64,1) [] = { 0x123456789abcdef0 }; -+ -+/* Expected results: vdup_lane. */ -+VECT_VAR_DECL(vdup_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vdup_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff0 }; -+ -+/* Expected results: vdup_n. */ -+VECT_VAR_DECL(vdup_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vdup_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vdup_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vdup_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vdup_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(vdup_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2, -+ 0xfffffffffffffff2 }; -+ -+/* Expected results: vmov_n. */ -+VECT_VAR_DECL(vmov_n_expected0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vmov_n_expected0,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vmov_n_expected1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vmov_n_expected1,poly,64,2) [] = { 0xfffffffffffffff1, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vmov_n_expected2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(vmov_n_expected2,poly,64,2) [] = { 0xfffffffffffffff2, -+ 0xfffffffffffffff2 }; -+ -+/* Expected results: vext. */ -+VECT_VAR_DECL(vext_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vext_expected,poly,64,2) [] = { 0xfffffffffffffff1, 0x88 }; -+ -+/* Expected results: vget_low. */ -+VECT_VAR_DECL(vget_low_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; -+ -+/* Expected results: vget_high. 
*/ -+VECT_VAR_DECL(vget_high_expected,poly,64,1) [] = { 0xfffffffffffffff1 }; -+ -+/* Expected results: vld1. */ -+VECT_VAR_DECL(vld1_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld1_expected,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+ -+/* Expected results: vld1_dup. */ -+VECT_VAR_DECL(vld1_dup_expected0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld1_dup_expected0,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld1_dup_expected1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vld1_dup_expected1,poly,64,2) [] = { 0xfffffffffffffff1, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vld1_dup_expected2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(vld1_dup_expected2,poly,64,2) [] = { 0xfffffffffffffff2, -+ 0xfffffffffffffff2 }; -+ -+/* Expected results: vld1_lane. */ -+VECT_VAR_DECL(vld1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xaaaaaaaaaaaaaaaa }; -+ -+/* Expected results: vldX. */ -+VECT_VAR_DECL(vld2_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld2_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vld3_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld3_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vld3_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(vld4_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld4_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vld4_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(vld4_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 }; -+ -+/* Expected results: vldX_dup. */ -+VECT_VAR_DECL(vld2_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld2_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vld3_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld3_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vld3_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(vld4_dup_expected_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vld4_dup_expected_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vld4_dup_expected_2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(vld4_dup_expected_3,poly,64,1) [] = { 0xfffffffffffffff3 }; -+ -+/* Expected results: vsli. */ -+VECT_VAR_DECL(vsli_expected,poly,64,1) [] = { 0x10 }; -+VECT_VAR_DECL(vsli_expected,poly,64,2) [] = { 0x7ffffffffffff0, -+ 0x7ffffffffffff1 }; -+VECT_VAR_DECL(vsli_expected_max_shift,poly,64,1) [] = { 0x7ffffffffffffff0 }; -+VECT_VAR_DECL(vsli_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+ -+/* Expected results: vsri. */ -+VECT_VAR_DECL(vsri_expected,poly,64,1) [] = { 0xe000000000000000 }; -+VECT_VAR_DECL(vsri_expected,poly,64,2) [] = { 0xfffffffffffff800, -+ 0xfffffffffffff800 }; -+VECT_VAR_DECL(vsri_expected_max_shift,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vsri_expected_max_shift,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+ -+/* Expected results: vst1_lane. */ -+VECT_VAR_DECL(vst1_lane_expected,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vst1_lane_expected,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0x3333333333333333 }; -+ -+/* Expected results: vldX_lane. 
*/ -+VECT_VAR_DECL(expected_vld_st2_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected_vld_st2_0,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected_vld_st2_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected_vld_st2_1,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa, -+ 0xaaaaaaaaaaaaaaaa }; -+VECT_VAR_DECL(expected_vld_st3_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected_vld_st3_0,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected_vld_st3_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected_vld_st3_1,poly,64,2) [] = { 0xfffffffffffffff2, -+ 0xaaaaaaaaaaaaaaaa }; -+VECT_VAR_DECL(expected_vld_st3_2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(expected_vld_st3_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa, -+ 0xaaaaaaaaaaaaaaaa }; -+VECT_VAR_DECL(expected_vld_st4_0,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected_vld_st4_0,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected_vld_st4_1,poly,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected_vld_st4_1,poly,64,2) [] = { 0xfffffffffffffff2, -+ 0xfffffffffffffff3 }; -+VECT_VAR_DECL(expected_vld_st4_2,poly,64,1) [] = { 0xfffffffffffffff2 }; -+VECT_VAR_DECL(expected_vld_st4_2,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa, -+ 0xaaaaaaaaaaaaaaaa }; -+VECT_VAR_DECL(expected_vld_st4_3,poly,64,1) [] = { 0xfffffffffffffff3 }; -+VECT_VAR_DECL(expected_vld_st4_3,poly,64,2) [] = { 0xaaaaaaaaaaaaaaaa, -+ 0xaaaaaaaaaaaaaaaa }; -+ -+/* Expected results: vget_lane. */ -+VECT_VAR_DECL(vget_lane_expected,poly,64,1) = 0xfffffffffffffff0; -+VECT_VAR_DECL(vget_lane_expected,poly,64,2) = 0xfffffffffffffff0; -+ -+int main (void) -+{ -+ int i; -+ -+ /* vbsl_p64 tests. */ -+#define TEST_MSG "VBSL/VBSLQ" -+ -+#define TEST_VBSL(T3, Q, T1, T2, W, N) \ -+ VECT_VAR(vbsl_vector_res, T1, W, N) = \ -+ vbsl##Q##_##T2##W(VECT_VAR(vbsl_vector_first, T3, W, N), \ -+ VECT_VAR(vbsl_vector, T1, W, N), \ -+ VECT_VAR(vbsl_vector2, T1, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vbsl_vector_res, T1, W, N)) -+ -+ DECL_VARIABLE(vbsl_vector, poly, 64, 1); -+ DECL_VARIABLE(vbsl_vector, poly, 64, 2); -+ DECL_VARIABLE(vbsl_vector2, poly, 64, 1); -+ DECL_VARIABLE(vbsl_vector2, poly, 64, 2); -+ DECL_VARIABLE(vbsl_vector_res, poly, 64, 1); -+ DECL_VARIABLE(vbsl_vector_res, poly, 64, 2); -+ -+ DECL_VARIABLE(vbsl_vector_first, uint, 64, 1); -+ DECL_VARIABLE(vbsl_vector_first, uint, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ VLOAD(vbsl_vector, buffer, , poly, p, 64, 1); -+ VLOAD(vbsl_vector, buffer, q, poly, p, 64, 2); -+ -+ VDUP(vbsl_vector2, , poly, p, 64, 1, 0xFFFFFFF3); -+ VDUP(vbsl_vector2, q, poly, p, 64, 2, 0xFFFFFFF3); -+ -+ VDUP(vbsl_vector_first, , uint, u, 64, 1, 0xFFFFFFF2); -+ VDUP(vbsl_vector_first, q, uint, u, 64, 2, 0xFFFFFFF2); -+ -+ TEST_VBSL(uint, , poly, p, 64, 1); -+ TEST_VBSL(uint, q, poly, p, 64, 2); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vbsl_expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vbsl_expected, ""); -+ -+ /* vceq_p64 tests. 
*/ -+#undef TEST_MSG -+#define TEST_MSG "VCEQ" -+ -+#define TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) \ -+ VECT_VAR(vceq_vector_res, T3, W, N) = \ -+ INSN##Q##_##T2##W(VECT_VAR(vceq_vector, T1, W, N), \ -+ VECT_VAR(vceq_vector2, T1, W, N)); \ -+ vst1##Q##_u##W(VECT_VAR(result, T3, W, N), VECT_VAR(vceq_vector_res, T3, W, N)) -+ -+#define TEST_VCOMP(INSN, Q, T1, T2, T3, W, N) \ -+ TEST_VCOMP1(INSN, Q, T1, T2, T3, W, N) -+ -+ DECL_VARIABLE(vceq_vector, poly, 64, 1); -+ DECL_VARIABLE(vceq_vector2, poly, 64, 1); -+ DECL_VARIABLE(vceq_vector_res, uint, 64, 1); -+ -+ CLEAN(result, uint, 64, 1); -+ -+ VLOAD(vceq_vector, buffer, , poly, p, 64, 1); -+ -+ VDUP(vceq_vector2, , poly, p, 64, 1, 0x88); -+ -+ TEST_VCOMP(vceq, , poly, p, uint, 64, 1); -+ -+ CHECK(TEST_MSG, uint, 64, 1, PRIx64, vceq_expected, ""); -+ -+ /* vcombine_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VCOMBINE" -+ -+#define TEST_VCOMBINE(T1, T2, W, N, N2) \ -+ VECT_VAR(vcombine_vector128, T1, W, N2) = \ -+ vcombine_##T2##W(VECT_VAR(vcombine_vector64_a, T1, W, N), \ -+ VECT_VAR(vcombine_vector64_b, T1, W, N)); \ -+ vst1q_##T2##W(VECT_VAR(result, T1, W, N2), VECT_VAR(vcombine_vector128, T1, W, N2)) -+ -+ DECL_VARIABLE(vcombine_vector64_a, poly, 64, 1); -+ DECL_VARIABLE(vcombine_vector64_b, poly, 64, 1); -+ DECL_VARIABLE(vcombine_vector128, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 2); -+ -+ VLOAD(vcombine_vector64_a, buffer, , poly, p, 64, 1); -+ -+ VDUP(vcombine_vector64_b, , poly, p, 64, 1, 0x88); -+ -+ TEST_VCOMBINE(poly, p, 64, 1, 2); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vcombine_expected, ""); -+ -+ /* vcreate_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VCREATE" -+ -+#define TEST_VCREATE(T1, T2, W, N) \ -+ VECT_VAR(vcreate_vector_res, T1, W, N) = \ -+ vcreate_##T2##W(VECT_VAR(vcreate_val, T1, W, N)); \ -+ vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vcreate_vector_res, T1, W, N)) -+ -+#define DECL_VAL(VAR, T1, W, N) \ -+ uint64_t VECT_VAR(VAR, T1, W, N) -+ -+ DECL_VAL(vcreate_val, poly, 64, 1); -+ DECL_VARIABLE(vcreate_vector_res, poly, 64, 1); -+ -+ CLEAN(result, poly, 64, 2); -+ -+ VECT_VAR(vcreate_val, poly, 64, 1) = 0x123456789abcdef0ULL; -+ -+ TEST_VCREATE(poly, p, 64, 1); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vcreate_expected, ""); -+ -+ /* vdup_lane_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VDUP_LANE/VDUP_LANEQ" -+ -+#define TEST_VDUP_LANE(Q, T1, T2, W, N, N2, L) \ -+ VECT_VAR(vdup_lane_vector_res, T1, W, N) = \ -+ vdup##Q##_lane_##T2##W(VECT_VAR(vdup_lane_vector, T1, W, N2), L); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_lane_vector_res, T1, W, N)) -+ -+ DECL_VARIABLE(vdup_lane_vector, poly, 64, 1); -+ DECL_VARIABLE(vdup_lane_vector, poly, 64, 2); -+ DECL_VARIABLE(vdup_lane_vector_res, poly, 64, 1); -+ DECL_VARIABLE(vdup_lane_vector_res, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ VLOAD(vdup_lane_vector, buffer, , poly, p, 64, 1); -+ -+ TEST_VDUP_LANE(, poly, p, 64, 1, 1, 0); -+ TEST_VDUP_LANE(q, poly, p, 64, 2, 1, 0); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vdup_lane_expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vdup_lane_expected, ""); -+ -+ /* vdup_n_p64 tests. 
*/ -+#undef TEST_MSG -+#define TEST_MSG "VDUP/VDUPQ" -+ -+#define TEST_VDUP(Q, T1, T2, W, N) \ -+ VECT_VAR(vdup_n_vector, T1, W, N) = \ -+ vdup##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vdup_n_vector, T1, W, N)) -+ -+ DECL_VARIABLE(vdup_n_vector, poly, 64, 1); -+ DECL_VARIABLE(vdup_n_vector, poly, 64, 2); -+ -+ /* Try to read different places from the input buffer. */ -+ for (i=0; i< 3; i++) { -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ TEST_VDUP(, poly, p, 64, 1); -+ TEST_VDUP(q, poly, p, 64, 2); -+ -+ switch (i) { -+ case 0: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected0, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected0, ""); -+ break; -+ case 1: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected1, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected1, ""); -+ break; -+ case 2: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vdup_n_expected2, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vdup_n_expected2, ""); -+ break; -+ default: -+ abort(); -+ } -+ } -+ -+ /* vext_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VEXT/VEXTQ" -+ -+#define TEST_VEXT(Q, T1, T2, W, N, V) \ -+ VECT_VAR(vext_vector_res, T1, W, N) = \ -+ vext##Q##_##T2##W(VECT_VAR(vext_vector1, T1, W, N), \ -+ VECT_VAR(vext_vector2, T1, W, N), \ -+ V); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vext_vector_res, T1, W, N)) -+ -+ DECL_VARIABLE(vext_vector1, poly, 64, 1); -+ DECL_VARIABLE(vext_vector1, poly, 64, 2); -+ DECL_VARIABLE(vext_vector2, poly, 64, 1); -+ DECL_VARIABLE(vext_vector2, poly, 64, 2); -+ DECL_VARIABLE(vext_vector_res, poly, 64, 1); -+ DECL_VARIABLE(vext_vector_res, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ VLOAD(vext_vector1, buffer, , poly, p, 64, 1); -+ VLOAD(vext_vector1, buffer, q, poly, p, 64, 2); -+ -+ VDUP(vext_vector2, , poly, p, 64, 1, 0x88); -+ VDUP(vext_vector2, q, poly, p, 64, 2, 0x88); -+ -+ TEST_VEXT(, poly, p, 64, 1, 0); -+ TEST_VEXT(q, poly, p, 64, 2, 1); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vext_expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vext_expected, ""); -+ -+ /* vget_low_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VGET_LOW" -+ -+#define TEST_VGET_LOW(T1, T2, W, N, N2) \ -+ VECT_VAR(vget_low_vector64, T1, W, N) = \ -+ vget_low_##T2##W(VECT_VAR(vget_low_vector128, T1, W, N2)); \ -+ vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_low_vector64, T1, W, N)) -+ -+ DECL_VARIABLE(vget_low_vector64, poly, 64, 1); -+ DECL_VARIABLE(vget_low_vector128, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ -+ VLOAD(vget_low_vector128, buffer, q, poly, p, 64, 2); -+ -+ TEST_VGET_LOW(poly, p, 64, 1, 2); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vget_low_expected, ""); -+ -+ /* vget_high_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VGET_HIGH" -+ -+#define TEST_VGET_HIGH(T1, T2, W, N, N2) \ -+ VECT_VAR(vget_high_vector64, T1, W, N) = \ -+ vget_high_##T2##W(VECT_VAR(vget_high_vector128, T1, W, N2)); \ -+ vst1_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vget_high_vector64, T1, W, N)) -+ -+ DECL_VARIABLE(vget_high_vector64, poly, 64, 1); -+ DECL_VARIABLE(vget_high_vector128, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ -+ VLOAD(vget_high_vector128, buffer, q, poly, p, 64, 2); -+ -+ TEST_VGET_HIGH(poly, p, 64, 1, 2); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vget_high_expected, ""); -+ -+ /* vld1_p64 tests. 
*/ -+#undef TEST_MSG -+#define TEST_MSG "VLD1/VLD1Q" -+ -+#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \ -+ VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) -+ -+ DECL_VARIABLE(vld1_vector, poly, 64, 1); -+ DECL_VARIABLE(vld1_vector, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ VLOAD(vld1_vector, buffer, , poly, p, 64, 1); -+ VLOAD(vld1_vector, buffer, q, poly, p, 64, 2); -+ -+ TEST_VLD1(vld1_vector, buffer, , poly, p, 64, 1); -+ TEST_VLD1(vld1_vector, buffer, q, poly, p, 64, 2); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_expected, ""); -+ -+ /* vld1_dup_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VLD1_DUP/VLD1_DUPQ" -+ -+#define TEST_VLD1_DUP(VAR, BUF, Q, T1, T2, W, N) \ -+ VECT_VAR(VAR, T1, W, N) = \ -+ vld1##Q##_dup_##T2##W(&VECT_VAR(BUF, T1, W, N)[i]); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N)) -+ -+ DECL_VARIABLE(vld1_dup_vector, poly, 64, 1); -+ DECL_VARIABLE(vld1_dup_vector, poly, 64, 2); -+ -+ /* Try to read different places from the input buffer. */ -+ for (i=0; i<3; i++) { -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ TEST_VLD1_DUP(vld1_dup_vector, buffer_dup, , poly, p, 64, 1); -+ TEST_VLD1_DUP(vld1_dup_vector, buffer_dup, q, poly, p, 64, 2); -+ -+ switch (i) { -+ case 0: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected0, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected0, ""); -+ break; -+ case 1: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected1, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected1, ""); -+ break; -+ case 2: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_dup_expected2, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_dup_expected2, ""); -+ break; -+ default: -+ abort(); -+ } -+ } -+ -+ /* vld1_lane_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VLD1_LANE/VLD1_LANEQ" -+ -+#define TEST_VLD1_LANE(Q, T1, T2, W, N, L) \ -+ memset (VECT_VAR(vld1_lane_buffer_src, T1, W, N), 0xAA, W/8*N); \ -+ VECT_VAR(vld1_lane_vector_src, T1, W, N) = \ -+ vld1##Q##_##T2##W(VECT_VAR(vld1_lane_buffer_src, T1, W, N)); \ -+ VECT_VAR(vld1_lane_vector, T1, W, N) = \ -+ vld1##Q##_lane_##T2##W(VECT_VAR(buffer, T1, W, N), \ -+ VECT_VAR(vld1_lane_vector_src, T1, W, N), L); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vld1_lane_vector, T1, W, N)) -+ -+ DECL_VARIABLE(vld1_lane_vector, poly, 64, 1); -+ DECL_VARIABLE(vld1_lane_vector, poly, 64, 2); -+ DECL_VARIABLE(vld1_lane_vector_src, poly, 64, 1); -+ DECL_VARIABLE(vld1_lane_vector_src, poly, 64, 2); -+ -+ ARRAY(vld1_lane_buffer_src, poly, 64, 1); -+ ARRAY(vld1_lane_buffer_src, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ TEST_VLD1_LANE(, poly, p, 64, 1, 0); -+ TEST_VLD1_LANE(q, poly, p, 64, 2, 0); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld1_lane_expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vld1_lane_expected, ""); -+ -+ /* vldX_p64 tests. 
*/ -+#define DECL_VLDX(T1, W, N, X) \ -+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_vector, T1, W, N, X); \ -+ VECT_VAR_DECL(vldX_result_bis_##X, T1, W, N)[X * N] -+ -+#define TEST_VLDX(Q, T1, T2, W, N, X) \ -+ VECT_ARRAY_VAR(vldX_vector, T1, W, N, X) = \ -+ /* Use dedicated init buffer, of size X */ \ -+ vld##X##Q##_##T2##W(VECT_ARRAY_VAR(buffer_vld##X, T1, W, N, X)); \ -+ vst##X##Q##_##T2##W(VECT_VAR(vldX_result_bis_##X, T1, W, N), \ -+ VECT_ARRAY_VAR(vldX_vector, T1, W, N, X)); \ -+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_result_bis_##X, T1, W, N), \ -+ sizeof(VECT_VAR(result, T1, W, N))); -+ -+ /* Overwrite "result" with the contents of "result_bis"[Y]. */ -+#define TEST_EXTRA_CHUNK(T1, W, N, X,Y) \ -+ memcpy(VECT_VAR(result, T1, W, N), \ -+ &(VECT_VAR(vldX_result_bis_##X, T1, W, N)[Y*N]), \ -+ sizeof(VECT_VAR(result, T1, W, N))); -+ -+ DECL_VLDX(poly, 64, 1, 2); -+ DECL_VLDX(poly, 64, 1, 3); -+ DECL_VLDX(poly, 64, 1, 4); -+ -+ VECT_ARRAY_INIT2(buffer_vld2, poly, 64, 1); -+ PAD(buffer_vld2_pad, poly, 64, 1); -+ VECT_ARRAY_INIT3(buffer_vld3, poly, 64, 1); -+ PAD(buffer_vld3_pad, poly, 64, 1); -+ VECT_ARRAY_INIT4(buffer_vld4, poly, 64, 1); -+ PAD(buffer_vld4_pad, poly, 64, 1); -+ -+#undef TEST_MSG -+#define TEST_MSG "VLD2/VLD2Q" -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX(, poly, p, 64, 1, 2); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_0, "chunk 0"); -+ CLEAN(result, poly, 64, 1); -+ TEST_EXTRA_CHUNK(poly, 64, 1, 2, 1); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld2_expected_1, "chunk 1"); -+ -+#undef TEST_MSG -+#define TEST_MSG "VLD3/VLD3Q" -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX(, poly, p, 64, 1, 3); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_0, "chunk 0"); -+ CLEAN(result, poly, 64, 1); -+ TEST_EXTRA_CHUNK(poly, 64, 1, 3, 1); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_1, "chunk 1"); -+ CLEAN(result, poly, 64, 1); -+ TEST_EXTRA_CHUNK(poly, 64, 1, 3, 2); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_expected_2, "chunk 2"); -+ -+#undef TEST_MSG -+#define TEST_MSG "VLD4/VLD4Q" -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX(, poly, p, 64, 1, 4); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_0, "chunk 0"); -+ CLEAN(result, poly, 64, 1); -+ TEST_EXTRA_CHUNK(poly, 64, 1, 4, 1); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_1, "chunk 1"); -+ CLEAN(result, poly, 64, 1); -+ TEST_EXTRA_CHUNK(poly, 64, 1, 4, 2); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_2, "chunk 2"); -+ CLEAN(result, poly, 64, 1); -+ TEST_EXTRA_CHUNK(poly, 64, 1, 4, 3); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_expected_3, "chunk 3"); -+ -+ /* vldX_dup_p64 tests. */ -+#define DECL_VLDX_DUP(T1, W, N, X) \ -+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X); \ -+ VECT_VAR_DECL(vldX_dup_result_bis_##X, T1, W, N)[X * N] -+ -+#define TEST_VLDX_DUP(Q, T1, T2, W, N, X) \ -+ VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X) = \ -+ vld##X##Q##_dup_##T2##W(&VECT_VAR(buffer_dup, T1, W, N)[0]); \ -+ \ -+ vst##X##Q##_##T2##W(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \ -+ VECT_ARRAY_VAR(vldX_dup_vector, T1, W, N, X)); \ -+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(vldX_dup_result_bis_##X, T1, W, N), \ -+ sizeof(VECT_VAR(result, T1, W, N))); -+ -+ /* Overwrite "result" with the contents of "result_bis"[Y]. 
*/ -+#define TEST_VLDX_DUP_EXTRA_CHUNK(T1, W, N, X,Y) \ -+ memcpy(VECT_VAR(result, T1, W, N), \ -+ &(VECT_VAR(vldX_dup_result_bis_##X, T1, W, N)[Y*N]), \ -+ sizeof(VECT_VAR(result, T1, W, N))); -+ -+ DECL_VLDX_DUP(poly, 64, 1, 2); -+ DECL_VLDX_DUP(poly, 64, 1, 3); -+ DECL_VLDX_DUP(poly, 64, 1, 4); -+ -+ -+#undef TEST_MSG -+#define TEST_MSG "VLD2_DUP/VLD2Q_DUP" -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP(, poly, p, 64, 1, 2); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_0, "chunk 0"); -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 2, 1); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld2_dup_expected_1, "chunk 1"); -+ -+#undef TEST_MSG -+#define TEST_MSG "VLD3_DUP/VLD3Q_DUP" -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP(, poly, p, 64, 1, 3); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_0, "chunk 0"); -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 3, 1); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_1, "chunk 1"); -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 3, 2); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld3_dup_expected_2, "chunk 2"); -+ -+#undef TEST_MSG -+#define TEST_MSG "VLD4_DUP/VLD4Q_DUP" -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP(, poly, p, 64, 1, 4); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_0, "chunk 0"); -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 1); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_1, "chunk 1"); -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 2); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_2, "chunk 2"); -+ CLEAN(result, poly, 64, 1); -+ TEST_VLDX_DUP_EXTRA_CHUNK(poly, 64, 1, 4, 3); -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vld4_dup_expected_3, "chunk 3"); -+ -+ /* vsli_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VSLI" -+ -+#define TEST_VSXI1(INSN, Q, T1, T2, W, N, V) \ -+ VECT_VAR(vsXi_vector_res, T1, W, N) = \ -+ INSN##Q##_n_##T2##W(VECT_VAR(vsXi_vector, T1, W, N), \ -+ VECT_VAR(vsXi_vector2, T1, W, N), \ -+ V); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vsXi_vector_res, T1, W, N)) -+ -+#define TEST_VSXI(INSN, Q, T1, T2, W, N, V) \ -+ TEST_VSXI1(INSN, Q, T1, T2, W, N, V) -+ -+ DECL_VARIABLE(vsXi_vector, poly, 64, 1); -+ DECL_VARIABLE(vsXi_vector, poly, 64, 2); -+ DECL_VARIABLE(vsXi_vector2, poly, 64, 1); -+ DECL_VARIABLE(vsXi_vector2, poly, 64, 2); -+ DECL_VARIABLE(vsXi_vector_res, poly, 64, 1); -+ DECL_VARIABLE(vsXi_vector_res, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ VLOAD(vsXi_vector, buffer, , poly, p, 64, 1); -+ VLOAD(vsXi_vector, buffer, q, poly, p, 64, 2); -+ -+ VDUP(vsXi_vector2, , poly, p, 64, 1, 2); -+ VDUP(vsXi_vector2, q, poly, p, 64, 2, 3); -+ -+ TEST_VSXI(vsli, , poly, p, 64, 1, 3); -+ TEST_VSXI(vsli, q, poly, p, 64, 2, 53); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vsli_expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vsli_expected, ""); -+ -+ /* Test cases with maximum shift amount. */ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ TEST_VSXI(vsli, , poly, p, 64, 1, 63); -+ TEST_VSXI(vsli, q, poly, p, 64, 2, 63); -+ -+#define COMMENT "(max shift amount)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vsli_expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vsli_expected_max_shift, COMMENT); -+ -+ /* vsri_p64 tests. 
*/ -+#undef TEST_MSG -+#define TEST_MSG "VSRI" -+ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ VLOAD(vsXi_vector, buffer, , poly, p, 64, 1); -+ VLOAD(vsXi_vector, buffer, q, poly, p, 64, 2); -+ -+ VDUP(vsXi_vector2, , poly, p, 64, 1, 2); -+ VDUP(vsXi_vector2, q, poly, p, 64, 2, 3); -+ -+ TEST_VSXI(vsri, , poly, p, 64, 1, 3); -+ TEST_VSXI(vsri, q, poly, p, 64, 2, 53); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vsri_expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vsri_expected, ""); -+ -+ /* Test cases with maximum shift amount. */ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ TEST_VSXI(vsri, , poly, p, 64, 1, 64); -+ TEST_VSXI(vsri, q, poly, p, 64, 2, 64); -+ -+#define COMMENT "(max shift amount)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vsri_expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vsri_expected_max_shift, COMMENT); -+ -+ /* vst1_lane_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VST1_LANE/VST1_LANEQ" -+ -+#define TEST_VST1_LANE(Q, T1, T2, W, N, L) \ -+ VECT_VAR(vst1_lane_vector, T1, W, N) = \ -+ vld1##Q##_##T2##W(VECT_VAR(buffer, T1, W, N)); \ -+ vst1##Q##_lane_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vst1_lane_vector, T1, W, N), L); -+ -+ DECL_VARIABLE(vst1_lane_vector, poly, 64, 1); -+ DECL_VARIABLE(vst1_lane_vector, poly, 64, 2); -+ -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ TEST_VST1_LANE(, poly, p, 64, 1, 0); -+ TEST_VST1_LANE(q, poly, p, 64, 2, 0); -+ -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vst1_lane_expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vst1_lane_expected, ""); -+ -+#ifdef __aarch64__ -+ -+ /* vmov_n_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VMOV/VMOVQ" -+ -+#define TEST_VMOV(Q, T1, T2, W, N) \ -+ VECT_VAR(vmov_n_vector, T1, W, N) = \ -+ vmov##Q##_n_##T2##W(VECT_VAR(buffer_dup, T1, W, N)[i]); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vmov_n_vector, T1, W, N)) -+ -+ DECL_VARIABLE(vmov_n_vector, poly, 64, 1); -+ DECL_VARIABLE(vmov_n_vector, poly, 64, 2); -+ -+ /* Try to read different places from the input buffer. */ -+ for (i=0; i< 3; i++) { -+ CLEAN(result, poly, 64, 1); -+ CLEAN(result, poly, 64, 2); -+ -+ TEST_VMOV(, poly, p, 64, 1); -+ TEST_VMOV(q, poly, p, 64, 2); -+ -+ switch (i) { -+ case 0: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected0, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected0, ""); -+ break; -+ case 1: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected1, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected1, ""); -+ break; -+ case 2: -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, vmov_n_expected2, ""); -+ CHECK_POLY(TEST_MSG, poly, 64, 2, PRIx64, vmov_n_expected2, ""); -+ break; -+ default: -+ abort(); -+ } -+ } -+ -+ /* vget_lane_p64 tests. 
*/ -+#undef TEST_MSG -+#define TEST_MSG "VGET_LANE/VGETQ_LANE" -+ -+#define TEST_VGET_LANE(Q, T1, T2, W, N, L) \ -+ VECT_VAR(vget_lane_vector, T1, W, N) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \ -+ if (VECT_VAR(vget_lane_vector, T1, W, N) != VECT_VAR(vget_lane_expected, T1, W, N)) { \ -+ fprintf(stderr, \ -+ "ERROR in %s (%s line %d in result '%s') at type %s " \ -+ "got 0x%" PRIx##W " != 0x%" PRIx##W "\n", \ -+ TEST_MSG, __FILE__, __LINE__, \ -+ STR(VECT_VAR(vget_lane_expected, T1, W, N)), \ -+ STR(VECT_NAME(T1, W, N)), \ -+ (uint##W##_t)VECT_VAR(vget_lane_vector, T1, W, N), \ -+ (uint##W##_t)VECT_VAR(vget_lane_expected, T1, W, N)); \ -+ abort (); \ -+ } -+ -+ /* Initialize input values. */ -+ DECL_VARIABLE(vector, poly, 64, 1); -+ DECL_VARIABLE(vector, poly, 64, 2); -+ -+ VLOAD(vector, buffer, , poly, p, 64, 1); -+ VLOAD(vector, buffer, q, poly, p, 64, 2); -+ -+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 1); -+ VECT_VAR_DECL(vget_lane_vector, poly, 64, 2); -+ -+ TEST_VGET_LANE( , poly, p, 64, 1, 0); -+ TEST_VGET_LANE(q, poly, p, 64, 2, 0); -+ -+ /* vldx_lane_p64 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VLDX_LANE/VLDXQ_LANE" -+ -+VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 64, 2); -+VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 64, 3); -+VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 64, 4); -+ -+ /* In this case, input variables are arrays of vectors. */ -+#define DECL_VLD_STX_LANE(T1, W, N, X) \ -+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ -+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ -+ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] -+ -+ /* We need to use a temporary result buffer (result_bis), because -+ the one used for other tests is not large enough. A subset of the -+ result data is moved from result_bis to result, and it is this -+ subset which is used to check the actual behavior. The next -+ macro moves another chunk of data from result_bis to -+ result. */ -+ /* We also use another extra input buffer (buffer_src), which we -+ fill with 0xAA, and which is used to load a vector from which we -+ read a given lane. */ -+ -+#define TEST_VLDX_LANE(Q, T1, T2, W, N, X, L) \ -+ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ -+ sizeof(VECT_VAR(buffer_src, T1, W, N))); \ -+ \ -+ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ -+ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ -+ \ -+ VECT_ARRAY_VAR(vector, T1, W, N, X) = \ -+ /* Use dedicated init buffer, of size X. */ \ -+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \ -+ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ -+ L); \ -+ vst##X##Q##_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ -+ VECT_ARRAY_VAR(vector, T1, W, N, X)); \ -+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ -+ sizeof(VECT_VAR(result, T1, W, N))) -+ -+ /* Overwrite "result" with the contents of "result_bis"[Y]. */ -+#undef TEST_EXTRA_CHUNK -+#define TEST_EXTRA_CHUNK(T1, W, N, X, Y) \ -+ memcpy(VECT_VAR(result, T1, W, N), \ -+ &(VECT_VAR(result_bis_##X, T1, W, N)[Y*N]), \ -+ sizeof(VECT_VAR(result, T1, W, N)));
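The result_bis chunking scheme described in the comments above is easier to see in isolation. A minimal standalone C sketch (illustrative only, not patch content; the names, X, and N here are assumptions for the poly64 case): a vstX store writes X interleaved vectors into one oversized buffer, and each N-element chunk is then copied over the small buffer the checks actually read.

#include <string.h>
#include <stdint.h>

enum { X = 4, N = 1 };               /* e.g. a vld4/vst4 pair on poly64x1_t */

static uint64_t result_bis[X * N];   /* oversized buffer filled by the vstX store */
static uint64_t result[N];           /* small buffer the CHECK_POLY macros read */

/* Equivalent of TEST_EXTRA_CHUNK: overwrite "result" with chunk Y of
   "result_bis" so each of the X de-interleaved vectors is checked in turn. */
static void
extra_chunk (int y)
{
  memcpy (result, &result_bis[y * N], sizeof (result));
}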
-+ -+ /* Add some padding to try to catch out of bound accesses. */ -+#define ARRAY1(V, T, W, N) VECT_VAR_DECL(V,T,W,N)[1]={42} -+#define DUMMY_ARRAY(V, T, W, N, L) \ -+ VECT_VAR_DECL(V,T,W,N)[N*L]={0}; \ -+ ARRAY1(V##_pad,T,W,N) -+ -+#define DECL_ALL_VLD_STX_LANE(X) \ -+ DECL_VLD_STX_LANE(poly, 64, 1, X); \ -+ DECL_VLD_STX_LANE(poly, 64, 2, X); -+ -+#define TEST_ALL_VLDX_LANE(X) \ -+ TEST_VLDX_LANE(, poly, p, 64, 1, X, 0); \ -+ TEST_VLDX_LANE(q, poly, p, 64, 2, X, 0); -+ -+#define TEST_ALL_EXTRA_CHUNKS(X,Y) \ -+ TEST_EXTRA_CHUNK(poly, 64, 1, X, Y) \ -+ TEST_EXTRA_CHUNK(poly, 64, 2, X, Y) -+ -+#define CHECK_RESULTS_VLD_STX_LANE(test_name,EXPECTED,comment) \ -+ CHECK_POLY(test_name, poly, 64, 1, PRIx64, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 64, 2, PRIx64, EXPECTED, comment); -+ -+ /* Declare the temporary buffers / variables. */ -+ DECL_ALL_VLD_STX_LANE(2); -+ DECL_ALL_VLD_STX_LANE(3); -+ DECL_ALL_VLD_STX_LANE(4); -+ -+ DUMMY_ARRAY(buffer_src, poly, 64, 1, 4); -+ DUMMY_ARRAY(buffer_src, poly, 64, 2, 4); -+ -+ /* Check vld2_lane/vld2q_lane. */ -+ clean_results (); -+#undef TEST_MSG -+#define TEST_MSG "VLD2_LANE/VLD2Q_LANE" -+ TEST_ALL_VLDX_LANE(2); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_0, " chunk 0"); -+ -+ TEST_ALL_EXTRA_CHUNKS(2, 1); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st2_1, " chunk 1"); -+ -+ /* Check vld3_lane/vld3q_lane. */ -+ clean_results (); -+#undef TEST_MSG -+#define TEST_MSG "VLD3_LANE/VLD3Q_LANE" -+ TEST_ALL_VLDX_LANE(3); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_0, " chunk 0"); -+ -+ TEST_ALL_EXTRA_CHUNKS(3, 1); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_1, " chunk 1"); -+ -+ TEST_ALL_EXTRA_CHUNKS(3, 2); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st3_2, " chunk 2"); -+ -+ /* Check vld4_lane/vld4q_lane. */ -+ clean_results (); -+#undef TEST_MSG -+#define TEST_MSG "VLD4_LANE/VLD4Q_LANE" -+ TEST_ALL_VLDX_LANE(4); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_0, " chunk 0"); -+ -+ TEST_ALL_EXTRA_CHUNKS(4, 1); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_1, " chunk 1"); -+ -+ TEST_ALL_EXTRA_CHUNKS(4, 2); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_2, " chunk 2"); -+ -+ TEST_ALL_EXTRA_CHUNKS(4, 3); -+ CHECK_RESULTS_VLD_STX_LANE (TEST_MSG, expected_vld_st4_3, " chunk 3"); -+ -+ /* In this case, input variables are arrays of vectors. */ -+#define DECL_VSTX_LANE(T1, W, N, X) \ -+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector, T1, W, N, X); \ -+ VECT_ARRAY_TYPE(T1, W, N, X) VECT_ARRAY_VAR(vector_src, T1, W, N, X); \ -+ VECT_VAR_DECL(result_bis_##X, T1, W, N)[X * N] -+ -+ /* We need to use a temporary result buffer (result_bis), because -+ the one used for other tests is not large enough. A subset of the -+ result data is moved from result_bis to result, and it is this -+ subset which is used to check the actual behavior. The next -+ macro moves another chunk of data from result_bis to -+ result. */ -+ /* We also use another extra input buffer (buffer_src), which we -+ fill with 0xAA, and which is used to load a vector from which we -+ read a given lane. */ -+#define TEST_VSTX_LANE(Q, T1, T2, W, N, X, L) \ -+ memset (VECT_VAR(buffer_src, T1, W, N), 0xAA, \ -+ sizeof(VECT_VAR(buffer_src, T1, W, N))); \ -+ memset (VECT_VAR(result_bis_##X, T1, W, N), 0, \ -+ sizeof(VECT_VAR(result_bis_##X, T1, W, N))); \ -+ \ -+ VECT_ARRAY_VAR(vector_src, T1, W, N, X) = \ -+ vld##X##Q##_##T2##W(VECT_VAR(buffer_src, T1, W, N)); \ -+ \ -+ VECT_ARRAY_VAR(vector, T1, W, N, X) = \ -+ /* Use dedicated init buffer, of size X.
*/ \ -+ vld##X##Q##_lane_##T2##W(VECT_VAR(buffer_vld##X##_lane, T1, W, X), \ -+ VECT_ARRAY_VAR(vector_src, T1, W, N, X), \ -+ L); \ -+ vst##X##Q##_lane_##T2##W(VECT_VAR(result_bis_##X, T1, W, N), \ -+ VECT_ARRAY_VAR(vector, T1, W, N, X), \ -+ L); \ -+ memcpy(VECT_VAR(result, T1, W, N), VECT_VAR(result_bis_##X, T1, W, N), \ -+ sizeof(VECT_VAR(result, T1, W, N))); -+ -+#define TEST_ALL_VSTX_LANE(X) \ -+ TEST_VSTX_LANE(, poly, p, 64, 1, X, 0); \ -+ TEST_VSTX_LANE(q, poly, p, 64, 2, X, 0); -+ -+ /* Check vst2_lane/vst2q_lane. */ -+ clean_results (); -+#undef TEST_MSG -+#define TEST_MSG "VST2_LANE/VST2Q_LANE" -+ TEST_ALL_VSTX_LANE(2); -+ -+#define CMT " (chunk 0)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_0, CMT); -+ -+ TEST_ALL_EXTRA_CHUNKS(2, 1); -+#undef CMT -+#define CMT " (chunk 1)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st2_1, CMT); -+ -+ /* Check vst3_lane/vst3q_lane. */ -+ clean_results (); -+#undef TEST_MSG -+#define TEST_MSG "VST3_LANE/VST3Q_LANE" -+ TEST_ALL_VSTX_LANE(3); -+ -+#undef CMT -+#define CMT " (chunk 0)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_0, CMT); -+ -+ TEST_ALL_EXTRA_CHUNKS(3, 1); -+ -+#undef CMT -+#define CMT " (chunk 1)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_1, CMT); -+ -+ TEST_ALL_EXTRA_CHUNKS(3, 2); -+ -+#undef CMT -+#define CMT " (chunk 2)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st3_2, CMT); -+ -+ /* Check vst4_lane/vst4q_lane. */ -+ clean_results (); -+#undef TEST_MSG -+#define TEST_MSG "VST4_LANE/VST4Q_LANE" -+ TEST_ALL_VSTX_LANE(4); -+ -+#undef CMT -+#define CMT " (chunk 0)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_0, CMT); -+ -+ TEST_ALL_EXTRA_CHUNKS(4, 1); -+ -+#undef CMT -+#define CMT " (chunk 1)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_1, CMT); -+ -+ TEST_ALL_EXTRA_CHUNKS(4, 2); -+ -+#undef CMT -+#define CMT " (chunk 2)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_2, CMT); -+ -+ TEST_ALL_EXTRA_CHUNKS(4, 3); -+ -+#undef CMT -+#define CMT " (chunk 3)" -+ CHECK_POLY(TEST_MSG, poly, 64, 1, PRIx64, expected_vld_st4_3, CMT); -+ -+#endif /* __aarch64__. */ -+ -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/ternary_scalar_op.inc -@@ -0,0 +1,206 @@ -+/* Template file for ternary scalar operator validation. -+ -+ This file is meant to be included by test files for ternary scalar -+ operations. */ -+ -+/* Check for required settings. */ -+ -+#ifndef INSN_NAME -+#error INSN_NAME (the intrinsic to test) must be defined. -+#endif -+ -+#ifndef INPUT_TYPE -+#error INPUT_TYPE (basic type of an input value) must be defined. -+#endif -+ -+#ifndef OUTPUT_TYPE -+#error OUTPUT_TYPE (basic type of an output value) must be defined. -+#endif -+ -+#ifndef OUTPUT_TYPE_SIZE -+#error OUTPUT_TYPE_SIZE (size in bits of an output value) must be defined. -+#endif -+ -+/* Optional settings: -+ -+ INPUT_1: Input values for the first parameter. Must be of type INPUT_TYPE. -+ INPUT_2: Input values for the second parameter. Must be of type INPUT_TYPE. -+ INPUT_3: Input values for the third parameter. Must be of type -+ INPUT_TYPE. */ -+ -+#ifndef TEST_MSG -+#define TEST_MSG "unnamed test" -+#endif -+ -+/* The test framework. */ -+ -+#include <stdio.h> -+ -+extern void abort (); -+ -+#define INFF __builtin_inf () -+ -+/* Stringify a macro. */ -+#define STR0(A) #A -+#define STR(A) STR0 (A) -+ -+/* Macro concatenation.
*/ -+#define CAT0(A, B) A##B -+#define CAT(A, B) CAT0 (A, B) -+ -+/* Format strings for error reporting. */ -+#define FMT16 "0x%04x" -+#define FMT32 "0x%08x" -+#define FMT CAT (FMT,OUTPUT_TYPE_SIZE) -+ -+/* Type construction: forms TS_t, where T is the base type and S the size in -+ bits. */ -+#define MK_TYPE0(T, S) T##S##_t -+#define MK_TYPE(T, S) MK_TYPE0 (T, S) -+ -+/* Convenience types for input and output data. */ -+typedef MK_TYPE (uint, OUTPUT_TYPE_SIZE) output_hex_type; -+ -+/* Conversion between typed values and their hexadecimal representation. */ -+typedef union -+{ -+ OUTPUT_TYPE value; -+ output_hex_type hex; -+} output_conv_type; -+ -+/* Default input values. */ -+ -+float16_t input_1_float16_t[] = -+{ -+ 0.0, -+ -0.0, -+ 2.0, -+ 3.1, -+ 20.0, -+ 0.40, -+ -2.3, -+ 1.33, -+ -7.6, -+ 0.31, -+ 0.3353, -+ 0.5, -+ 1.0, -+ 13.13, -+ -6.3, -+ 20.0, -+ (float16_t)INFF, -+ (float16_t)-INFF, -+}; -+ -+float16_t input_2_float16_t[] = -+{ -+ 1.0, -+ 1.0, -+ -4.33, -+ 100.0, -+ 30.0, -+ -0.02, -+ 0.5, -+ -7.231, -+ -6.3, -+ 20.0, -+ -7.231, -+ 2.3, -+ -7.6, -+ 5.1, -+ 0.31, -+ 0.33353, -+ (float16_t)-INFF, -+ (float16_t)INFF, -+}; -+ -+float16_t input_3_float16_t[] = -+{ -+ -0.0, -+ 0.0, -+ 0.31, -+ -0.31, -+ 1.31, -+ 2.1, -+ -6.3, -+ 1.0, -+ -1.5, -+ 5.1, -+ 0.3353, -+ 9.3, -+ -9.3, -+ -7.231, -+ 0.5, -+ -0.33, -+ (float16_t)INFF, -+ (float16_t)INFF, -+}; -+ -+#ifndef INPUT_1 -+#define INPUT_1 CAT (input_1_,INPUT_TYPE) -+#endif -+ -+#ifndef INPUT_2 -+#define INPUT_2 CAT (input_2_,INPUT_TYPE) -+#endif -+ -+#ifndef INPUT_3 -+#define INPUT_3 CAT (input_3_,INPUT_TYPE) -+#endif -+ -+/* Support macros and routines for the test function. */ -+ -+#define CHECK() \ -+ { \ -+ output_conv_type actual; \ -+ output_conv_type expect; \ -+ \ -+ expect.hex = ((output_hex_type*)EXPECTED)[index]; \ -+ actual.value = INSN_NAME ((INPUT_1)[index], \ -+ (INPUT_2)[index], \ -+ (INPUT_3)[index]); \ -+ \ -+ if (actual.hex != expect.hex) \ -+ { \ -+ fprintf (stderr, \ -+ "ERROR in %s (%s line %d), buffer %s, " \ -+ "index %d: got " \ -+ FMT " != " FMT "\n", \ -+ TEST_MSG, __FILE__, __LINE__, \ -+ STR (EXPECTED), index, \ -+ actual.hex, expect.hex); \ -+ abort (); \ -+ } \ -+ fprintf (stderr, "CHECKED %s %s\n", \ -+ STR (EXPECTED), TEST_MSG); \ -+ } -+ -+#define FNNAME1(NAME) exec_ ## NAME -+#define FNNAME(NAME) FNNAME1 (NAME) -+ -+/* The test function. */ -+ -+void -+FNNAME (INSN_NAME) (void) -+{ -+ /* Basic test: y[i] = OP (x[i]), for each INPUT[i], then compare the result -+ against EXPECTED[i]. 
*/ -+ -+ const int num_tests = sizeof (INPUT_1) / sizeof (INPUT_1[0]); -+ int index; -+ -+ for (index = 0; index < num_tests; index++) -+ CHECK (); -+ -+#ifdef EXTRA_TESTS -+ EXTRA_TESTS (); -+#endif -+} -+ -+int -+main (void) -+{ -+ FNNAME (INSN_NAME) (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_sat_op.inc -@@ -61,11 +61,11 @@ void FNNAME (INSN_NAME) (void) - TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat, ""); - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); -- CHECK(TEST_MSG, int, 16, 4, PRIx8, expected, ""); -- CHECK(TEST_MSG, int, 32, 2, PRIx8, expected, ""); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); -- CHECK(TEST_MSG, int, 16, 8, PRIx8, expected, ""); -- CHECK(TEST_MSG, int, 32, 4, PRIx8, expected, ""); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); - - #ifdef EXTRA_TESTS - EXTRA_TESTS(); ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/unary_scalar_op.inc -@@ -0,0 +1,200 @@ -+/* Template file for unary scalar operator validation. -+ -+ This file is meant to be included by test files for unary scalar -+ operations. */ -+ -+/* Check for required settings. */ -+ -+#ifndef INSN_NAME -+#error INSN_NAME (the intrinsic to test) must be defined. -+#endif -+ -+#ifndef INPUT_TYPE -+#error INPUT_TYPE (basic type of an input value) must be defined. -+#endif -+ -+#ifndef SCALAR_OPERANDS -+#ifndef EXPECTED -+#error EXPECTED (an array of expected output values) must be defined. -+#endif -+#endif -+ -+#ifndef OUTPUT_TYPE -+#error OUTPUT_TYPE (basic type of an output value) must be defined. -+#endif -+ -+#ifndef OUTPUT_TYPE_SIZE -+#error OUTPUT_TYPE_SIZE (size in bits of an output value) must be defined. -+#endif -+ -+/* Optional settings. */ -+ -+/* SCALAR_OPERANDS: Defined iff the intrinsic has a scalar operand. -+ -+ SCALAR_1, SCALAR_2, .., SCALAR_4: If SCALAR_OPERANDS is defined, SCALAR_<n> -+ is the scalar and EXPECTED_<n> is array of expected values. -+ -+ INPUT: Input values for the first parameter. Must be of type INPUT_TYPE. */ -+ -+/* Additional comments for the error message. */ -+#ifndef COMMENT -+#define COMMENT "" -+#endif -+ -+#ifndef TEST_MSG -+#define TEST_MSG "unnamed test" -+#endif -+ -+/* The test framework. */ -+ -+#include <stdio.h> -+ -+extern void abort (); -+ -+#define INFF __builtin_inf () -+ -+/* Stringify a macro. */ -+#define STR0(A) #A -+#define STR(A) STR0 (A) -+ -+/* Macro concatenation. */ -+#define CAT0(A, B) A##B -+#define CAT(A, B) CAT0 (A, B) -+ -+/* Format strings for error reporting. */ -+#define FMT16 "0x%04x" -+#define FMT32 "0x%08x" -+#define FMT64 "0x%016x" -+#define FMT CAT (FMT,OUTPUT_TYPE_SIZE) -+ -+/* Type construction: forms TS_t, where T is the base type and S the size in -+ bits. */ -+#define MK_TYPE0(T, S) T##S##_t -+#define MK_TYPE(T, S) MK_TYPE0 (T, S) -+ -+/* Convenience types for input and output data. */ -+typedef MK_TYPE (uint, OUTPUT_TYPE_SIZE) output_hex_type; -+ -+/* Conversion between typed values and their hexadecimal representation. */ -+typedef union -+{ -+ OUTPUT_TYPE value; -+ output_hex_type hex; -+} output_conv_type; -+ -+/* Default input values. 
*/ -+ -+float16_t input_1_float16_t[] = -+{ -+ 0.0, -0.0, -+ 2.0, 3.1, -+ 20.0, 0.40, -+ -2.3, 1.33, -+ -7.6, 0.31, -+ 0.3353, 0.5, -+ 1.0, 13.13, -+ -6.3, 20.0, -+ (float16_t)INFF, (float16_t)-INFF, -+}; -+ -+#ifndef INPUT -+#define INPUT CAT(input_1_,INPUT_TYPE) -+#endif -+ -+/* Support macros and routines for the test function. */ -+ -+#define CHECK() \ -+ { \ -+ output_conv_type actual; \ -+ output_conv_type expect; \ -+ \ -+ expect.hex = ((output_hex_type*)EXPECTED)[index]; \ -+ actual.value = INSN_NAME ((INPUT)[index]); \ -+ \ -+ if (actual.hex != expect.hex) \ -+ { \ -+ fprintf (stderr, \ -+ "ERROR in %s (%s line %d), buffer %s, " \ -+ "index %d: got " \ -+ FMT " != " FMT "\n", \ -+ TEST_MSG, __FILE__, __LINE__, \ -+ STR (EXPECTED), index, \ -+ actual.hex, expect.hex); \ -+ abort (); \ -+ } \ -+ fprintf (stderr, "CHECKED %s %s\n", \ -+ STR (EXPECTED), TEST_MSG); \ -+ } -+ -+#define CHECK_N(SCALAR, EXPECTED) \ -+ { \ -+ output_conv_type actual; \ -+ output_conv_type expect; \ -+ \ -+ expect.hex \ -+ = ((output_hex_type*)EXPECTED)[index]; \ -+ actual.value = INSN_NAME ((INPUT)[index], (SCALAR)); \ -+ \ -+ if (actual.hex != expect.hex) \ -+ { \ -+ fprintf (stderr, \ -+ "ERROR in %s (%s line %d), buffer %s, " \ -+ "index %d: got " \ -+ FMT " != " FMT "\n", \ -+ TEST_MSG, __FILE__, __LINE__, \ -+ STR (EXPECTED), index, \ -+ actual.hex, expect.hex); \ -+ abort (); \ -+ } \ -+ fprintf (stderr, "CHECKED %s %s\n", \ -+ STR (EXPECTED), TEST_MSG); \ -+ } -+ -+#define FNNAME1(NAME) exec_ ## NAME -+#define FNNAME(NAME) FNNAME1 (NAME) -+ -+/* The test function. */ -+ -+void -+FNNAME (INSN_NAME) (void) -+{ -+ /* Basic test: y[i] = OP (x[i]), for each INPUT[i], then compare the result -+ against EXPECTED[i]. */ -+ -+ const int num_tests = sizeof (INPUT) / sizeof (INPUT[0]); -+ int index; -+ -+ for (index = 0; index < num_tests; index++) -+ { -+#if defined (SCALAR_OPERANDS) -+ -+#ifdef SCALAR_1 -+ CHECK_N (SCALAR_1, EXPECTED_1); -+#endif -+#ifdef SCALAR_2 -+ CHECK_N (SCALAR_2, EXPECTED_2); -+#endif -+#ifdef SCALAR_3 -+ CHECK_N (SCALAR_3, EXPECTED_3); -+#endif -+#ifdef SCALAR_4 -+ CHECK_N (SCALAR_4, EXPECTED_4); -+#endif -+ -+#else /* !defined (SCALAR_OPERAND). */ -+ CHECK (); -+#endif -+ } -+ -+#ifdef EXTRA_TESTS -+ EXTRA_TESTS (); -+#endif -+} -+ -+int -+main (void) -+{ -+ FNNAME (INSN_NAME) (); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabd.c -@@ -30,10 +30,20 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffd0, 0xffffffd1, - 0xffffffd2, 0xffffffd3 }; - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x42407ae1, 0x423c7ae1, - 0x42387ae1, 0x42347ae1 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0x4e13, 0x4dd3, -+ 0x4d93, 0x4d53 }; -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0x5204, 0x51e4, 0x51c4, 0x51a4, -+ 0x5184, 0x5164, 0x5144, 0x5124 }; -+#endif - - /* Additional expected results for float32 variants with specially - chosen input values. 
*/ - VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+#endif - - #define TEST_MSG "VABD/VABDQ" - void exec_vabd (void) -@@ -65,6 +75,17 @@ void exec_vabd (void) - DECL_VABD_VAR(vector2); - DECL_VABD_VAR(vector_res); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector1, float, 16, 4); -+ DECL_VARIABLE(vector1, float, 16, 8); -+ -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 8); -+ -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif -+ - clean_results (); - - /* Initialize input "vector1" from "buffer". */ -@@ -82,6 +103,12 @@ void exec_vabd (void) - VLOAD(vector1, buffer, q, uint, u, 16, 8); - VLOAD(vector1, buffer, q, uint, u, 32, 4); - VLOAD(vector1, buffer, q, float, f, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector1, buffer, , float, f, 16, 4); -+ VLOAD(vector1, buffer, , float, f, 16, 4); -+ VLOAD(vector1, buffer, q, float, f, 16, 8); -+ VLOAD(vector1, buffer, q, float, f, 16, 8); -+#endif - - /* Choose init value arbitrarily. */ - VDUP(vector2, , int, s, 8, 8, 1); -@@ -98,6 +125,10 @@ void exec_vabd (void) - VDUP(vector2, q, uint, u, 16, 8, 12); - VDUP(vector2, q, uint, u, 32, 4, 32); - VDUP(vector2, q, float, f, 32, 4, 32.12f); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, 8.3f); -+ VDUP(vector2, q, float, f, 16, 8, 32.12f); -+#endif - - /* Execute the tests. */ - TEST_VABD(, int, s, 8, 8); -@@ -115,6 +146,11 @@ void exec_vabd (void) - TEST_VABD(q, uint, u, 32, 4); - TEST_VABD(q, float, f, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VABD(, float, f, 16, 4); -+ TEST_VABD(q, float, f, 16, 8); -+#endif -+ - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); - CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); -@@ -129,7 +165,10 @@ void exec_vabd (void) - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); -- -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); -+#endif - - /* Extra FP tests with special values (-0.0, ....) */ - VDUP(vector1, q, float, f, 32, 4, -0.0f); -@@ -137,11 +176,27 @@ void exec_vabd (void) - TEST_VABD(q, float, f, 32, 4); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, " FP special (-0.0)"); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector1, q, float, f, 16, 8, -0.0f); -+ VDUP(vector2, q, float, f, 16, 8, 0.0); -+ TEST_VABD(q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, -+ " FP special (-0.0)"); -+#endif -+ - /* Extra FP tests with special values (-0.0, ....) 
*/ - VDUP(vector1, q, float, f, 32, 4, 0.0f); - VDUP(vector2, q, float, f, 32, 4, -0.0); - TEST_VABD(q, float, f, 32, 4); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, " FP special (-0.0)"); -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector1, q, float, f, 16, 8, 0.0f); -+ VDUP(vector2, q, float, f, 16, 8, -0.0); -+ TEST_VABD(q, float, f, 16, 8); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, -+ " FP special (-0.0)"); -+#endif - } - - int main (void) ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabdh_f16_1.c -@@ -0,0 +1,44 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+#define INFF __builtin_inf () -+ -+/* Expected results. -+ Absolute difference between INPUT1 and INPUT2 in binary_scalar_op.inc. */ -+uint16_t expected[] = -+{ -+ 0x3C00, -+ 0x3C00, -+ 0x4654, -+ 0x560E, -+ 0x4900, -+ 0x36B8, -+ 0x419a, -+ 0x4848, -+ 0x3d34, -+ 0x4cec, -+ 0x4791, -+ 0x3f34, -+ 0x484d, -+ 0x4804, -+ 0x469c, -+ 0x4ceb, -+ 0x7c00, -+ 0x7c00 -+}; -+ -+#define TEST_MSG "VABDH_F16" -+#define INSN_NAME vabdh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabs.c -@@ -21,24 +21,52 @@ VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; - /* Expected results for float32 variants. Needs to be separated since - the generic test function does not test floating-point - versions. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_float16, hfloat, 16, 4) [] = { 0x409a, 0x409a, -+ 0x409a, 0x409a }; -+VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0x42cd, 0x42cd, -+ 0x42cd, 0x42cd, -+ 0x42cd, 0x42cd, -+ 0x42cd, 0x42cd }; -+#endif - VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0x40133333, 0x40133333 }; - VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x4059999a, 0x4059999a, - 0x4059999a, 0x4059999a }; - - void exec_vabs_f32(void) - { -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+#endif - DECL_VARIABLE(vector, float, 32, 2); - DECL_VARIABLE(vector, float, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif - DECL_VARIABLE(vector_res, float, 32, 2); - DECL_VARIABLE(vector_res, float, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, -2.3f); -+ VDUP(vector, q, float, f, 16, 8, 3.4f); -+#endif - VDUP(vector, , float, f, 32, 2, -2.3f); - VDUP(vector, q, float, f, 32, 4, 3.4f); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_UNARY_OP(INSN_NAME, , float, f, 16, 4); -+ TEST_UNARY_OP(INSN_NAME, q, float, f, 16, 8); -+#endif - TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2); - TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_float16, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, ""); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, ""); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, ""); - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vabsh_f16_1.c -@@ -0,0 +1,40 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x4000 /* 2.000000 */, -+ 0x4233 /* 3.099609 */, -+ 0x4d00 /* 20.000000 */, -+ 0x3666 /* 0.399902 */, -+ 0x409a /* 2.300781 */, -+ 0x3d52 /* 1.330078 */, -+ 0x479a /* 7.601562 */, -+ 0x34f6 /* 0.310059 */, -+ 0x355d /* 0.335205 */, -+ 0x3800 /* 0.500000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x4a91 /* 13.132812 */, -+ 0x464d /* 6.300781 */, -+ 0x4d00 /* 20.000000 */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */ -+}; -+ -+#define TEST_MSG "VABSH_F16" -+#define INSN_NAME vabsh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vadd.c -@@ -43,6 +43,14 @@ VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff3, - VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0x40d9999a, 0x40d9999a }; - VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0x41100000, 0x41100000, - 0x41100000, 0x41100000 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_float16, hfloat, 16, 4) [] = { 0x46cd, 0x46cd, -+ 0x46cd, 0x46cd }; -+VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0x4880, 0x4880, -+ 0x4880, 0x4880, -+ 0x4880, 0x4880, -+ 0x4880, 0x4880 }; -+#endif - - void exec_vadd_f32(void) - { -@@ -66,4 +74,27 @@ void exec_vadd_f32(void) - - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, ""); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, ""); -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+ -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 8); -+ -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+ -+ VDUP(vector, , float, f, 16, 4, 2.3f); -+ VDUP(vector, q, float, f, 16, 8, 3.4f); -+ -+ VDUP(vector2, , float, f, 16, 4, 4.5f); -+ VDUP(vector2, q, float, f, 16, 8, 5.6f); -+ -+ TEST_BINARY_OP(INSN_NAME, , float, f, 16, 4); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_float16, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, ""); -+#endif - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vaddh_f16_1.c -@@ -0,0 +1,40 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0xc0a8 /* -2.328125 */, -+ 0x5672 /* 103.125000 */, -+ 0x5240 /* 50.000000 */, -+ 0x3614 /* 0.379883 */, -+ 0xbf34 /* -1.800781 */, -+ 0xc5e6 /* -5.898438 */, -+ 0xcaf4 /* -13.906250 */, -+ 0x4d14 /* 20.312500 */, -+ 0xc6e5 /* -6.894531 */, -+ 0x419a /* 2.800781 */, -+ 0xc69a /* -6.601562 */, -+ 0x4c8f /* 18.234375 */, -+ 0xc5fe /* -5.992188 */, -+ 0x4d15 /* 20.328125 */, -+ 0x7e00 /* nan */, -+ 0x7e00 /* nan */, -+}; -+ -+#define TEST_MSG "VADDH_F16" -+#define INSN_NAME vaddh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. 
*/ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vbsl.c -@@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffff1 }; - VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, - 0xf7, 0xf7, 0xf7, 0xf7 }; - VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc09, 0xcb89, -+ 0xcb09, 0xca89 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800004, 0xc1700004 }; - VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, - 0xf6, 0xf6, 0xf6, 0xf6, -@@ -43,6 +47,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf3, 0xf3, 0xf3, 0xf3, - 0xf7, 0xf7, 0xf7, 0xf7 }; - VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff2, 0xfff2, - 0xfff4, 0xfff4, 0xfff6, 0xfff6 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc09, 0xcb89, -+ 0xcb09, 0xca89, -+ 0xca09, 0xc989, -+ 0xc909, 0xc889 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800001, 0xc1700001, - 0xc1600001, 0xc1500001 }; - -@@ -66,6 +76,10 @@ void exec_vbsl (void) - clean_results (); - - TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (FP16_SUPPORTED) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, float, f, 32, 4); - -@@ -80,6 +94,9 @@ void exec_vbsl (void) - VDUP(vector2, , uint, u, 16, 4, 0xFFF2); - VDUP(vector2, , uint, u, 32, 2, 0xFFFFFFF0); - VDUP(vector2, , uint, u, 64, 1, 0xFFFFFFF3); -+#if defined (FP16_SUPPORTED) -+ VDUP(vector2, , float, f, 16, 4, -2.4f); /* -2.4f is 0xC0CD. 
*/ -+#endif - VDUP(vector2, , float, f, 32, 2, -30.3f); - VDUP(vector2, , poly, p, 8, 8, 0xF3); - VDUP(vector2, , poly, p, 16, 4, 0xFFF2); -@@ -94,6 +111,9 @@ void exec_vbsl (void) - VDUP(vector2, q, uint, u, 64, 2, 0xFFFFFFF3); - VDUP(vector2, q, poly, p, 8, 16, 0xF3); - VDUP(vector2, q, poly, p, 16, 8, 0xFFF2); -+#if defined (FP16_SUPPORTED) -+ VDUP(vector2, q, float, f, 16, 8, -2.4f); -+#endif - VDUP(vector2, q, float, f, 32, 4, -30.4f); - - VDUP(vector_first, , uint, u, 8, 8, 0xF4); -@@ -111,10 +131,18 @@ void exec_vbsl (void) - TEST_VBSL(uint, , poly, p, 16, 4); - TEST_VBSL(uint, q, poly, p, 8, 16); - TEST_VBSL(uint, q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VBSL(uint, , float, f, 16, 4); -+ TEST_VBSL(uint, q, float, f, 16, 8); -+#endif - TEST_VBSL(uint, , float, f, 32, 2); - TEST_VBSL(uint, q, float, f, 32, 4); - -+#if defined (FP16_SUPPORTED) -+ CHECK_RESULTS (TEST_MSG, ""); -+#else - CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); -+#endif - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcage.c -@@ -11,3 +11,13 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, - VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; - VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff, - 0xffffffff, 0xffffffff }; -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected, uint, 16, 4) [] = { 0xffff, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected, uint, 16, 8) [] = { 0xffff, 0xffff, 0xffff, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+ -+VECT_VAR_DECL (expected2, uint, 16, 4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL (expected2, uint, 16, 8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0x0 }; -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcageh_f16_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, -+ 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, -+ 0xFFFF}; -+ -+#define TEST_MSG "VCAGEH_F16" -+#define INSN_NAME vcageh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. 
*/ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagt.c -@@ -11,3 +11,13 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0xffffffff, 0xffffffff, - VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; - VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xffffffff, 0xffffffff, - 0xffffffff, 0xffffffff }; -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected, uint, 16, 8) [] = { 0xffff, 0xffff, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+ -+VECT_VAR_DECL (expected2, uint, 16, 4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL (expected2, uint, 16, 8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0x0, 0x0 }; -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcagth_f16_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, -+ 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0}; -+ -+#define TEST_MSG "VCAGTH_F16" -+#define INSN_NAME vcagth_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcale.c -@@ -9,3 +9,13 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; - - VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected, uint, 16, 4) [] = { 0xffff, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL (expected, uint, 16, 8) [] = { 0x0, 0x0, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+ -+VECT_VAR_DECL (expected2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected2, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0xffff, 0xffff }; -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcaleh_f16_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, -+ 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, -+ 0x0, 0xFFFF, 0xFFFF}; -+ -+#define TEST_MSG "VCALEH_F16" -+#define INSN_NAME vcaleh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. 
*/ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalt.c -@@ -9,3 +9,13 @@ VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; - - VECT_VAR_DECL(expected2,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected, uint, 16, 4) [] = { 0x0, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL (expected, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+ -+VECT_VAR_DECL (expected2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected2, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0xffff }; -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcalth_f16_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, -+ 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, -+ 0x0, 0x0, 0x0}; -+ -+#define TEST_MSG "VCALTH_F16" -+#define INSN_NAME vcalth_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceq.c -@@ -32,6 +32,12 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0xffff, 0x0 }; - VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0xffff, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, 0xffff, 0x0, -+ 0x0, 0x0, 0x0, 0x0, }; -+#endif -+ - VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0xffffffff }; - VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0x0 }; - -@@ -39,6 +45,18 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0x0 }; - VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0xffffffff }; - VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0x0 }; - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif -+ - VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 }; ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqh_f16_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* 
{ dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; -+ -+#define TEST_MSG "VCEQH_F16" -+#define INSN_NAME vceqh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqz_1.c -@@ -0,0 +1,27 @@ -+/* This file tests an intrinsic which currently has only an f16 variant and that -+ is only available when FP16 arithmetic instructions are supported. */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+ -+#define INSN_NAME vceqz -+#define TEST_MSG "VCEQZ/VCEQZQ" -+ -+#include "cmp_zero_op.inc" -+ -+/* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+#endif -+ -+/* Extra FP tests with special values (NaN, ....). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vceqzh_f16_1.c -@@ -0,0 +1,21 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; -+ -+#define TEST_MSG "VCEQZH_F16" -+#define INSN_NAME vceqzh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations.
*/ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcge.c -@@ -28,6 +28,14 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0, 0x0, 0xffff, 0xffff }; - VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif -+ - VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0xffffffff }; - VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0xffffffff, 0xffffffff }; - -@@ -35,6 +43,20 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0xffffffff }; - VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0xffffffff }; - VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0xffffffff }; - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif -+ - VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 }; ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgeh_f16_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0x0, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF, -+ 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, -+ 0xFFFF, 0x0}; -+ -+#define TEST_MSG "VCGEH_F16" -+#define INSN_NAME vcgeh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgez_1.c -@@ -0,0 +1,30 @@ -+/* This file tests an intrinsic which currently has only an f16 variant and that -+ is only available when FP16 arithmetic instructions are supported. */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+ -+#define INSN_NAME vcgez -+#define TEST_MSG "VCGEZ/VCGEZQ" -+ -+#include "cmp_zero_op.inc" -+ -+/* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif -+ -+/* Extra FP tests with special values (NaN, ....). 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgezh_f16_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, -+ 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, -+ 0x0, 0xFFFF, 0xFFFF, 0x0}; -+ -+#define TEST_MSG "VCGEZH_F16" -+#define INSN_NAME vcgezh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgt.c -@@ -28,6 +28,14 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0xffff }; - VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0x0, 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, -+ 0x0, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif -+ - VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0x0, 0x0, 0x0, 0xffffffff }; - -@@ -35,6 +43,19 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0x0, 0xffffffff }; - VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0x0, 0xffffffff }; - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+#endif -+ - VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
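The scalar *h_f16 tests in this patch all funnel through the scalar-op templates, whose CHECK macros compare results bit-exactly through a union of the output type and a same-width unsigned integer. A minimal standalone C sketch of that mechanism (illustrative only, not patch content; it assumes a compiler where __fp16 provides the float16_t representation):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef __fp16 float16_t;   /* assumption: __fp16 is available on the target */

typedef union
{
  float16_t value;
  uint16_t hex;
} conv16_t;

/* Bit-exact check: comparing raw halfwords keeps NaN patterns and the
   sign of zero observable, which a floating-point compare would hide. */
static void
check_f16 (float16_t actual, uint16_t expected_hex)
{
  conv16_t conv = { .value = actual };
  if (conv.hex != expected_hex)
    {
      fprintf (stderr, "got 0x%04x != 0x%04x\n", conv.hex, expected_hex);
      abort ();
    }
}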
TEST_MSG "VCGTH_F16" -+#define INSN_NAME vcgth_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtz_1.c -@@ -0,0 +1,28 @@ -+/* This file tests an intrinsic which currently has only an f16 variant and that -+ is only available when FP16 arithmetic instructions are supported. */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+ -+#define INSN_NAME vcgtz -+#define TEST_MSG "VCGTZ/VCGTZQ" -+ -+#include "cmp_zero_op.inc" -+ -+/* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif -+ -+/* Extra FP tests with special values (NaN, ....). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcgtzh_f16_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0x0, 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, -+ 0x0, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, -+ 0xFFFF, 0xFFFF, 0x0}; -+ -+#define TEST_MSG "VCGTZH_F16" -+#define INSN_NAME vcgtzh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcle.c -@@ -31,6 +31,14 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff, - VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0xffffffff, 0xffffffff, - 0xffffffff, 0x0 }; - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0xffff, 0xffff, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0xffff, 0xffff, -+ 0xffff, 0x0, -+ 0x0, 0x0, -+ 0x0, 0x0 }; -+#endif -+ - VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0xffffffff, 0xffffffff }; - VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0xffffffff, 0xffffffff, - 0xffffffff, 0x0 }; -@@ -39,6 +47,20 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0xffffffff, 0x0 }; - VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0xffffffff, 0xffffffff }; - VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0xffffffff, 0x0 }; - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+ -+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+#endif -+ - VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 }; ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcleh_f16_1.c -@@ -0,0 +1,22 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = { 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0x0, -+ 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF, 0x0, 0x0, -+ 0xFFFF}; -+ -+#define TEST_MSG "VCLEH_F16" -+#define INSN_NAME vcleh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclez_1.c -@@ -0,0 +1,29 @@ -+/* This file tests an intrinsic which currently has only an f16 variant and that -+ is only available when FP16 arithmetic instructions are supported. */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+ -+#define INSN_NAME vclez -+#define TEST_MSG "VCLEZ/VCLEZQ" -+ -+#include "cmp_zero_op.inc" -+ -+/* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0 }; -+#endif -+ -+/* Extra FP tests with special values (NaN, ....). 
*/
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+
-+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0xffff, 0xffff,
-+ 0xffff, 0xffff };
-+VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0xffff, 0xffff,
-+ 0xffff, 0xffff };
-+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0xffff, 0xffff,
-+ 0xffff, 0xffff };
-+#endif
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclezh_f16_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+/* { dg-skip-if "" { arm*-*-* } } */
-+
-+#include <arm_fp16.h>
-+
-+uint16_t expected[] = { 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF,
-+ 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF};
-+
-+#define TEST_MSG "VCLEZH_F16"
-+#define INSN_NAME vclezh_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE uint16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for unary scalar operations. */
-+#include "unary_scalar_op.inc"
---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclt.c
-@@ -30,6 +30,14 @@ VECT_VAR_DECL(expected_q_uint,uint,16,8) [] = { 0xffff, 0xffff, 0xffff, 0xffff,
- VECT_VAR_DECL(expected_q_uint,uint,32,4) [] = { 0xffffffff, 0xffffffff,
- 0x0, 0x0 };
- 
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0xffff, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0xffff, 0xffff,
-+ 0x0, 0x0,
-+ 0x0, 0x0,
-+ 0x0, 0x0 };
-+#endif
-+
- VECT_VAR_DECL(expected_float,uint,32,2) [] = { 0xffffffff, 0x0 };
- VECT_VAR_DECL(expected_q_float,uint,32,4) [] = { 0xffffffff, 0xffffffff,
- 0x0, 0x0 };
-@@ -38,6 +46,19 @@ VECT_VAR_DECL(expected_uint2,uint,32,2) [] = { 0x0, 0x0 };
- VECT_VAR_DECL(expected_uint3,uint,32,2) [] = { 0xffffffff, 0x0 };
- VECT_VAR_DECL(expected_uint4,uint,32,2) [] = { 0x0, 0x0 };
- 
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_nan2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+
-+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0xffff, 0xffff,
-+ 0xffff, 0xffff };
-+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_inf2, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+
-+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+#endif
-+
- VECT_VAR_DECL(expected_nan,uint,32,2) [] = { 0x0, 0x0 };
- VECT_VAR_DECL(expected_mnan,uint,32,2) [] = { 0x0, 0x0 };
- VECT_VAR_DECL(expected_nan2,uint,32,2) [] = { 0x0, 0x0 };
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vclth_f16_1.c
-@@ -0,0 +1,22 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+/* { dg-skip-if "" { arm*-*-* } } */
-+
-+#include <arm_fp16.h>
-+
-+uint16_t expected[] = { 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0x0,
-+ 0xFFFF, 0xFFFF, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF, 0x0, 0x0,
-+ 0xFFFF};
-+
-+#define TEST_MSG "VCLTH_F16"
-+#define INSN_NAME vclth_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE uint16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for binary scalar operations. */
-+#include "binary_scalar_op.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltz_1.c
-@@ -0,0 +1,27 @@
-+/* This file tests an intrinsic which currently has only an f16 variant and that
-+ is only available when FP16 arithmetic instructions are supported. */
-+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
-+
-+#define INSN_NAME vcltz
-+#define TEST_MSG "VCLTZ/VCLTZQ"
-+
-+#include "cmp_zero_op.inc"
-+
-+/* Expected results. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected_float, uint, 16, 4) [] = { 0xffff, 0xffff,
-+ 0xffff, 0xffff };
-+VECT_VAR_DECL (expected_q_float, uint, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0 };
-+#endif
-+
-+/* Extra FP tests with special values (NaN, ....). */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected_nan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_mnan, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_inf, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+
-+VECT_VAR_DECL (expected_minf, uint, 16, 4) [] = { 0xffff, 0xffff,
-+ 0xffff, 0xffff };
-+VECT_VAR_DECL (expected_zero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL (expected_mzero, uint, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 };
-+#endif
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcltzh_f16_1.c
-@@ -0,0 +1,21 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+/* { dg-skip-if "" { arm*-*-* } } */
-+
-+#include <arm_fp16.h>
-+
-+uint16_t expected[] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0x0, 0xFFFF,
-+ 0x0, 0x0, 0x0, 0x0, 0x0, 0xFFFF, 0x0, 0x0, 0xFFFF};
-+
-+#define TEST_MSG "VCLTZH_F16"
-+#define INSN_NAME vcltzh_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE uint16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for unary scalar operations.
*/ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcnt.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcnt.c -@@ -65,10 +65,10 @@ FNNAME (INSN_NAME) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcombine.c -@@ -93,8 +93,8 @@ void exec_vcombine (void) - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); - CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); - #endif ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcreate.c -@@ -106,8 +106,8 @@ FNNAME (INSN_NAME) - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); - CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); - #endif ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvt.c -@@ -4,36 +4,99 @@ - #include <math.h> - - /* Expected results for vcvt. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_s, hfloat, 16, 4) [] = -+{ 0xcc00, 0xcb80, 0xcb00, 0xca80 }; -+VECT_VAR_DECL(expected_u, hfloat, 16, 4) [] = -+{ 0x7c00, 0x7c00, 0x7c00, 0x7c00, }; -+VECT_VAR_DECL(expected_s, hfloat, 16, 8) [] = -+{ 0xcc00, 0xcb80, 0xcb00, 0xca80, -+ 0xca00, 0xc980, 0xc900, 0xc880 }; -+VECT_VAR_DECL(expected_u, hfloat, 16, 8) [] = -+{ 0x7c00, 0x7c00, 0x7c00, 0x7c00, -+ 0x7c00, 0x7c00, 0x7c00, 0x7c00, }; -+#endif - VECT_VAR_DECL(expected_s,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; - VECT_VAR_DECL(expected_u,hfloat,32,2) [] = { 0x4f800000, 0x4f800000 }; - VECT_VAR_DECL(expected_s,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, -- 0xc1600000, 0xc1500000 }; -+ 0xc1600000, 0xc1500000 }; - VECT_VAR_DECL(expected_u,hfloat,32,4) [] = { 0x4f800000, 0x4f800000, -- 0x4f800000, 0x4f800000 }; -+ 0x4f800000, 0x4f800000 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, int, 16, 4) [] = { 0xfff1, 0x5, 0xfff1, 0x5 }; -+VECT_VAR_DECL(expected, uint, 16, 4) [] = { 0x0, 0x5, 0x0, 0x5 }; -+VECT_VAR_DECL(expected, int, 16, 8) [] = { 0x0, 0x0, 0xf, 0xfff1, -+ 0x0, 0x0, 0xf, 0xfff1 }; -+VECT_VAR_DECL(expected, uint, 16, 8) [] = { 0x0, 0x0, 0xf, 0x0, -+ 0x0, 0x0, 0xf, 0x0 }; -+#endif - VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff1, 0x5 }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0x0, 0x5 }; - VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x0, 0xf, 0xfffffff1 }; - VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x0, 0xf, 0x0 }; - - /* Expected results for vcvt_n. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_vcvt_n_s, hfloat, 16, 4) [] = { 0xc400, 0xc380, -+ 0xc300, 0xc280 }; -+VECT_VAR_DECL(expected_vcvt_n_u, hfloat, 16, 4) [] = { 0x6000, 0x6000, -+ 0x6000, 0x6000 }; -+VECT_VAR_DECL(expected_vcvt_n_s, hfloat, 16, 8) [] = { 0xb000, 0xaf80, -+ 0xaf00, 0xae80, -+ 0xae00, 0xad80, -+ 0xad00, 0xac80 }; -+VECT_VAR_DECL(expected_vcvt_n_u, hfloat, 16, 8) [] = { 0x4c00, 0x4c00, -+ 0x4c00, 0x4c00, -+ 0x4c00, 0x4c00, -+ 0x4c00, 0x4c00 }; -+#endif - VECT_VAR_DECL(expected_vcvt_n_s,hfloat,32,2) [] = { 0xc0800000, 0xc0700000 }; - VECT_VAR_DECL(expected_vcvt_n_u,hfloat,32,2) [] = { 0x4c000000, 0x4c000000 }; - VECT_VAR_DECL(expected_vcvt_n_s,hfloat,32,4) [] = { 0xb2800000, 0xb2700000, - 0xb2600000, 0xb2500000 }; - VECT_VAR_DECL(expected_vcvt_n_u,hfloat,32,4) [] = { 0x49800000, 0x49800000, - 0x49800000, 0x49800000 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_vcvt_n, int, 16, 4) [] = { 0xffc3, 0x15, -+ 0xffc3, 0x15 }; -+VECT_VAR_DECL(expected_vcvt_n, uint, 16, 4) [] = { 0x0, 0x2a6, 0x0, 0x2a6 }; -+VECT_VAR_DECL(expected_vcvt_n, int, 16, 8) [] = { 0x0, 0x0, 0x78f, 0xf871, -+ 0x0, 0x0, 0x78f, 0xf871 }; -+VECT_VAR_DECL(expected_vcvt_n, uint, 16, 8) [] = { 0x0, 0x0, 0xf1e0, 0x0, -+ 0x0, 0x0, 0xf1e0, 0x0 }; -+#endif - VECT_VAR_DECL(expected_vcvt_n,int,32,2) [] = { 0xff0b3333, 0x54cccd }; - VECT_VAR_DECL(expected_vcvt_n,uint,32,2) [] = { 0x0, 0x15 }; - VECT_VAR_DECL(expected_vcvt_n,int,32,4) [] = { 0x0, 0x0, 0x1e3d7, 0xfffe1c29 }; - VECT_VAR_DECL(expected_vcvt_n,uint,32,4) [] = { 0x0, 0x0, 0x1e, 0x0 }; - - /* Expected results for vcvt with rounding. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_rounding, int, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa }; -+VECT_VAR_DECL(expected_rounding, uint, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa }; -+VECT_VAR_DECL(expected_rounding, int, 16, 8) [] = { 0x7d, 0x7d, 0x7d, 0x7d, -+ 0x7d, 0x7d, 0x7d, 0x7d }; -+VECT_VAR_DECL(expected_rounding, uint, 16, 8) [] = { 0x7d, 0x7d, 0x7d, 0x7d, -+ 0x7d, 0x7d, 0x7d, 0x7d }; -+#endif - VECT_VAR_DECL(expected_rounding,int,32,2) [] = { 0xa, 0xa }; - VECT_VAR_DECL(expected_rounding,uint,32,2) [] = { 0xa, 0xa }; - VECT_VAR_DECL(expected_rounding,int,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d }; - VECT_VAR_DECL(expected_rounding,uint,32,4) [] = { 0x7d, 0x7d, 0x7d, 0x7d }; - - /* Expected results for vcvt_n with rounding. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_vcvt_n_rounding, int, 16, 4) [] = -+{ 0x533, 0x533, 0x533, 0x533 }; -+VECT_VAR_DECL(expected_vcvt_n_rounding, uint, 16, 4) [] = -+{ 0x533, 0x533, 0x533, 0x533 }; -+VECT_VAR_DECL(expected_vcvt_n_rounding, int, 16, 8) [] = -+{ 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+VECT_VAR_DECL(expected_vcvt_n_rounding, uint, 16, 8) [] = -+{ 0xffff, 0xffff, 0xffff, 0xffff, -+ 0xffff, 0xffff, 0xffff, 0xffff }; -+#endif - VECT_VAR_DECL(expected_vcvt_n_rounding,int,32,2) [] = { 0xa66666, 0xa66666 }; - VECT_VAR_DECL(expected_vcvt_n_rounding,uint,32,2) [] = { 0xa66666, 0xa66666 }; - VECT_VAR_DECL(expected_vcvt_n_rounding,int,32,4) [] = { 0xfbccc, 0xfbccc, -@@ -42,11 +105,17 @@ VECT_VAR_DECL(expected_vcvt_n_rounding,uint,32,4) [] = { 0xfbccc, 0xfbccc, - 0xfbccc, 0xfbccc }; - - /* Expected results for vcvt_n with saturation. */ --VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,2) [] = { 0x7fffffff, -- 0x7fffffff }; --VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,4) [] = { 0x7fffffff, -- 0x7fffffff, -- 0x7fffffff, 0x7fffffff }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_vcvt_n_saturation, int, 16, 4) [] = -+{ 0x533, 0x533, 0x533, 0x533 }; -+VECT_VAR_DECL(expected_vcvt_n_saturation, int, 16, 8) [] = -+{ 0x7fff, 0x7fff, 0x7fff, 0x7fff, -+ 0x7fff, 0x7fff, 0x7fff, 0x7fff }; -+#endif -+VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,2) [] = -+{ 0x7fffffff, 0x7fffffff }; -+VECT_VAR_DECL(expected_vcvt_n_saturation,int,32,4) [] = -+{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }; - - #define TEST_MSG "VCVT/VCVTQ" - void exec_vcvt (void) -@@ -89,11 +158,26 @@ void exec_vcvt (void) - - /* Initialize input "vector" from "buffer". */ - TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, float, f, 32, 4); - - /* Make sure some elements have a fractional part, to exercise - integer conversions. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VSET_LANE(vector, , float, f, 16, 4, 0, -15.3f); -+ VSET_LANE(vector, , float, f, 16, 4, 1, 5.3f); -+ VSET_LANE(vector, , float, f, 16, 4, 2, -15.3f); -+ VSET_LANE(vector, , float, f, 16, 4, 3, 5.3f); -+ VSET_LANE(vector, q, float, f, 16, 8, 4, -15.3f); -+ VSET_LANE(vector, q, float, f, 16, 8, 5, 5.3f); -+ VSET_LANE(vector, q, float, f, 16, 8, 6, -15.3f); -+ VSET_LANE(vector, q, float, f, 16, 8, 7, 5.3f); -+#endif -+ - VSET_LANE(vector, , float, f, 32, 2, 0, -15.3f); - VSET_LANE(vector, , float, f, 32, 2, 1, 5.3f); - VSET_LANE(vector, q, float, f, 32, 4, 2, -15.3f); -@@ -103,23 +187,55 @@ void exec_vcvt (void) - before overwriting them. */ - #define TEST_MSG2 "" - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt_f16_xx. */ -+ TEST_VCVT_FP(, float, f, 16, 4, int, s, expected_s); -+ TEST_VCVT_FP(, float, f, 16, 4, uint, u, expected_u); -+#endif - /* vcvt_f32_xx. */ - TEST_VCVT_FP(, float, f, 32, 2, int, s, expected_s); - TEST_VCVT_FP(, float, f, 32, 2, uint, u, expected_u); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvtq_f16_xx. */ -+ TEST_VCVT_FP(q, float, f, 16, 8, int, s, expected_s); -+ TEST_VCVT_FP(q, float, f, 16, 8, uint, u, expected_u); -+#endif - /* vcvtq_f32_xx. */ - TEST_VCVT_FP(q, float, f, 32, 4, int, s, expected_s); - TEST_VCVT_FP(q, float, f, 32, 4, uint, u, expected_u); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt_xx_f16. */ -+ TEST_VCVT(, int, s, 16, 4, float, f, expected); -+ TEST_VCVT(, uint, u, 16, 4, float, f, expected); -+#endif - /* vcvt_xx_f32. */ - TEST_VCVT(, int, s, 32, 2, float, f, expected); - TEST_VCVT(, uint, u, 32, 2, float, f, expected); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VSET_LANE(vector, q, float, f, 16, 8, 0, 0.0f); -+ VSET_LANE(vector, q, float, f, 16, 8, 1, -0.0f); -+ VSET_LANE(vector, q, float, f, 16, 8, 2, 15.12f); -+ VSET_LANE(vector, q, float, f, 16, 8, 3, -15.12f); -+ VSET_LANE(vector, q, float, f, 16, 8, 4, 0.0f); -+ VSET_LANE(vector, q, float, f, 16, 8, 5, -0.0f); -+ VSET_LANE(vector, q, float, f, 16, 8, 6, 15.12f); -+ VSET_LANE(vector, q, float, f, 16, 8, 7, -15.12f); -+#endif -+ - VSET_LANE(vector, q, float, f, 32, 4, 0, 0.0f); - VSET_LANE(vector, q, float, f, 32, 4, 1, -0.0f); - VSET_LANE(vector, q, float, f, 32, 4, 2, 15.12f); - VSET_LANE(vector, q, float, f, 32, 4, 3, -15.12f); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvtq_xx_f16. */ -+ TEST_VCVT(q, int, s, 16, 8, float, f, expected); -+ TEST_VCVT(q, uint, u, 16, 8, float, f, expected); -+#endif -+ - /* vcvtq_xx_f32. */ - TEST_VCVT(q, int, s, 32, 4, float, f, expected); - TEST_VCVT(q, uint, u, 32, 4, float, f, expected); -@@ -129,18 +245,38 @@ void exec_vcvt (void) - #undef TEST_MSG - #define TEST_MSG "VCVT_N/VCVTQ_N" - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt_n_f16_xx. */ -+ TEST_VCVT_N_FP(, float, f, 16, 4, int, s, 2, expected_vcvt_n_s); -+ TEST_VCVT_N_FP(, float, f, 16, 4, uint, u, 7, expected_vcvt_n_u); -+#endif - /* vcvt_n_f32_xx. */ - TEST_VCVT_N_FP(, float, f, 32, 2, int, s, 2, expected_vcvt_n_s); - TEST_VCVT_N_FP(, float, f, 32, 2, uint, u, 7, expected_vcvt_n_u); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvtq_n_f16_xx. */ -+ TEST_VCVT_N_FP(q, float, f, 16, 8, int, s, 7, expected_vcvt_n_s); -+ TEST_VCVT_N_FP(q, float, f, 16, 8, uint, u, 12, expected_vcvt_n_u); -+#endif - /* vcvtq_n_f32_xx. 
*/ - TEST_VCVT_N_FP(q, float, f, 32, 4, int, s, 30, expected_vcvt_n_s); - TEST_VCVT_N_FP(q, float, f, 32, 4, uint, u, 12, expected_vcvt_n_u); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt_n_xx_f16. */ -+ TEST_VCVT_N(, int, s, 16, 4, float, f, 2, expected_vcvt_n); -+ TEST_VCVT_N(, uint, u, 16, 4, float, f, 7, expected_vcvt_n); -+#endif - /* vcvt_n_xx_f32. */ - TEST_VCVT_N(, int, s, 32, 2, float, f, 20, expected_vcvt_n); - TEST_VCVT_N(, uint, u, 32, 2, float, f, 2, expected_vcvt_n); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvtq_n_xx_f16. */ -+ TEST_VCVT_N(q, int, s, 16, 8, float, f, 7, expected_vcvt_n); -+ TEST_VCVT_N(q, uint, u, 16, 8, float, f, 12, expected_vcvt_n); -+#endif - /* vcvtq_n_xx_f32. */ - TEST_VCVT_N(q, int, s, 32, 4, float, f, 13, expected_vcvt_n); - TEST_VCVT_N(q, uint, u, 32, 4, float, f, 1, expected_vcvt_n); -@@ -150,20 +286,49 @@ void exec_vcvt (void) - #define TEST_MSG "VCVT/VCVTQ" - #undef TEST_MSG2 - #define TEST_MSG2 "(check rounding)" -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, 10.4f); -+ VDUP(vector, q, float, f, 16, 8, 125.9f); -+#endif - VDUP(vector, , float, f, 32, 2, 10.4f); - VDUP(vector, q, float, f, 32, 4, 125.9f); -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt_xx_f16. */ -+ TEST_VCVT(, int, s, 16, 4, float, f, expected_rounding); -+ TEST_VCVT(, uint, u, 16, 4, float, f, expected_rounding); -+#endif - /* vcvt_xx_f32. */ - TEST_VCVT(, int, s, 32, 2, float, f, expected_rounding); - TEST_VCVT(, uint, u, 32, 2, float, f, expected_rounding); -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvtq_xx_f16. */ -+ TEST_VCVT(q, int, s, 16, 8, float, f, expected_rounding); -+ TEST_VCVT(q, uint, u, 16, 8, float, f, expected_rounding); -+#endif - /* vcvtq_xx_f32. */ - TEST_VCVT(q, int, s, 32, 4, float, f, expected_rounding); - TEST_VCVT(q, uint, u, 32, 4, float, f, expected_rounding); - - #undef TEST_MSG - #define TEST_MSG "VCVT_N/VCVTQ_N" -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt_n_xx_f16. */ -+ TEST_VCVT_N(, int, s, 16, 4, float, f, 7, expected_vcvt_n_rounding); -+ TEST_VCVT_N(, uint, u, 16, 4, float, f, 7, expected_vcvt_n_rounding); -+#endif - /* vcvt_n_xx_f32. */ - TEST_VCVT_N(, int, s, 32, 2, float, f, 20, expected_vcvt_n_rounding); - TEST_VCVT_N(, uint, u, 32, 2, float, f, 20, expected_vcvt_n_rounding); -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvtq_n_xx_f16. */ -+ TEST_VCVT_N(q, int, s, 16, 8, float, f, 13, expected_vcvt_n_rounding); -+ TEST_VCVT_N(q, uint, u, 16, 8, float, f, 13, expected_vcvt_n_rounding); -+#endif - /* vcvtq_n_xx_f32. */ - TEST_VCVT_N(q, int, s, 32, 4, float, f, 13, expected_vcvt_n_rounding); - TEST_VCVT_N(q, uint, u, 32, 4, float, f, 13, expected_vcvt_n_rounding); -@@ -172,8 +337,18 @@ void exec_vcvt (void) - #define TEST_MSG "VCVT_N/VCVTQ_N" - #undef TEST_MSG2 - #define TEST_MSG2 "(check saturation)" -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt_n_xx_f16. */ -+ TEST_VCVT_N(, int, s, 16, 4, float, f, 7, expected_vcvt_n_saturation); -+#endif - /* vcvt_n_xx_f32. */ - TEST_VCVT_N(, int, s, 32, 2, float, f, 31, expected_vcvt_n_saturation); -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvtq_n_xx_f16. */ -+ TEST_VCVT_N(q, int, s, 16, 8, float, f, 13, expected_vcvt_n_saturation); -+#endif - /* vcvtq_n_xx_f32. 
*/ - TEST_VCVT_N(q, int, s, 32, 4, float, f, 31, expected_vcvt_n_saturation); - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtX.inc -@@ -0,0 +1,113 @@ -+/* Template file for VCVT operator validation. -+ -+ This file is meant to be included by the relevant test files, which -+ have to define the intrinsic family to test. If a given intrinsic -+ supports variants which are not supported by all the other vcvt -+ operators, these can be tested by providing a definition for -+ EXTRA_TESTS. -+ -+ This file is only used for VCVT? tests, which currently have only f16 to -+ integer variants. It is based on vcvt.c. */ -+ -+#define FNNAME1(NAME) exec_ ## NAME -+#define FNNAME(NAME) FNNAME1 (NAME) -+ -+void FNNAME (INSN_NAME) (void) -+{ -+ int i; -+ -+ /* Basic test: y=vcvt(x), then store the result. */ -+#define TEST_VCVT1(INSN, Q, T1, T2, W, N, TS1, TS2, EXP) \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ INSN##Q##_##T2##W##_##TS2##W(VECT_VAR(vector, TS1, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vector_res, T1, W, N)); \ -+ CHECK(TEST_MSG, T1, W, N, PRIx##W, EXP, TEST_MSG2); -+ -+#define TEST_VCVT(INSN, Q, T1, T2, W, N, TS1, TS2, EXP) \ -+ TEST_VCVT1 (INSN, Q, T1, T2, W, N, TS1, TS2, EXP) -+ -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); -+ -+ clean_results (); -+ -+ /* Initialize input "vector" from "buffer". */ -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif -+ -+ /* Make sure some elements have a fractional part, to exercise -+ integer conversions. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VSET_LANE(vector, , float, f, 16, 4, 0, -15.3f); -+ VSET_LANE(vector, , float, f, 16, 4, 1, 5.3f); -+ VSET_LANE(vector, , float, f, 16, 4, 2, -15.3f); -+ VSET_LANE(vector, , float, f, 16, 4, 3, 5.3f); -+ VSET_LANE(vector, q, float, f, 16, 8, 4, -15.3f); -+ VSET_LANE(vector, q, float, f, 16, 8, 5, 5.3f); -+ VSET_LANE(vector, q, float, f, 16, 8, 6, -15.3f); -+ VSET_LANE(vector, q, float, f, 16, 8, 7, 5.3f); -+#endif -+ -+ /* The same result buffers are used multiple times, so we check them -+ before overwriting them. */ -+#define TEST_MSG2 "" -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt?_xx_f16. */ -+ TEST_VCVT(INSN_NAME, , int, s, 16, 4, float, f, expected); -+ TEST_VCVT(INSN_NAME, , uint, u, 16, 4, float, f, expected); -+#endif -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VSET_LANE(vector, q, float, f, 16, 8, 0, 0.0f); -+ VSET_LANE(vector, q, float, f, 16, 8, 1, -0.0f); -+ VSET_LANE(vector, q, float, f, 16, 8, 2, 15.12f); -+ VSET_LANE(vector, q, float, f, 16, 8, 3, -15.12f); -+ VSET_LANE(vector, q, float, f, 16, 8, 4, 0.0f); -+ VSET_LANE(vector, q, float, f, 16, 8, 5, -0.0f); -+ VSET_LANE(vector, q, float, f, 16, 8, 6, 15.12f); -+ VSET_LANE(vector, q, float, f, 16, 8, 7, -15.12f); -+#endif -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt?q_xx_f16. */ -+ TEST_VCVT(INSN_NAME, q, int, s, 16, 8, float, f, expected); -+ TEST_VCVT(INSN_NAME, q, uint, u, 16, 8, float, f, expected); -+#endif -+ -+ /* Check rounding. 
*/ -+#undef TEST_MSG2 -+#define TEST_MSG2 "(check rounding)" -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, 10.4f); -+ VDUP(vector, q, float, f, 16, 8, 125.9f); -+#endif -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt?_xx_f16. */ -+ TEST_VCVT(INSN_NAME, , int, s, 16, 4, float, f, expected_rounding); -+ TEST_VCVT(INSN_NAME, , uint, u, 16, 4, float, f, expected_rounding); -+#endif -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ /* vcvt?q_xx_f16. */ -+ TEST_VCVT(INSN_NAME, q, int, s, 16, 8, float, f, expected_rounding); -+ TEST_VCVT(INSN_NAME, q, uint, u, 16, 8, float, f, expected_rounding); -+#endif -+ -+#ifdef EXTRA_TESTS -+ EXTRA_TESTS(); -+#endif -+} -+ -+int -+main (void) -+{ -+ FNNAME (INSN_NAME) (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvta_1.c -@@ -0,0 +1,33 @@ -+/* This file tests an intrinsic which currently has only an f16 variant and that -+ is only available when FP16 arithmetic instructions are supported. */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+#include <math.h> -+ -+/* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, int, 16, 4) [] = { 0xfff1, 0x5, 0xfff1, 0x5 }; -+VECT_VAR_DECL(expected, uint, 16, 4) [] = { 0x0, 0x5, 0x0, 0x5 }; -+VECT_VAR_DECL(expected, int, 16, 8) [] = { 0x0, 0x0, 0xf, 0xfff1, -+ 0x0, 0x0, 0xf, 0xfff1 }; -+VECT_VAR_DECL(expected, uint, 16, 8) [] = { 0x0, 0x0, 0xf, 0x0, -+ 0x0, 0x0, 0xf, 0x0 }; -+#endif -+ -+/* Expected results with rounding. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_rounding, int, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa }; -+VECT_VAR_DECL(expected_rounding, uint, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa }; -+VECT_VAR_DECL(expected_rounding, int, 16, 8) [] = { 0x7e, 0x7e, 0x7e, 0x7e, -+ 0x7e, 0x7e, 0x7e, 0x7e }; -+VECT_VAR_DECL(expected_rounding, uint, 16, 8) [] = { 0x7e, 0x7e, 0x7e, 0x7e, -+ 0x7e, 0x7e, 0x7e, 0x7e }; -+#endif -+ -+#define TEST_MSG "VCVTA/VCVTAQ" -+#define INSN_NAME vcvta -+ -+#include "vcvtX.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int16_t expected[] = { 124, -57, 1, 25, -64, 169, -4, 77 }; -+ -+#define TEST_MSG "VCVTAH_S16_F16" -+#define INSN_NAME vcvtah_s16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. 
*/ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). */ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007b, -+ 0xfffffdc8, -+ 0xffffffdd, -+ 0x00000400, -+ 0x00000297, -+ 0x000000a9, -+ 0xfffffffb, -+ 0x0000004d, -+ 0xffffff6f, -+ 0xffffffc7, -+ 0xfffffff0, -+ 0xfffffff1, -+ 0xfffffff2, -+ 0xfffffff3 -+}; -+ -+#define TEST_MSG "VCVTAH_S32_F16" -+#define INSN_NAME vcvtah_s32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_s64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int64_t expected[] = { 124, -57, 1, 25, -64, 169, -4, 77 }; -+ -+#define TEST_MSG "VCVTAH_S64_F16" -+#define INSN_NAME vcvtah_s64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint16_t expected[] = { 124, 57, 1, 25, 64, 169, 4, 77 }; -+ -+#define TEST_MSG "VCVTAH_u16_F16" -+#define INSN_NAME vcvtah_u16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). 
*/ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007b, -+ 0x00000000, -+ 0x00000000, -+ 0x00000400, -+ 0x00000297, -+ 0x000000a9, -+ 0x00000000, -+ 0x0000004d, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000 -+}; -+ -+#define TEST_MSG "VCVTAH_U32_F16" -+#define INSN_NAME vcvtah_u32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtah_u64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint64_t expected[] = { 124, 57, 1, 25, 64, 169, 4, 77 }; -+ -+#define TEST_MSG "VCVTAH_u64_F16" -+#define INSN_NAME vcvtah_u64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s16_1.c -@@ -0,0 +1,25 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+int16_t input[] = { 123, -567, 0, 1024, -63, 169, -4, 77 }; -+uint16_t expected[] = { 0x57B0 /* 123.0. */, 0xE06E /* -567.0. */, -+ 0x0000 /* 0.0. */, 0x6400 /* 1024. */, -+ 0xD3E0 /* -63. */, 0x5948 /* 169. */, -+ 0xC400 /* -4. */, 0x54D0 /* 77. */ }; -+ -+#define TEST_MSG "VCVTH_F16_S16" -+#define INSN_NAME vcvth_f16_s16 -+ -+#define EXPECTED expected -+ -+#define INPUT input -+#define INPUT_TYPE int16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s32_1.c -@@ -0,0 +1,52 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+uint32_t input[] = -+{ -+ 0, -0, -+ 123, -567, -+ -34, 1024, -+ -63, 169, -+ -4, 77, -+ -144, -56, -+ -16, -15, -+ -14, -13, -+}; -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x57b0 /* 123.000000 */, -+ 0xe06e /* -567.000000 */, -+ 0xd040 /* -34.000000 */, -+ 0x6400 /* 1024.000000 */, -+ 0xd3e0 /* -63.000000 */, -+ 0x5948 /* 169.000000 */, -+ 0xc400 /* -4.000000 */, -+ 0x54d0 /* 77.000000 */, -+ 0xd880 /* -144.000000 */, -+ 0xd300 /* -56.000000 */, -+ 0xcc00 /* -16.000000 */, -+ 0xcb80 /* -15.000000 */, -+ 0xcb00 /* -14.000000 */, -+ 0xca80 /* -13.000000 */ -+}; -+ -+#define TEST_MSG "VCVTH_F16_S32" -+#define INSN_NAME vcvth_f16_s32 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE uint32_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_s64_1.c -@@ -0,0 +1,25 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+int64_t input[] = { 123, -567, 0, 1024, -63, 169, -4, 77 }; -+uint16_t expected[] = { 0x57B0 /* 123.0. */, 0xE06E /* -567.0. */, -+ 0x0000 /* 0.0. */, 0x6400 /* 1024. */, -+ 0xD3E0 /* -63. */, 0x5948 /* 169. */, -+ 0xC400 /* -4. */, 0x54D0 /* 77. */ }; -+ -+#define TEST_MSG "VCVTH_F16_S64" -+#define INSN_NAME vcvth_f16_s64 -+ -+#define EXPECTED expected -+ -+#define INPUT input -+#define INPUT_TYPE int64_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u16_1.c -@@ -0,0 +1,25 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t input[] = { 123, 567, 0, 1024, 63, 169, 4, 77 }; -+uint16_t expected[] = { 0x57B0 /* 123.0. */, 0x606E /* 567.0. */, -+ 0x0000 /* 0.0. */, 0x6400 /* 1024.0. */, -+ 0x53E0 /* 63.0. */, 0x5948 /* 169.0. */, -+ 0x4400 /* 4.0. */, 0x54D0 /* 77.0. */ }; -+ -+#define TEST_MSG "VCVTH_F16_U16" -+#define INSN_NAME vcvth_f16_u16 -+ -+#define EXPECTED expected -+ -+#define INPUT input -+#define INPUT_TYPE uint16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u32_1.c -@@ -0,0 +1,52 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+int32_t input[] = -+{ -+ 0, -0, -+ 123, -567, -+ -34, 1024, -+ -63, 169, -+ -4, 77, -+ -144, -56, -+ -16, -15, -+ -14, -13, -+}; -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x57b0 /* 123.000000 */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x6400 /* 1024.000000 */, -+ 0x7c00 /* inf */, -+ 0x5948 /* 169.000000 */, -+ 0x7c00 /* inf */, -+ 0x54d0 /* 77.000000 */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */ -+}; -+ -+#define TEST_MSG "VCVTH_F16_U32" -+#define INSN_NAME vcvth_f16_u32 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE int32_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_f16_u64_1.c -@@ -0,0 +1,25 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+uint64_t input[] = { 123, 567, 0, 1024, 63, 169, 4, 77 }; -+uint16_t expected[] = { 0x57B0 /* 123.0. */, 0x606E /* 567.0. */, -+ 0x0000 /* 0.0. */, 0x6400 /* 1024.0. */, -+ 0x53E0 /* 63.0. 
*/, 0x5948 /* 169.0. */, -+ 0x4400 /* 4.0. */, 0x54D0 /* 77.0. */ }; -+ -+#define TEST_MSG "VCVTH_F16_U64" -+#define INSN_NAME vcvth_f16_u64 -+ -+#define EXPECTED expected -+ -+#define INPUT input -+#define INPUT_TYPE uint64_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s16_1.c -@@ -0,0 +1,46 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+int16_t input[] = { 1, 10, 48, 100, -1, -10, 7, -7 }; -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected_1[] = { 0x3800 /* 0.5. */, -+ 0x4500 /* 5. */, -+ 0x4E00 /* 24. */, -+ 0x5240 /* 50. */, -+ 0xB800 /* -0.5. */, -+ 0xC500 /* -5. */, -+ 0x4300 /* 3.5. */, -+ 0xC300 /* -3.5. */ }; -+ -+uint16_t expected_2[] = { 0x3400 /* 0.25. */, -+ 0x4100 /* 2.5. */, -+ 0x4A00 /* 12. */, -+ 0x4E40 /* 25. */, -+ 0xB400 /* -0.25. */, -+ 0xC100 /* -2.5. */, -+ 0x3F00 /* 1.75. */, -+ 0xBF00 /* -1.75. */ }; -+ -+#define TEST_MSG "VCVTH_N_F16_S16" -+#define INSN_NAME vcvth_n_f16_s16 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+ -+#define INPUT_TYPE int16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s32_1.c -@@ -0,0 +1,99 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+uint32_t input[] = -+{ -+ 0, -0, -+ 123, -567, -+ -34, 1024, -+ -63, 169, -+ -4, 77, -+ -144, -56, -+ -16, -15, -+ -14, -13, -+}; -+ -+/* Expected results (16-bit hexadecimal representation). 
*/ -+uint16_t expected_1[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x53b0 /* 61.500000 */, -+ 0xdc6e /* -283.500000 */, -+ 0xcc40 /* -17.000000 */, -+ 0x6000 /* 512.000000 */, -+ 0xcfe0 /* -31.500000 */, -+ 0x5548 /* 84.500000 */, -+ 0xc000 /* -2.000000 */, -+ 0x50d0 /* 38.500000 */, -+ 0xd480 /* -72.000000 */, -+ 0xcf00 /* -28.000000 */, -+ 0xc800 /* -8.000000 */, -+ 0xc780 /* -7.500000 */, -+ 0xc700 /* -7.000000 */, -+ 0xc680 /* -6.500000 */ -+}; -+ -+uint16_t expected_2[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x4fb0 /* 30.750000 */, -+ 0xd86e /* -141.750000 */, -+ 0xc840 /* -8.500000 */, -+ 0x5c00 /* 256.000000 */, -+ 0xcbe0 /* -15.750000 */, -+ 0x5148 /* 42.250000 */, -+ 0xbc00 /* -1.000000 */, -+ 0x4cd0 /* 19.250000 */, -+ 0xd080 /* -36.000000 */, -+ 0xcb00 /* -14.000000 */, -+ 0xc400 /* -4.000000 */, -+ 0xc380 /* -3.750000 */, -+ 0xc300 /* -3.500000 */, -+ 0xc280 /* -3.250000 */ -+}; -+ -+uint16_t expected_3[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x8002 /* -0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x0004 /* 0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x0001 /* 0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x8001 /* -0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x8000 /* -0.000000 */ -+}; -+ -+#define TEST_MSG "VCVTH_N_F16_S32" -+#define INSN_NAME vcvth_n_f16_s32 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+#define EXPECTED_3 expected_3 -+ -+#define INPUT_TYPE int32_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+#define SCALAR_3 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_s64_1.c -@@ -0,0 +1,46 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+int64_t input[] = { 1, 10, 48, 100, -1, -10, 7, -7 }; -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected_1[] = { 0x3800 /* 0.5. */, -+ 0x4500 /* 5. */, -+ 0x4E00 /* 24. */, -+ 0x5240 /* 50. */, -+ 0xB800 /* -0.5. */, -+ 0xC500 /* -5. */, -+ 0x4300 /* 3.5. */, -+ 0xC300 /* -3.5. */ }; -+ -+uint16_t expected_2[] = { 0x3400 /* 0.25. */, -+ 0x4100 /* 2.5. */, -+ 0x4A00 /* 12. */, -+ 0x4E40 /* 25. */, -+ 0xB400 /* -0.25. */, -+ 0xC100 /* -2.5. */, -+ 0x3F00 /* 1.75. */, -+ 0xBF00 /* -1.75. */ }; -+ -+#define TEST_MSG "VCVTH_N_F16_S64" -+#define INSN_NAME vcvth_n_f16_s64 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+ -+#define INPUT_TYPE int64_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u16_1.c -@@ -0,0 +1,46 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. 
*/ -+uint16_t input[] = { 1, 10, 48, 100, 1000, 0, 500, 9 }; -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected_1[] = { 0x3800 /* 0.5. */, -+ 0x4500 /* 5. */, -+ 0x4E00 /* 24. */, -+ 0x5240 /* 50. */, -+ 0x5FD0 /* 500. */, -+ 0x0000 /* 0.0. */, -+ 0x5BD0 /* 250. */, -+ 0x4480 /* 4.5. */ }; -+ -+uint16_t expected_2[] = { 0x3400 /* 0.25. */, -+ 0x4100 /* 2.5. */, -+ 0x4A00 /* 12. */, -+ 0x4E40 /* 25. */, -+ 0x5BD0 /* 250. */, -+ 0x0000 /* 0.0. */, -+ 0x57D0 /* 125. */, -+ 0x4080 /* 2.25. */ }; -+ -+#define TEST_MSG "VCVTH_N_F16_U16" -+#define INSN_NAME vcvth_n_f16_u16 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+ -+#define INPUT_TYPE uint16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u32_1.c -@@ -0,0 +1,99 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+uint32_t input[] = -+{ -+ 0, -0, -+ 123, -567, -+ -34, 1024, -+ -63, 169, -+ -4, 77, -+ -144, -56, -+ -16, -15, -+ -14, -13, -+}; -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected_1[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x53b0 /* 61.500000 */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x6000 /* 512.000000 */, -+ 0x7c00 /* inf */, -+ 0x5548 /* 84.500000 */, -+ 0x7c00 /* inf */, -+ 0x50d0 /* 38.500000 */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */ -+}; -+ -+uint16_t expected_2[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x4fb0 /* 30.750000 */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x5c00 /* 256.000000 */, -+ 0x7c00 /* inf */, -+ 0x5148 /* 42.250000 */, -+ 0x7c00 /* inf */, -+ 0x4cd0 /* 19.250000 */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */, -+ 0x7c00 /* inf */ -+}; -+ -+uint16_t expected_3[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x0004 /* 0.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x0001 /* 0.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */ -+}; -+ -+#define TEST_MSG "VCVTH_N_F16_U32" -+#define INSN_NAME vcvth_n_f16_u32 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+#define EXPECTED_3 expected_3 -+ -+#define INPUT_TYPE uint32_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+#define SCALAR_3 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_f16_u64_1.c -@@ -0,0 +1,46 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. 
*/ -+uint64_t input[] = { 1, 10, 48, 100, 1000, 0, 500, 9 }; -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected_1[] = { 0x3800 /* 0.5. */, -+ 0x4500 /* 5. */, -+ 0x4E00 /* 24. */, -+ 0x5240 /* 50. */, -+ 0x5FD0 /* 500. */, -+ 0x0000 /* 0.0. */, -+ 0x5BD0 /* 250. */, -+ 0x4480 /* 4.5. */ }; -+ -+uint16_t expected_2[] = { 0x3400 /* 0.25. */, -+ 0x4100 /* 2.5. */, -+ 0x4A00 /* 12. */, -+ 0x4E40 /* 25. */, -+ 0x5BD0 /* 250. */, -+ 0x0000 /* 0.0. */, -+ 0x57D0 /* 125. */, -+ 0x4080 /* 2.25. */ }; -+ -+#define TEST_MSG "VCVTH_N_F16_U64" -+#define INSN_NAME vcvth_n_f16_u64 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+ -+#define INPUT_TYPE uint64_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s16_f16_1.c -@@ -0,0 +1,29 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 2.5, 100, 7.1, -9.9, -5.0, 9.1, -4.8, 77 }; -+int16_t expected_1[] = { 5, 200, 14, -19, -10, 18, -9, 154 }; -+int16_t expected_2[] = { 10, 400, 28, -39, -20, 36, -19, 308 }; -+ -+#define TEST_MSG "VCVTH_N_S16_F16" -+#define INSN_NAME vcvth_n_s16_f16 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s32_f16_1.c -@@ -0,0 +1,100 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). 
*/ -+uint32_t expected_1[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x000000f6, -+ 0xfffffb90, -+ 0xffffffbb, -+ 0x00000800, -+ 0x0000052e, -+ 0x00000152, -+ 0xfffffff7, -+ 0x0000009a, -+ 0xfffffedf, -+ 0xffffff8f, -+ 0xffffffe0, -+ 0xffffffe2, -+ 0xffffffe4, -+ 0xffffffe6, -+}; -+ -+uint32_t expected_2[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x000001ed, -+ 0xfffff720, -+ 0xffffff75, -+ 0x00001000, -+ 0x00000a5c, -+ 0x000002a4, -+ 0xffffffed, -+ 0x00000134, -+ 0xfffffdbe, -+ 0xffffff1d, -+ 0xffffffc0, -+ 0xffffffc4, -+ 0xffffffc8, -+ 0xffffffcc, -+}; -+ -+uint32_t expected_3[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x7fffffff, -+ 0x80000000, -+ 0x80000000, -+ 0x7fffffff, -+ 0x7fffffff, -+ 0x7fffffff, -+ 0x80000000, -+ 0x7fffffff, -+ 0x80000000, -+ 0x80000000, -+ 0x80000000, -+ 0x80000000, -+ 0x80000000, -+ 0x80000000, -+}; -+ -+#define TEST_MSG "VCVTH_N_S32_F16" -+#define INSN_NAME vcvth_n_s32_f16 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+#define EXPECTED_3 expected_3 -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+#define SCALAR_3 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_s64_f16_1.c -@@ -0,0 +1,29 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 2.5, 100, 7.1, -9.9, -5.0, 9.1, -4.8, 77 }; -+int64_t expected_1[] = { 5, 200, 14, -19, -10, 18, -9, 154 }; -+int64_t expected_2[] = { 10, 400, 28, -39, -20, 36, -19, 308 }; -+ -+#define TEST_MSG "VCVTH_N_S64_F16" -+#define INSN_NAME vcvth_n_s64_f16 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u16_f16_1.c -@@ -0,0 +1,29 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 2.5, 100, 7.1, 9.9, 5.0, 9.1, 4.8, 77 }; -+uint16_t expected_1[] = {5, 200, 14, 19, 10, 18, 9, 154}; -+uint16_t expected_2[] = {10, 400, 28, 39, 20, 36, 19, 308}; -+ -+#define TEST_MSG "VCVTH_N_U16_F16" -+#define INSN_NAME vcvth_n_u16_f16 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u32_f16_1.c -@@ -0,0 +1,100 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. 
*/ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). */ -+uint32_t expected_1[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x000000f6, -+ 0x00000000, -+ 0x00000000, -+ 0x00000800, -+ 0x0000052e, -+ 0x00000152, -+ 0x00000000, -+ 0x0000009a, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+}; -+ -+uint32_t expected_2[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x000001ed, -+ 0x00000000, -+ 0x00000000, -+ 0x00001000, -+ 0x00000a5c, -+ 0x000002a4, -+ 0x00000000, -+ 0x00000134, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+}; -+ -+uint32_t expected_3[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0xffffffff, -+ 0x00000000, -+ 0x00000000, -+ 0xffffffff, -+ 0xffffffff, -+ 0xffffffff, -+ 0x00000000, -+ 0xffffffff, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+}; -+ -+#define TEST_MSG "VCVTH_N_U32_F16" -+#define INSN_NAME vcvth_n_u32_f16 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+#define EXPECTED_3 expected_3 -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+#define SCALAR_3 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_n_u64_f16_1.c -@@ -0,0 +1,29 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 2.5, 100, 7.1, 9.9, 5.0, 9.1, 4.8, 77 }; -+uint64_t expected_1[] = { 5, 200, 14, 19, 10, 18, 9, 154 }; -+uint64_t expected_2[] = { 10, 400, 28, 39, 20, 36, 19, 308 }; -+ -+#define TEST_MSG "VCVTH_N_U64_F16" -+#define INSN_NAME vcvth_n_u64_f16 -+ -+#define INPUT input -+#define EXPECTED_1 expected_1 -+#define EXPECTED_2 expected_2 -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+#define SCALAR_OPERANDS -+#define SCALAR_1 1 -+#define SCALAR_2 2 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int16_t expected[] = { 123, -56, 0, 24, -63, 169, -4, 77 }; -+ -+#define TEST_MSG "VCVTH_S16_F16" -+#define INSN_NAME vcvth_s16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). */ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007b, -+ 0xfffffdc8, -+ 0xffffffde, -+ 0x00000400, -+ 0x00000297, -+ 0x000000a9, -+ 0xfffffffc, -+ 0x0000004d, -+ 0xffffff70, -+ 0xffffffc8, -+ 0xfffffff0, -+ 0xfffffff1, -+ 0xfffffff2, -+ 0xfffffff3, -+}; -+ -+#define TEST_MSG "VCVTH_S32_F16" -+#define INSN_NAME vcvth_s32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_s64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int64_t expected[] = { 123, -56, 0, 24, -63, 169, -4, 77 }; -+ -+#define TEST_MSG "VCVTH_S64_F16" -+#define INSN_NAME vcvth_s64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint16_t expected[] = { 123, 56, 0, 24, 63, 169, 4, 77 }; -+ -+#define TEST_MSG "VCVTH_u16_F16" -+#define INSN_NAME vcvth_u16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). 
*/ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007b, -+ 0x00000000, -+ 0x00000000, -+ 0x00000400, -+ 0x00000297, -+ 0x000000a9, -+ 0x00000000, -+ 0x0000004d, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+}; -+ -+#define TEST_MSG "VCVTH_U32_F16" -+#define INSN_NAME vcvth_u32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvth_u64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint64_t expected[] = { 123, 56, 0, 24, 63, 169, 4, 77 }; -+ -+#define TEST_MSG "VCVTH_u64_F16" -+#define INSN_NAME vcvth_u64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtm_1.c -@@ -0,0 +1,33 @@ -+/* This file tests an intrinsic which currently has only an f16 variant and that -+ is only available when FP16 arithmetic instructions are supported. */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+#include <math.h> -+ -+/* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, int, 16, 4) [] = { 0xfff0, 0x5, 0xfff0, 0x5 }; -+VECT_VAR_DECL(expected, uint, 16, 4) [] = { 0x0, 0x5, 0x0, 0x5 }; -+VECT_VAR_DECL(expected, int, 16, 8) [] = { 0x0, 0x0, 0xf, 0xfff0, 0x0, -+ 0x0, 0xf, 0xfff0 }; -+VECT_VAR_DECL(expected, uint, 16, 8) [] = { 0x0, 0x0, 0xf, 0x0, -+ 0x0, 0x0, 0xf, 0x0 }; -+#endif -+ -+/* Expected results with rounding. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_rounding, int, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa }; -+VECT_VAR_DECL(expected_rounding, uint, 16, 4) [] = { 0xa, 0xa, 0xa, 0xa }; -+VECT_VAR_DECL(expected_rounding, int, 16, 8) [] = { 0x7d, 0x7d, 0x7d, 0x7d, -+ 0x7d, 0x7d, 0x7d, 0x7d }; -+VECT_VAR_DECL(expected_rounding, uint, 16, 8) [] = { 0x7d, 0x7d, 0x7d, 0x7d, -+ 0x7d, 0x7d, 0x7d, 0x7d }; -+#endif -+ -+#define TEST_MSG "VCVTM/VCVTMQ" -+#define INSN_NAME vcvtm -+ -+#include "vcvtX.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int16_t expected[] = { 123, -57, 0, 24, -64, 169, -5, 77 }; -+ -+#define TEST_MSG "VCVTMH_S16_F16" -+#define INSN_NAME vcvtmh_s16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). */ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007b, -+ 0xfffffdc8, -+ 0xffffffdd, -+ 0x00000400, -+ 0x00000297, -+ 0x000000a9, -+ 0xfffffffb, -+ 0x0000004d, -+ 0xffffff6f, -+ 0xffffffc7, -+ 0xfffffff0, -+ 0xfffffff1, -+ 0xfffffff2, -+ 0xfffffff3 -+}; -+ -+#define TEST_MSG "VCVTMH_S32_F16" -+#define INSN_NAME vcvtmh_s32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_s64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int64_t expected[] = { 123, -57, 0, 24, -64, 169, -5, 77 }; -+ -+#define TEST_MSG "VCVTMH_S64_F16" -+#define INSN_NAME vcvtmh_s64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint16_t expected[] = { 123, 56, 0, 24, 63, 169, 4, 77 }; -+ -+#define TEST_MSG "VCVTMH_u16_F16" -+#define INSN_NAME vcvtmh_u16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). 
*/ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007b, -+ 0x00000000, -+ 0x00000000, -+ 0x00000400, -+ 0x00000297, -+ 0x000000a9, -+ 0x00000000, -+ 0x0000004d, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+}; -+ -+#define TEST_MSG "VCVTMH_U32_F16" -+#define INSN_NAME vcvtmh_u32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtmh_u64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint64_t expected[] = { 123, 56, 0, 24, 63, 169, 4, 77 }; -+ -+#define TEST_MSG "VCVTMH_u64_F16" -+#define INSN_NAME vcvtmh_u64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int16_t expected[] = { 124, -57, 1, 25, -64, 169, -4, 77 }; -+ -+#define TEST_MSG "VCVTNH_S16_F16" -+#define INSN_NAME vcvtnh_s16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). */ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007b, -+ 0xfffffdc8, -+ 0xffffffdd, -+ 0x00000400, -+ 0x00000297, -+ 0x000000a9, -+ 0xfffffffb, -+ 0x0000004d, -+ 0xffffff70, -+ 0xffffffc7, -+ 0xfffffff0, -+ 0xfffffff1, -+ 0xfffffff2, -+ 0xfffffff3 -+}; -+ -+#define TEST_MSG "VCVTNH_S32_F16" -+#define INSN_NAME vcvtnh_s32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_s64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int64_t expected[] = { 124, -57, 1, 25, -64, 169, -4, 77 }; -+ -+#define TEST_MSG "VCVTNH_S64_F16" -+#define INSN_NAME vcvtnh_s64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint16_t expected[] = { 124, 57, 1, 25, 64, 169, 4, 77 }; -+ -+#define TEST_MSG "VCVTNH_u16_F16" -+#define INSN_NAME vcvtnh_u16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). */ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007b, -+ 0x00000000, -+ 0x00000000, -+ 0x00000400, -+ 0x00000297, -+ 0x000000a9, -+ 0x00000000, -+ 0x0000004d, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+}; -+ -+#define TEST_MSG "VCVTNH_U32_F16" -+#define INSN_NAME vcvtnh_u32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtnh_u64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint64_t expected[] = { 124, 57, 1, 25, 64, 169, 4, 77 }; -+ -+#define TEST_MSG "VCVTNH_u64_F16" -+#define INSN_NAME vcvtnh_u64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtp_1.c -@@ -0,0 +1,33 @@ -+/* This file tests an intrinsic which currently has only an f16 variant and that -+ is only available when FP16 arithmetic instructions are supported. */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+#include <math.h> -+ -+/* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, int, 16, 4) [] = { 0xfff1, 0x6, 0xfff1, 0x6 }; -+VECT_VAR_DECL(expected, uint, 16, 4) [] = { 0x0, 0x6, 0x0, 0x6 }; -+VECT_VAR_DECL(expected, int, 16, 8) [] = { 0x0, 0x0, 0x10, 0xfff1, -+ 0x0, 0x0, 0x10, 0xfff1 }; -+VECT_VAR_DECL(expected, uint, 16, 8) [] = { 0x0, 0x0, 0x10, 0x0, -+ 0x0, 0x0, 0x10, 0x0 }; -+#endif -+ -+/* Expected results with rounding. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_rounding, int, 16, 4) [] = { 0xb, 0xb, 0xb, 0xb }; -+VECT_VAR_DECL(expected_rounding, uint, 16, 4) [] = { 0xb, 0xb, 0xb, 0xb }; -+VECT_VAR_DECL(expected_rounding, int, 16, 8) [] = { 0x7e, 0x7e, 0x7e, 0x7e, -+ 0x7e, 0x7e, 0x7e, 0x7e }; -+VECT_VAR_DECL(expected_rounding, uint, 16, 8) [] = { 0x7e, 0x7e, 0x7e, 0x7e, -+ 0x7e, 0x7e, 0x7e, 0x7e }; -+#endif -+ -+#define TEST_MSG "VCVTP/VCVTPQ" -+#define INSN_NAME vcvtp -+ -+#include "vcvtX.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int16_t expected[] = { 124, -56, 1, 25, -63, 170, -4, 77 }; -+ -+#define TEST_MSG "VCVTPH_S16_F16" -+#define INSN_NAME vcvtph_s16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). */ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007c, -+ 0xfffffdc8, -+ 0xffffffde, -+ 0x00000400, -+ 0x00000297, -+ 0x000000aa, -+ 0xfffffffc, -+ 0x0000004d, -+ 0xffffff70, -+ 0xffffffc8, -+ 0xfffffff0, -+ 0xfffffff1, -+ 0xfffffff2, -+ 0xfffffff3 -+}; -+ -+#define TEST_MSG "VCVTPH_S32_F16" -+#define INSN_NAME vcvtph_s32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_s64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, -56.8, 0.7, 24.6, -63.5, 169.4, -4.3, 77.0 }; -+int64_t expected[] = { 124, -56, 1, 25, -63, 170, -4, 77 }; -+ -+#define TEST_MSG "VCVTPH_S64_F16" -+#define INSN_NAME vcvtph_s64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE int64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u16_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint16_t expected[] = { 124, 57, 1, 25, 64, 170, 5, 77 }; -+ -+#define TEST_MSG "VCVTPH_u16_F16" -+#define INSN_NAME vcvtph_u16_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u32_f16_1.c -@@ -0,0 +1,53 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = -+{ -+ 0.0, -0.0, -+ 123.4, -567.8, -+ -34.8, 1024, -+ 663.1, 169.1, -+ -4.8, 77.0, -+ -144.5, -56.8, -+ -+ (float16_t) -16, (float16_t) -15, -+ (float16_t) -14, (float16_t) -13, -+}; -+ -+/* Expected results (32-bit hexadecimal representation). */ -+uint32_t expected[] = -+{ -+ 0x00000000, -+ 0x00000000, -+ 0x0000007c, -+ 0x00000000, -+ 0x00000000, -+ 0x00000400, -+ 0x00000297, -+ 0x000000aa, -+ 0x00000000, -+ 0x0000004d, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+ 0x00000000, -+}; -+ -+#define TEST_MSG "VCVTPH_U32_F16" -+#define INSN_NAME vcvtph_u32_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint32_t -+#define OUTPUT_TYPE_SIZE 32 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vcvtph_u64_f16_1.c -@@ -0,0 +1,23 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.9, 56.8, 0.7, 24.6, 63.5, 169.4, 4.3, 77.0 }; -+uint64_t expected[] = { 124, 57, 1, 25, 64, 170, 5, 77 }; -+ -+#define TEST_MSG "VCVTPH_u64_F16" -+#define INSN_NAME vcvtph_u64_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE uint64_t -+#define OUTPUT_TYPE_SIZE 64 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdiv_f16_1.c -@@ -0,0 +1,86 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (13.4) -+#define B FP16_C (-56.8) -+#define C FP16_C (-34.8) -+#define D FP16_C (12) -+#define E FP16_C (63.1) -+#define F FP16_C (19.1) -+#define G FP16_C (-4.8) -+#define H FP16_C (77) -+ -+#define I FP16_C (0.7) -+#define J FP16_C (-78) -+#define K FP16_C (11.23) -+#define L FP16_C (98) -+#define M FP16_C (87.1) -+#define N FP16_C (-8) -+#define O FP16_C (-1.1) -+#define P FP16_C (-9.7) -+ -+/* Expected results for vdiv. */ -+VECT_VAR_DECL (expected_div_static, hfloat, 16, 4) [] -+ = { 0x32CC /* A / E. */, 0xC1F3 /* B / F. */, -+ 0x4740 /* C / G. */, 0x30FD /* D / H. */ }; -+ -+VECT_VAR_DECL (expected_div_static, hfloat, 16, 8) [] -+ = { 0x32CC /* A / E. */, 0xC1F3 /* B / F. */, -+ 0x4740 /* C / G. */, 0x30FD /* D / H. */, -+ 0x201D /* I / M. */, 0x48E0 /* J / N. */, -+ 0xC91B /* K / O. */, 0xC90D /* L / P. */ }; -+ -+void exec_vdiv_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VDIV (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 4); -+ DECL_VARIABLE(vsrc_2, float, 16, 4); -+ VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); -+ -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vdiv_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4)); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_div_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VDIVQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 8); -+ DECL_VARIABLE(vsrc_2, float, 16, 8); -+ VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); -+ -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vdivq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8)); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_div_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vdiv_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdivh_f16_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+#define INFF __builtin_inf () -+ -+/* Expected results (16-bit hexadecimal representation). 
*/ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0xb765 /* -0.462158 */, -+ 0x27ef /* 0.030991 */, -+ 0x3955 /* 0.666504 */, -+ 0xccff /* -19.984375 */, -+ 0xc49a /* -4.601562 */, -+ 0xb1e3 /* -0.183960 */, -+ 0x3cd3 /* 1.206055 */, -+ 0x23f0 /* 0.015503 */, -+ 0xa9ef /* -0.046356 */, -+ 0x32f4 /* 0.217285 */, -+ 0xb036 /* -0.131592 */, -+ 0x4126 /* 2.574219 */, -+ 0xcd15 /* -20.328125 */, -+ 0x537f /* 59.968750 */, -+ 0x7e00 /* nan */, -+ 0x7e00 /* nan */ -+}; -+ -+#define TEST_MSG "VDIVH_F16" -+#define INSN_NAME vdivh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup-vmov.c -@@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,uint,64,1) [] = { 0xfffffffffffffff0 }; - VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf0, 0xf0, 0xf0, - 0xf0, 0xf0, 0xf0, 0xf0 }; - VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcc00, -+ 0xcc00, 0xcc00 }; -+#endif - VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; - VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, - 0xf0, 0xf0, 0xf0, 0xf0, -@@ -46,6 +50,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf0, 0xf0, 0xf0, - 0xf0, 0xf0, 0xf0, 0xf0 }; - VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff0, 0xfff0, 0xfff0, - 0xfff0, 0xfff0, 0xfff0, 0xfff0 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcc00, -+ 0xcc00, 0xcc00, -+ 0xcc00, 0xcc00, -+ 0xcc00, 0xcc00 }; -+#endif - VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1800000, - 0xc1800000, 0xc1800000 }; - -@@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,64,1) [] = { 0xfffffffffffffff1 }; - VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf1, 0xf1, 0xf1, 0xf1, - 0xf1, 0xf1, 0xf1, 0xf1 }; - VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xcb80, -+ 0xcb80, 0xcb80 }; -+#endif - VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; - VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, - 0xf1, 0xf1, 0xf1, 0xf1, -@@ -90,6 +104,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf1, 0xf1, 0xf1, 0xf1, - 0xf1, 0xf1, 0xf1, 0xf1 }; - VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, - 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb80, 0xcb80, -+ 0xcb80, 0xcb80, -+ 0xcb80, 0xcb80, -+ 0xcb80, 0xcb80 }; -+#endif - VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, - 0xc1700000, 0xc1700000 }; - -@@ -107,6 +127,10 @@ VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff2 }; - VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf2, 0xf2, 0xf2, 0xf2, - 0xf2, 0xf2, 0xf2, 0xf2 }; - VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0xcb00, -+ 0xcb00, 0xcb00 }; -+#endif - VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1600000, 0xc1600000 }; - VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, - 0xf2, 
0xf2, 0xf2, 0xf2, -@@ -134,6 +158,12 @@ VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, - 0xf2, 0xf2, 0xf2, 0xf2 }; - VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2, - 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb00, 0xcb00, -+ 0xcb00, 0xcb00, -+ 0xcb00, 0xcb00, -+ 0xcb00, 0xcb00 }; -+#endif - VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0xc1600000, - 0xc1600000, 0xc1600000 }; - -@@ -171,6 +201,9 @@ void exec_vdup_vmov (void) - TEST_VDUP(, uint, u, 64, 1); - TEST_VDUP(, poly, p, 8, 8); - TEST_VDUP(, poly, p, 16, 4); -+#if defined (FP16_SUPPORTED) -+ TEST_VDUP(, float, f, 16, 4); -+#endif - TEST_VDUP(, float, f, 32, 2); - - TEST_VDUP(q, int, s, 8, 16); -@@ -183,8 +216,26 @@ void exec_vdup_vmov (void) - TEST_VDUP(q, uint, u, 64, 2); - TEST_VDUP(q, poly, p, 8, 16); - TEST_VDUP(q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VDUP(q, float, f, 16, 8); -+#endif - TEST_VDUP(q, float, f, 32, 4); - -+#if defined (FP16_SUPPORTED) -+ switch (i) { -+ case 0: -+ CHECK_RESULTS_NAMED (TEST_MSG, expected0, ""); -+ break; -+ case 1: -+ CHECK_RESULTS_NAMED (TEST_MSG, expected1, ""); -+ break; -+ case 2: -+ CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); -+ break; -+ default: -+ abort(); -+ } -+#else - switch (i) { - case 0: - CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); -@@ -198,6 +249,7 @@ void exec_vdup_vmov (void) - default: - abort(); - } -+#endif - } - - /* Do the same tests with vmov. Use the same expected results. */ -@@ -216,6 +268,9 @@ void exec_vdup_vmov (void) - TEST_VMOV(, uint, u, 64, 1); - TEST_VMOV(, poly, p, 8, 8); - TEST_VMOV(, poly, p, 16, 4); -+#if defined (FP16_SUPPORTED) -+ TEST_VMOV(, float, f, 16, 4); -+#endif - TEST_VMOV(, float, f, 32, 2); - - TEST_VMOV(q, int, s, 8, 16); -@@ -228,8 +283,26 @@ void exec_vdup_vmov (void) - TEST_VMOV(q, uint, u, 64, 2); - TEST_VMOV(q, poly, p, 8, 16); - TEST_VMOV(q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VMOV(q, float, f, 16, 8); -+#endif - TEST_VMOV(q, float, f, 32, 4); - -+#if defined (FP16_SUPPORTED) -+ switch (i) { -+ case 0: -+ CHECK_RESULTS_NAMED (TEST_MSG, expected0, ""); -+ break; -+ case 1: -+ CHECK_RESULTS_NAMED (TEST_MSG, expected1, ""); -+ break; -+ case 2: -+ CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); -+ break; -+ default: -+ abort(); -+ } -+#else - switch (i) { - case 0: - CHECK_RESULTS_NAMED_NO_FP16 (TEST_MSG, expected0, ""); -@@ -243,6 +316,8 @@ void exec_vdup_vmov (void) - default: - abort(); - } -+#endif -+ - } - } - ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vdup_lane.c -@@ -17,6 +17,10 @@ VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, - 0xf7, 0xf7, 0xf7, 0xf7 }; - VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xca80, 0xca80, -+ 0xca80, 0xca80 }; -+#endif - VECT_VAR_DECL(expected,int,8,16) [] = { 0xf2, 0xf2, 0xf2, 0xf2, - 0xf2, 0xf2, 0xf2, 0xf2, - 0xf2, 0xf2, 0xf2, 0xf2, -@@ -43,10 +47,16 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, - 0xf5, 0xf5, 0xf5, 0xf5 }; - VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, - 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xca80, 
0xca80, -+ 0xca80, 0xca80, -+ 0xca80, 0xca80, -+ 0xca80, 0xca80 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, - 0xc1700000, 0xc1700000 }; - --#define TEST_MSG "VDUP_LANE/VDUP_LANEQ" -+#define TEST_MSG "VDUP_LANE/VDUPQ_LANE" - void exec_vdup_lane (void) - { - /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. */ -@@ -63,6 +73,9 @@ void exec_vdup_lane (void) - clean_results (); - - TEST_MACRO_64BITS_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (FP16_SUPPORTED) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - - /* Choose lane arbitrarily. */ -@@ -76,6 +89,9 @@ void exec_vdup_lane (void) - TEST_VDUP_LANE(, uint, u, 64, 1, 1, 0); - TEST_VDUP_LANE(, poly, p, 8, 8, 8, 7); - TEST_VDUP_LANE(, poly, p, 16, 4, 4, 3); -+#if defined (FP16_SUPPORTED) -+ TEST_VDUP_LANE(, float, f, 16, 4, 4, 3); -+#endif - TEST_VDUP_LANE(, float, f, 32, 2, 2, 1); - - TEST_VDUP_LANE(q, int, s, 8, 16, 8, 2); -@@ -88,9 +104,133 @@ void exec_vdup_lane (void) - TEST_VDUP_LANE(q, uint, u, 64, 2, 1, 0); - TEST_VDUP_LANE(q, poly, p, 8, 16, 8, 5); - TEST_VDUP_LANE(q, poly, p, 16, 8, 4, 1); -+#if defined (FP16_SUPPORTED) -+ TEST_VDUP_LANE(q, float, f, 16, 8, 4, 3); -+#endif - TEST_VDUP_LANE(q, float, f, 32, 4, 2, 1); - -+#if defined (FP16_SUPPORTED) -+ CHECK_RESULTS (TEST_MSG, ""); -+#else - CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); -+#endif -+ -+#if defined (__aarch64__) -+ -+#undef TEST_MSG -+#define TEST_MSG "VDUP_LANEQ/VDUPQ_LANEQ" -+ -+ /* Expected results for vdup*_laneq tests. */ -+VECT_VAR_DECL(expected2,int,8,8) [] = { 0xfd, 0xfd, 0xfd, 0xfd, -+ 0xfd, 0xfd, 0xfd, 0xfd }; -+VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; -+VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0xfffffff1 }; -+VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; -+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; -+VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf7, 0xf7, 0xf7, 0xf7, -+ 0xf7, 0xf7, 0xf7, 0xf7 }; -+VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff3, 0xfff3, 0xfff3, 0xfff3 }; -+VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xca80, 0xca80, -+ 0xca80, 0xca80 }; -+#endif -+VECT_VAR_DECL(expected2,int,8,16) [] = { 0xfb, 0xfb, 0xfb, 0xfb, -+ 0xfb, 0xfb, 0xfb, 0xfb, -+ 0xfb, 0xfb, 0xfb, 0xfb, -+ 0xfb, 0xfb, 0xfb, 0xfb }; -+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff7, 0xfff7, 0xfff7, 0xfff7, -+ 0xfff7, 0xfff7, 0xfff7, 0xfff7 }; -+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0xfffffff1, -+ 0xfffffff1, 0xfffffff1 }; -+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, -+ 0xf5, 0xf5, 0xf5, 0xf5, -+ 0xf5, 0xf5, 0xf5, 0xf5, -+ 0xf5, 0xf5, 0xf5, 0xf5 }; -+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, -+ 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; -+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff0, 0xfffffff0, -+ 0xfffffff0, 0xfffffff0 }; -+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf5, 0xf5, 0xf5, 0xf5, -+ 0xf5, 0xf5, 0xf5, 0xf5, -+ 
0xf5, 0xf5, 0xf5, 0xf5, -+ 0xf5, 0xf5, 0xf5, 0xf5 }; -+VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0xfff1, 0xfff1, 0xfff1, -+ 0xfff1, 0xfff1, 0xfff1, 0xfff1 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xc880, 0xc880, -+ 0xc880, 0xc880, -+ 0xc880, 0xc880, -+ 0xc880, 0xc880 }; -+#endif -+VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1700000, -+ 0xc1700000, 0xc1700000 }; -+ -+ /* Clean all results for vdup*_laneq tests. */ -+ clean_results (); -+ /* Basic test: vec1=vdup_lane(vec2, lane), then store the result. */ -+#define TEST_VDUP_LANEQ(Q, T1, T2, W, N, N2, L) \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ vdup##Q##_laneq_##T2##W(VECT_VAR(vector, T1, W, N2), L); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) -+ -+ /* Input vector can only have 64 bits. */ -+ DECL_VARIABLE_128BITS_VARIANTS(vector); -+ -+ clean_results (); -+ -+ TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (FP16_SUPPORTED) -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif -+ VLOAD(vector, buffer, q, float, f, 32, 4); -+ -+ /* Choose lane arbitrarily. */ -+ TEST_VDUP_LANEQ(, int, s, 8, 8, 16, 13); -+ TEST_VDUP_LANEQ(, int, s, 16, 4, 8, 2); -+ TEST_VDUP_LANEQ(, int, s, 32, 2, 4, 1); -+ TEST_VDUP_LANEQ(, int, s, 64, 1, 2, 0); -+ TEST_VDUP_LANEQ(, uint, u, 8, 8, 16, 15); -+ TEST_VDUP_LANEQ(, uint, u, 16, 4, 8, 3); -+ TEST_VDUP_LANEQ(, uint, u, 32, 2, 4, 1); -+ TEST_VDUP_LANEQ(, uint, u, 64, 1, 2, 0); -+ TEST_VDUP_LANEQ(, poly, p, 8, 8, 16, 7); -+ TEST_VDUP_LANEQ(, poly, p, 16, 4, 8, 3); -+#if defined (FP16_SUPPORTED) -+ TEST_VDUP_LANEQ(, float, f, 16, 4, 8, 3); -+#endif -+ TEST_VDUP_LANEQ(, float, f, 32, 2, 4, 1); -+ -+ TEST_VDUP_LANEQ(q, int, s, 8, 16, 16, 11); -+ TEST_VDUP_LANEQ(q, int, s, 16, 8, 8, 7); -+ TEST_VDUP_LANEQ(q, int, s, 32, 4, 4, 1); -+ TEST_VDUP_LANEQ(q, int, s, 64, 2, 2, 0); -+ TEST_VDUP_LANEQ(q, uint, u, 8, 16, 16, 5); -+ TEST_VDUP_LANEQ(q, uint, u, 16, 8, 8, 1); -+ TEST_VDUP_LANEQ(q, uint, u, 32, 4, 4, 0); -+ TEST_VDUP_LANEQ(q, uint, u, 64, 2, 2, 0); -+ TEST_VDUP_LANEQ(q, poly, p, 8, 16, 16, 5); -+ TEST_VDUP_LANEQ(q, poly, p, 16, 8, 8, 1); -+#if defined (FP16_SUPPORTED) -+ TEST_VDUP_LANEQ(q, float, f, 16, 8, 8, 7); -+#endif -+ TEST_VDUP_LANEQ(q, float, f, 32, 4, 4, 1); -+ -+ CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); -+#if defined (FP16_SUPPORTED) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); -+#endif -+ -+#endif /* __aarch64__. 
*/ - } - - int main (void) ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vduph_lane.c -@@ -0,0 +1,137 @@ -+/* { dg-do run } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define A -16 -+#define B -15 -+#define C -14 -+#define D -13 -+#define E -12 -+#define F -11 -+#define G -10 -+#define H -9 -+ -+#define F16_C(a) ((__fp16) a) -+#define AF F16_C (A) -+#define BF F16_C (B) -+#define CF F16_C (C) -+#define DF F16_C (D) -+#define EF F16_C (E) -+#define FF F16_C (F) -+#define GF F16_C (G) -+#define HF F16_C (H) -+ -+#define S16_C(a) ((int16_t) a) -+#define AS S16_C (A) -+#define BS S16_C (B) -+#define CS S16_C (C) -+#define DS S16_C (D) -+#define ES S16_C (E) -+#define FS S16_C (F) -+#define GS S16_C (G) -+#define HS S16_C (H) -+ -+#define U16_C(a) ((int16_t) a) -+#define AU U16_C (A) -+#define BU U16_C (B) -+#define CU U16_C (C) -+#define DU U16_C (D) -+#define EU U16_C (E) -+#define FU U16_C (F) -+#define GU U16_C (G) -+#define HU U16_C (H) -+ -+#define P16_C(a) ((poly16_t) a) -+#define AP P16_C (A) -+#define BP P16_C (B) -+#define CP P16_C (C) -+#define DP P16_C (D) -+#define EP P16_C (E) -+#define FP P16_C (F) -+#define GP P16_C (G) -+#define HP P16_C (H) -+ -+/* Expected results for vduph_lane. */ -+float16_t expected_f16 = AF; -+int16_t expected_s16 = DS; -+uint16_t expected_u16 = BU; -+poly16_t expected_p16 = CP; -+ -+/* Expected results for vduph_laneq. */ -+float16_t expected_q_f16 = EF; -+int16_t expected_q_s16 = BS; -+uint16_t expected_q_u16 = GU; -+poly16_t expected_q_p16 = FP; -+ -+void exec_vduph_lane_f16 (void) -+{ -+ /* vduph_lane. */ -+ DECL_VARIABLE(vsrc, float, 16, 4); -+ DECL_VARIABLE(vsrc, int, 16, 4); -+ DECL_VARIABLE(vsrc, uint, 16, 4); -+ DECL_VARIABLE(vsrc, poly, 16, 4); -+ VECT_VAR_DECL (buf_src, float, 16, 4) [] = {AF, BF, CF, DF}; -+ VECT_VAR_DECL (buf_src, int, 16, 4) [] = {AS, BS, CS, DS}; -+ VECT_VAR_DECL (buf_src, uint, 16, 4) [] = {AU, BU, CU, DU}; -+ VECT_VAR_DECL (buf_src, poly, 16, 4) [] = {AP, BP, CP, DP}; -+ VLOAD (vsrc, buf_src, , int, s, 16, 4); -+ VLOAD (vsrc, buf_src, , float, f, 16, 4); -+ VLOAD (vsrc, buf_src, , uint, u, 16, 4); -+ VLOAD (vsrc, buf_src, , poly, p, 16, 4); -+ -+ float16_t res_f = vduph_lane_f16 (VECT_VAR (vsrc, float, 16, 4), 0); -+ if (* (unsigned short *) &res_f != * (unsigned short *) &expected_f16) -+ abort (); -+ -+ int16_t res_s = vduph_lane_s16 (VECT_VAR (vsrc, int, 16, 4), 3); -+ if (* (unsigned short *) &res_s != * (unsigned short *) &expected_s16) -+ abort (); -+ -+ uint16_t res_u = vduph_lane_u16 (VECT_VAR (vsrc, uint, 16, 4), 1); -+ if (* (unsigned short *) &res_u != * (unsigned short *) &expected_u16) -+ abort (); -+ -+ poly16_t res_p = vduph_lane_p16 (VECT_VAR (vsrc, poly, 16, 4), 2); -+ if (* (unsigned short *) &res_p != * (unsigned short *) &expected_p16) -+ abort (); -+ -+ /* vduph_laneq. 
*/ -+ DECL_VARIABLE(vsrc, float, 16, 8); -+ DECL_VARIABLE(vsrc, int, 16, 8); -+ DECL_VARIABLE(vsrc, uint, 16, 8); -+ DECL_VARIABLE(vsrc, poly, 16, 8); -+ VECT_VAR_DECL (buf_src, float, 16, 8) [] = {AF, BF, CF, DF, EF, FF, GF, HF}; -+ VECT_VAR_DECL (buf_src, int, 16, 8) [] = {AS, BS, CS, DS, ES, FS, GS, HS}; -+ VECT_VAR_DECL (buf_src, uint, 16, 8) [] = {AU, BU, CU, DU, EU, FU, GU, HU}; -+ VECT_VAR_DECL (buf_src, poly, 16, 8) [] = {AP, BP, CP, DP, EP, FP, GP, HP}; -+ VLOAD (vsrc, buf_src, q, int, s, 16, 8); -+ VLOAD (vsrc, buf_src, q, float, f, 16, 8); -+ VLOAD (vsrc, buf_src, q, uint, u, 16, 8); -+ VLOAD (vsrc, buf_src, q, poly, p, 16, 8); -+ -+ res_f = vduph_laneq_f16 (VECT_VAR (vsrc, float, 16, 8), 4); -+ if (* (unsigned short *) &res_f != * (unsigned short *) &expected_q_f16) -+ abort (); -+ -+ res_s = vduph_laneq_s16 (VECT_VAR (vsrc, int, 16, 8), 1); -+ if (* (unsigned short *) &res_s != * (unsigned short *) &expected_q_s16) -+ abort (); -+ -+ res_u = vduph_laneq_u16 (VECT_VAR (vsrc, uint, 16, 8), 6); -+ if (* (unsigned short *) &res_u != * (unsigned short *) &expected_q_u16) -+ abort (); -+ -+ res_p = vduph_laneq_p16 (VECT_VAR (vsrc, poly, 16, 8), 5); -+ if (* (unsigned short *) &res_p != * (unsigned short *) &expected_q_p16) -+ abort (); -+} -+ -+int -+main (void) -+{ -+ exec_vduph_lane_f16 (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vext.c -@@ -16,6 +16,10 @@ VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; - VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf6, 0xf7, 0x55, 0x55, - 0x55, 0x55, 0x55, 0x55 }; - VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcb00, 0xca80, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; - VECT_VAR_DECL(expected,int,8,16) [] = { 0xfe, 0xff, 0x11, 0x11, - 0x11, 0x11, 0x11, 0x11, -@@ -39,6 +43,12 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0xfc, 0xfd, 0xfe, 0xff, - 0x55, 0x55, 0x55, 0x55 }; - VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff6, 0xfff7, 0x66, 0x66, - 0x66, 0x66, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xc880, 0x4b4d, -+ 0x4b4d, 0x4b4d, -+ 0x4b4d, 0x4b4d, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1500000, 0x4204cccd, - 0x4204cccd, 0x4204cccd }; - -@@ -60,6 +70,10 @@ void exec_vext (void) - clean_results (); - - TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); -+#ifdef FP16_SUPPORTED -+ VLOAD(vector1, buffer, , float, f, 16, 4); -+ VLOAD(vector1, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector1, buffer, , float, f, 32, 2); - VLOAD(vector1, buffer, q, float, f, 32, 4); - -@@ -74,6 +88,9 @@ void exec_vext (void) - VDUP(vector2, , uint, u, 64, 1, 0x88); - VDUP(vector2, , poly, p, 8, 8, 0x55); - VDUP(vector2, , poly, p, 16, 4, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ -+#endif - VDUP(vector2, , float, f, 32, 2, 33.6f); - - VDUP(vector2, q, int, s, 8, 16, 0x11); -@@ -86,6 +103,9 @@ void exec_vext (void) - VDUP(vector2, q, uint, u, 64, 2, 0x88); - VDUP(vector2, q, poly, p, 8, 16, 0x55); - VDUP(vector2, q, poly, p, 16, 8, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, q, float, f, 16, 8, 14.6f); -+#endif - VDUP(vector2, q, float, f, 32, 4, 33.2f); - - /* Choose arbitrary extract offsets. 
*/ -@@ -99,6 +119,9 @@ void exec_vext (void) - TEST_VEXT(, uint, u, 64, 1, 0); - TEST_VEXT(, poly, p, 8, 8, 6); - TEST_VEXT(, poly, p, 16, 4, 2); -+#if defined (FP16_SUPPORTED) -+ TEST_VEXT(, float, f, 16, 4, 2); -+#endif - TEST_VEXT(, float, f, 32, 2, 1); - - TEST_VEXT(q, int, s, 8, 16, 14); -@@ -111,9 +134,16 @@ void exec_vext (void) - TEST_VEXT(q, uint, u, 64, 2, 1); - TEST_VEXT(q, poly, p, 8, 16, 12); - TEST_VEXT(q, poly, p, 16, 8, 6); -+#if defined (FP16_SUPPORTED) -+ TEST_VEXT(q, float, f, 16, 8, 7); -+#endif - TEST_VEXT(q, float, f, 32, 4, 3); - -+#if defined (FP16_SUPPORTED) -+ CHECK_RESULTS (TEST_MSG, ""); -+#else - CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); -+#endif - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfma.c -@@ -3,11 +3,19 @@ - #include "compute-ref-data.h" - - #ifdef __ARM_FEATURE_FMA -+ - /* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0x61c6, 0x61c8, 0x61ca, 0x61cc }; -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0x6435, 0x6436, 0x6437, 0x6438, -+ 0x6439, 0x643a, 0x643b, 0x643c }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4438ca3d, 0x44390a3d }; --VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, 0x4486deb8, 0x4486feb8 }; -+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, -+ 0x4486deb8, 0x4486feb8 }; - #ifdef __aarch64__ --VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520, 0x40890ee1532b8520 }; -+VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520, -+ 0x40890ee1532b8520 }; - #endif - - #define TEST_MSG "VFMA/VFMAQ" -@@ -44,6 +52,18 @@ void exec_vfma (void) - DECL_VARIABLE(VAR, float, 32, 4); - #endif - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector1, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector3, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ -+ DECL_VARIABLE(vector1, float, 16, 8); -+ DECL_VARIABLE(vector2, float, 16, 8); -+ DECL_VARIABLE(vector3, float, 16, 8); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif -+ - DECL_VFMA_VAR(vector1); - DECL_VFMA_VAR(vector2); - DECL_VFMA_VAR(vector3); -@@ -52,6 +72,10 @@ void exec_vfma (void) - clean_results (); - - /* Initialize input "vector1" from "buffer". */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector1, buffer, , float, f, 16, 4); -+ VLOAD(vector1, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector1, buffer, , float, f, 32, 2); - VLOAD(vector1, buffer, q, float, f, 32, 4); - #ifdef __aarch64__ -@@ -59,13 +83,21 @@ void exec_vfma (void) - #endif - - /* Choose init value arbitrarily. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, 9.3f); -+ VDUP(vector2, q, float, f, 16, 8, 29.7f); -+#endif - VDUP(vector2, , float, f, 32, 2, 9.3f); - VDUP(vector2, q, float, f, 32, 4, 29.7f); - #ifdef __aarch64__ - VDUP(vector2, q, float, f, 64, 2, 15.8f); - #endif -- -+ - /* Choose init value arbitrarily. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector3, , float, f, 16, 4, 81.2f); -+ VDUP(vector3, q, float, f, 16, 8, 36.8f); -+#endif - VDUP(vector3, , float, f, 32, 2, 81.2f); - VDUP(vector3, q, float, f, 32, 4, 36.8f); - #ifdef __aarch64__ -@@ -73,12 +105,20 @@ void exec_vfma (void) - #endif - - /* Execute the tests. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VFMA(, float, f, 16, 4); -+ TEST_VFMA(q, float, f, 16, 8); -+#endif - TEST_VFMA(, float, f, 32, 2); - TEST_VFMA(q, float, f, 32, 4); - #ifdef __aarch64__ - TEST_VFMA(q, float, f, 64, 2); - #endif - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); -+#endif - CHECK_VFMA_RESULTS (TEST_MSG, ""); - } - #endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmah_f16_1.c -@@ -0,0 +1,40 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x3944 /* 0.658203 */, -+ 0xcefa /* -27.906250 */, -+ 0x5369 /* 59.281250 */, -+ 0x35ba /* 0.357910 */, -+ 0xc574 /* -5.453125 */, -+ 0xc5e6 /* -5.898438 */, -+ 0x3f66 /* 1.849609 */, -+ 0x5665 /* 102.312500 */, -+ 0xc02d /* -2.087891 */, -+ 0x4d79 /* 21.890625 */, -+ 0x547b /* 71.687500 */, -+ 0xcdf0 /* -23.750000 */, -+ 0xc625 /* -6.144531 */, -+ 0x4cf9 /* 19.890625 */, -+ 0x7e00 /* nan */, -+ 0x7e00 /* nan */ -+}; -+ -+#define TEST_MSG "VFMAH_F16" -+#define INSN_NAME vfmah_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "ternary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_lane_f16_1.c -@@ -0,0 +1,908 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A0 FP16_C (123.4) -+#define A1 FP16_C (-5.8) -+#define A2 FP16_C (-0.0) -+#define A3 FP16_C (10) -+#define A4 FP16_C (123412.43) -+#define A5 FP16_C (-5.8) -+#define A6 FP16_C (90.8) -+#define A7 FP16_C (24) -+ -+#define B0 FP16_C (23.4) -+#define B1 FP16_C (-5.8) -+#define B2 FP16_C (8.9) -+#define B3 FP16_C (4.0) -+#define B4 FP16_C (3.4) -+#define B5 FP16_C (-550.8) -+#define B6 FP16_C (-31.8) -+#define B7 FP16_C (20000.0) -+ -+/* Expected results for vfma_lane. */ -+VECT_VAR_DECL (expected0_static, hfloat, 16, 4) [] -+ = { 0x613E /* A0 + B0 * B0. */, -+ 0xD86D /* A1 + B1 * B0. */, -+ 0x5A82 /* A2 + B2 * B0. */, -+ 0x567A /* A3 + B3 * B0. */}; -+ -+VECT_VAR_DECL (expected1_static, hfloat, 16, 4) [] -+ = { 0xCA33 /* A0 + B0 * B1. */, -+ 0x4EF6 /* A1 + B1 * B1. */, -+ 0xD274 /* A2 + B2 * B1. */, -+ 0xCA9A /* A3 + B3 * B1. */ }; -+ -+VECT_VAR_DECL (expected2_static, hfloat, 16, 4) [] -+ = { 0x5D2F /* A0 + B0 * B2. */, -+ 0xD32D /* A1 + B1 * B2. */, -+ 0x54F3 /* A2 + B2 * B2. */, -+ 0x51B3 /* A3 + B3 * B2. */ }; -+ -+VECT_VAR_DECL (expected3_static, hfloat, 16, 4) [] -+ = { 0x5AC8 /* A0 + B0 * B3. */, -+ 0xCF40 /* A1 + B1 * B3. */, -+ 0x5073 /* A2 + B2 * B3. */, -+ 0x4E80 /* A3 + B3 * B3. */ }; -+ -+/* Expected results for vfmaq_lane. */ -+VECT_VAR_DECL (expected0_static, hfloat, 16, 8) [] -+ = { 0x613E /* A0 + B0 * B0. */, -+ 0xD86D /* A1 + B1 * B0. */, -+ 0x5A82 /* A2 + B2 * B0. */, -+ 0x567A /* A3 + B3 * B0. */, -+ 0x7C00 /* A4 + B4 * B0. */, -+ 0xF24D /* A5 + B5 * B0. 
*/, -+ 0xE11B /* A6 + B6 * B0. */, -+ 0x7C00 /* A7 + B7 * B0. */ }; -+ -+VECT_VAR_DECL (expected1_static, hfloat, 16, 8) [] -+ = { 0xCA33 /* A0 + B0 * B1. */, -+ 0x4EF6 /* A1 + B1 * B1. */, -+ 0xD274 /* A2 + B2 * B1. */, -+ 0xCA9A /* A3 + B3 * B1. */, -+ 0x7C00 /* A4 + B4 * B1. */, -+ 0x6A3B /* A5 + B5 * B1. */, -+ 0x5C4D /* A6 + B6 * B1. */, -+ 0xFC00 /* A7 + B7 * B1. */ }; -+ -+VECT_VAR_DECL (expected2_static, hfloat, 16, 8) [] -+ = { 0x5D2F /* A0 + B0 * B2. */, -+ 0xD32D /* A1 + B1 * B2. */, -+ 0x54F3 /* A2 + B2 * B2. */, -+ 0x51B3 /* A3 + B3 * B2. */, -+ 0x7C00 /* A4 + B4 * B2. */, -+ 0xECCB /* A5 + B5 * B2. */, -+ 0xDA01 /* A6 + B6 * B2. */, -+ 0x7C00 /* A7 + B7 * B2. */ }; -+ -+VECT_VAR_DECL (expected3_static, hfloat, 16, 8) [] -+ = { 0x5AC8 /* A0 + B0 * B3. */, -+ 0xCF40 /* A1 + B1 * B3. */, -+ 0x5073 /* A2 + B2 * B3. */, -+ 0x4E80 /* A3 + B3 * B3. */, -+ 0x7C00 /* A4 + B4 * B3. */, -+ 0xE851 /* A5 + B5 * B3. */, -+ 0xD08C /* A6 + B6 * B3. */, -+ 0x7C00 /* A7 + B7 * B3. */ }; -+ -+/* Expected results for vfma_laneq. */ -+VECT_VAR_DECL (expected0_laneq_static, hfloat, 16, 4) [] -+ = { 0x613E /* A0 + B0 * B0. */, -+ 0xD86D /* A1 + B1 * B0. */, -+ 0x5A82 /* A2 + B2 * B0. */, -+ 0x567A /* A3 + B3 * B0. */ }; -+ -+VECT_VAR_DECL (expected1_laneq_static, hfloat, 16, 4) [] -+ = { 0xCA33 /* A0 + B0 * B1. */, -+ 0x4EF6 /* A1 + B1 * B1. */, -+ 0xD274 /* A2 + B2 * B1. */, -+ 0xCA9A /* A3 + B3 * B1. */ }; -+ -+VECT_VAR_DECL (expected2_laneq_static, hfloat, 16, 4) [] -+ = { 0x5D2F /* A0 + B0 * B2. */, -+ 0xD32D /* A1 + B1 * B2. */, -+ 0x54F3 /* A2 + B2 * B2. */, -+ 0x51B3 /* A3 + B3 * B2. */ }; -+ -+VECT_VAR_DECL (expected3_laneq_static, hfloat, 16, 4) [] -+ = { 0x5AC8 /* A0 + B0 * B3. */, -+ 0xCF40 /* A1 + B1 * B3. */, -+ 0x5073 /* A2 + B2 * B3. */, -+ 0x4E80 /* A3 + B3 * B3. */ }; -+ -+VECT_VAR_DECL (expected4_laneq_static, hfloat, 16, 4) [] -+ = { 0x5A58 /* A0 + B0 * B4. */, -+ 0xCE62 /* A1 + B1 * B4. */, -+ 0x4F91 /* A2 + B2 * B4. */, -+ 0x4DE6 /* A3 + B3 * B4. */ }; -+ -+VECT_VAR_DECL (expected5_laneq_static, hfloat, 16, 4) [] -+ = { 0xF23D /* A0 + B0 * B5. */, -+ 0x6A3B /* A1 + B1 * B5. */, -+ 0xECCA /* A2 + B2 * B5. */, -+ 0xE849 /* A3 + B3 * B5. */ }; -+ -+VECT_VAR_DECL (expected6_laneq_static, hfloat, 16, 4) [] -+ = { 0xE0DA /* A0 + B0 * B6. */, -+ 0x5995 /* A1 + B1 * B6. */, -+ 0xDC6C /* A2 + B2 * B6. */, -+ 0xD753 /* A3 + B3 * B6. */ }; -+ -+VECT_VAR_DECL (expected7_laneq_static, hfloat, 16, 4) [] -+ = { 0x7C00 /* A0 + B0 * B7. */, -+ 0xFC00 /* A1 + B1 * B7. */, -+ 0x7C00 /* A2 + B2 * B7. */, -+ 0x7C00 /* A3 + B3 * B7. */ }; -+ -+/* Expected results for vfmaq_laneq. */ -+VECT_VAR_DECL (expected0_laneq_static, hfloat, 16, 8) [] -+ = { 0x613E /* A0 + B0 * B0. */, -+ 0xD86D /* A1 + B1 * B0. */, -+ 0x5A82 /* A2 + B2 * B0. */, -+ 0x567A /* A3 + B3 * B0. */, -+ 0x7C00 /* A4 + B4 * B0. */, -+ 0xF24D /* A5 + B5 * B0. */, -+ 0xE11B /* A6 + B6 * B0. */, -+ 0x7C00 /* A7 + B7 * B0. */ }; -+ -+VECT_VAR_DECL (expected1_laneq_static, hfloat, 16, 8) [] -+ = { 0xCA33 /* A0 + B0 * B1. */, -+ 0x4EF6 /* A1 + B1 * B1. */, -+ 0xD274 /* A2 + B2 * B1. */, -+ 0xCA9A /* A3 + B3 * B1. */, -+ 0x7C00 /* A4 + B4 * B1. */, -+ 0x6A3B /* A5 + B5 * B1. */, -+ 0x5C4D /* A6 + B6 * B1. */, -+ 0xFC00 /* A7 + B7 * B1. */ }; -+ -+VECT_VAR_DECL (expected2_laneq_static, hfloat, 16, 8) [] -+ = { 0x5D2F /* A0 + B0 * B2. */, -+ 0xD32D /* A1 + B1 * B2. */, -+ 0x54F3 /* A2 + B2 * B2. */, -+ 0x51B3 /* A3 + B3 * B2. */, -+ 0x7C00 /* A4 + B4 * B2. */, -+ 0xECCB /* A5 + B5 * B2. */, -+ 0xDA01 /* A6 + B6 * B2. 
*/, -+ 0x7C00 /* A7 + B7 * B2. */ }; -+ -+VECT_VAR_DECL (expected3_laneq_static, hfloat, 16, 8) [] -+ = { 0x5AC8 /* A0 + B0 * B3. */, -+ 0xCF40 /* A1 + B1 * B3. */, -+ 0x5073 /* A2 + B2 * B3. */, -+ 0x4E80 /* A3 + B3 * B3. */, -+ 0x7C00 /* A4 + B4 * B3. */, -+ 0xE851 /* A5 + B5 * B3. */, -+ 0xD08C /* A6 + B6 * B3. */, -+ 0x7C00 /* A7 + B7 * B3. */ }; -+ -+VECT_VAR_DECL (expected4_laneq_static, hfloat, 16, 8) [] -+ = { 0x5A58 /* A0 + B0 * B4. */, -+ 0xCE62 /* A1 + B1 * B4. */, -+ 0x4F91 /* A2 + B2 * B4. */, -+ 0x4DE6 /* A3 + B3 * B4. */, -+ 0x7C00 /* A4 + B4 * B4. */, -+ 0xE757 /* A5 + B5 * B4. */, -+ 0xCC54 /* A6 + B6 * B4. */, -+ 0x7C00 /* A7 + B7 * B4. */ }; -+ -+VECT_VAR_DECL (expected5_laneq_static, hfloat, 16, 8) [] -+ = { 0xF23D /* A0 + B0 * B5. */, -+ 0x6A3B /* A1 + B1 * B5. */, -+ 0xECCA /* A2 + B2 * B5. */, -+ 0xE849 /* A3 + B3 * B5. */, -+ 0x7C00 /* A4 + B4 * B5. */, -+ 0x7C00 /* A5 + B5 * B5. */, -+ 0x744D /* A6 + B6 * B5. */, -+ 0xFC00 /* A7 + B7 * B5. */ }; -+ -+VECT_VAR_DECL (expected6_laneq_static, hfloat, 16, 8) [] -+ = { 0xE0DA /* A0 + B0 * B6. */, -+ 0x5995 /* A1 + B1 * B6. */, -+ 0xDC6C /* A2 + B2 * B6. */, -+ 0xD753 /* A3 + B3 * B6. */, -+ 0x7C00 /* A4 + B4 * B6. */, -+ 0x7447 /* A5 + B5 * B6. */, -+ 0x644E /* A6 + B6 * B6. */, -+ 0xFC00 /* A7 + B7 * B6. */ }; -+ -+VECT_VAR_DECL (expected7_laneq_static, hfloat, 16, 8) [] -+ = { 0x7C00 /* A0 + B0 * B7. */, -+ 0xFC00 /* A1 + B1 * B7. */, -+ 0x7C00 /* A2 + B2 * B7. */, -+ 0x7C00 /* A3 + B3 * B7. */, -+ 0x7C00 /* A4 + B4 * B7. */, -+ 0xFC00 /* A5 + B5 * B7. */, -+ 0xFC00 /* A6 + B6 * B7. */, -+ 0x7C00 /* A7 + B7 * B7. */ }; -+ -+/* Expected results for vfms_lane. */ -+VECT_VAR_DECL (expected0_fms_static, hfloat, 16, 4) [] -+ = { 0xDEA2 /* A0 + (-B0) * B0. */, -+ 0x5810 /* A1 + (-B1) * B0. */, -+ 0xDA82 /* A2 + (-B2) * B0. */, -+ 0xD53A /* A3 + (-B3) * B0. */ }; -+ -+VECT_VAR_DECL (expected1_fms_static, hfloat, 16, 4) [] -+ = { 0x5C0D /* A0 + (-B0) * B1. */, -+ 0xD0EE /* A1 + (-B1) * B1. */, -+ 0x5274 /* A2 + (-B2) * B1. */, -+ 0x5026 /* A3 + (-B3) * B1. */ }; -+ -+VECT_VAR_DECL (expected2_fms_static, hfloat, 16, 4) [] -+ = { 0xD54E /* A0 + (-B0) * B2. */, -+ 0x51BA /* A1 + (-B1) * B2. */, -+ 0xD4F3 /* A2 + (-B2) * B2. */, -+ 0xCE66 /* A3 + (-B3) * B2. */ }; -+ -+VECT_VAR_DECL (expected3_fms_static, hfloat, 16, 4) [] -+ = { 0x4F70 /* A0 + (-B0) * B3. */, -+ 0x4C5A /* A1 + (-B1) * B3. */, -+ 0xD073 /* A2 + (-B2) * B3. */, -+ 0xC600 /* A3 + (-B3) * B3. */ }; -+ -+/* Expected results for vfmsq_lane. */ -+VECT_VAR_DECL (expected0_fms_static, hfloat, 16, 8) [] -+ = { 0xDEA2 /* A0 + (-B0) * B0. */, -+ 0x5810 /* A1 + (-B1) * B0. */, -+ 0xDA82 /* A2 + (-B2) * B0. */, -+ 0xD53A /* A3 + (-B3) * B0. */, -+ 0x7C00 /* A4 + (-B4) * B0. */, -+ 0x724B /* A5 + (-B5) * B0. */, -+ 0x6286 /* A6 + (-B6) * B0. */, -+ 0xFC00 /* A7 + (-B7) * B0. */ }; -+ -+VECT_VAR_DECL (expected1_fms_static, hfloat, 16, 8) [] -+ = { 0x5C0D /* A0 + (-B0) * B1. */, -+ 0xD0EE /* A1 + (-B1) * B1. */, -+ 0x5274 /* A2 + (-B2) * B1. */, -+ 0x5026 /* A3 + (-B3) * B1. */, -+ 0x7C00 /* A4 + (-B4) * B1. */, -+ 0xEA41 /* A5 + (-B5) * B1. */, -+ 0xD5DA /* A6 + (-B6) * B1. */, -+ 0x7C00 /* A7 + (-B7) * B1. */ }; -+ -+VECT_VAR_DECL (expected2_fms_static, hfloat, 16, 8) [] -+ = { 0xD54E /* A0 + (-B0) * B2. */, -+ 0x51BA /* A1 + (-B1) * B2. */, -+ 0xD4F3 /* A2 + (-B2) * B2. */, -+ 0xCE66 /* A3 + (-B3) * B2. */, -+ 0x7C00 /* A4 + (-B4) * B2. */, -+ 0x6CC8 /* A5 + (-B5) * B2. */, -+ 0x5DD7 /* A6 + (-B6) * B2. */, -+ 0xFC00 /* A7 + (-B7) * B2. 
*/ }; -+ -+VECT_VAR_DECL (expected3_fms_static, hfloat, 16, 8) [] -+ = { 0x4F70 /* A0 + (-B0) * B3. */, -+ 0x4C5A /* A1 + (-B1) * B3. */, -+ 0xD073 /* A2 + (-B2) * B3. */, -+ 0xC600 /* A3 + (-B3) * B3. */, -+ 0x7C00 /* A4 + (-B4) * B3. */, -+ 0x684B /* A5 + (-B5) * B3. */, -+ 0x5AD0 /* A6 + (-B6) * B3. */, -+ 0xFC00 /* A7 + (-B7) * B3. */ }; -+ -+/* Expected results for vfms_laneq. */ -+VECT_VAR_DECL (expected0_fms_laneq_static, hfloat, 16, 4) [] -+ = { 0xDEA2 /* A0 + (-B0) * B0. */, -+ 0x5810 /* A1 + (-B1) * B0. */, -+ 0xDA82 /* A2 + (-B2) * B0. */, -+ 0xD53A /* A3 + (-B3) * B0. */ }; -+ -+VECT_VAR_DECL (expected1_fms_laneq_static, hfloat, 16, 4) [] -+ = { 0x5C0D /* A0 + (-B0) * B1. */, -+ 0xD0EE /* A1 + (-B1) * B1. */, -+ 0x5274 /* A2 + (-B2) * B1. */, -+ 0x5026 /* A3 + (-B3) * B1. */ }; -+ -+VECT_VAR_DECL (expected2_fms_laneq_static, hfloat, 16, 4) [] -+ = { 0xD54E /* A0 + (-B0) * B2. */, -+ 0x51BA /* A1 + (-B1) * B2. */, -+ 0xD4F3 /* A2 + (-B2) * B2. */, -+ 0xCE66 /* A3 + (-B3) * B2. */ }; -+ -+VECT_VAR_DECL (expected3_fms_laneq_static, hfloat, 16, 4) [] -+ = { 0x4F70 /* A0 + (-B0) * B3. */, -+ 0x4C5A /* A1 + (-B1) * B3. */, -+ 0xD073 /* A2 + (-B2) * B3. */, -+ 0xC600 /* A3 + (-B3) * B3. */ }; -+ -+VECT_VAR_DECL (expected4_fms_laneq_static, hfloat, 16, 4) [] -+ = { 0x5179 /* A0 + (-B0) * B4. */, -+ 0x4AF6 /* A1 + (-B1) * B4. */, -+ 0xCF91 /* A2 + (-B2) * B4. */, -+ 0xC334 /* A3 + (-B3) * B4. */ }; -+ -+VECT_VAR_DECL (expected5_fms_laneq_static, hfloat, 16, 4) [] -+ = { 0x725C /* A0 + (-B0) * B5. */, -+ 0xEA41 /* A1 + (-B1) * B5. */, -+ 0x6CCA /* A2 + (-B2) * B5. */, -+ 0x6853 /* A3 + (-B3) * B5. */ }; -+ -+VECT_VAR_DECL (expected6_fms_laneq_static, hfloat, 16, 4) [] -+ = { 0x62C7 /* A0 + (-B0) * B6. */, -+ 0xD9F2 /* A1 + (-B1) * B6. */, -+ 0x5C6C /* A2 + (-B2) * B6. */, -+ 0x584A /* A3 + (-B3) * B6. */ }; -+ -+VECT_VAR_DECL (expected7_fms_laneq_static, hfloat, 16, 4) [] -+ = { 0xFC00 /* A0 + (-B0) * B7. */, -+ 0x7C00 /* A1 + (-B1) * B7. */, -+ 0xFC00 /* A2 + (-B2) * B7. */, -+ 0xFC00 /* A3 + (-B3) * B7. */ }; -+ -+/* Expected results for vfmsq_laneq. */ -+VECT_VAR_DECL (expected0_fms_laneq_static, hfloat, 16, 8) [] -+ = { 0xDEA2 /* A0 + (-B0) * B0. */, -+ 0x5810 /* A1 + (-B1) * B0. */, -+ 0xDA82 /* A2 + (-B2) * B0. */, -+ 0xD53A /* A3 + (-B3) * B0. */, -+ 0x7C00 /* A4 + (-B4) * B0. */, -+ 0x724B /* A5 + (-B5) * B0. */, -+ 0x6286 /* A6 + (-B6) * B0. */, -+ 0xFC00 /* A7 + (-B7) * B0. */ }; -+ -+VECT_VAR_DECL (expected1_fms_laneq_static, hfloat, 16, 8) [] -+ = { 0x5C0D /* A0 + (-B0) * B1. */, -+ 0xD0EE /* A1 + (-B1) * B1. */, -+ 0x5274 /* A2 + (-B2) * B1. */, -+ 0x5026 /* A3 + (-B3) * B1. */, -+ 0x7C00 /* A4 + (-B4) * B1. */, -+ 0xEA41 /* A5 + (-B5) * B1. */, -+ 0xD5DA /* A6 + (-B6) * B1. */, -+ 0x7C00 /* A7 + (-B7) * B1. */ }; -+ -+VECT_VAR_DECL (expected2_fms_laneq_static, hfloat, 16, 8) [] -+ = { 0xD54E /* A0 + (-B0) * B2. */, -+ 0x51BA /* A1 + (-B1) * B2. */, -+ 0xD4F3 /* A2 + (-B2) * B2. */, -+ 0xCE66 /* A3 + (-B3) * B2. */, -+ 0x7C00 /* A4 + (-B4) * B2. */, -+ 0x6CC8 /* A5 + (-B5) * B2. */, -+ 0x5DD7 /* A6 + (-B6) * B2. */, -+ 0xFC00 /* A7 + (-B7) * B2. */ }; -+ -+VECT_VAR_DECL (expected3_fms_laneq_static, hfloat, 16, 8) [] -+ = { 0x4F70 /* A0 + (-B0) * B3. */, -+ 0x4C5A /* A1 + (-B1) * B3. */, -+ 0xD073 /* A2 + (-B2) * B3. */, -+ 0xC600 /* A3 + (-B3) * B3. */, -+ 0x7C00 /* A4 + (-B4) * B3. */, -+ 0x684B /* A5 + (-B5) * B3. */, -+ 0x5AD0 /* A6 + (-B6) * B3. */, -+ 0xFC00 /* A7 + (-B7) * B3. 
*/ }; -+ -+VECT_VAR_DECL (expected4_fms_laneq_static, hfloat, 16, 8) [] -+ = { 0x5179 /* A0 + (-B0) * B4. */, -+ 0x4AF6 /* A1 + (-B1) * B4. */, -+ 0xCF91 /* A2 + (-B2) * B4. */, -+ 0xC334 /* A3 + (-B3) * B4. */, -+ 0x7C00 /* A4 + (-B4) * B4. */, -+ 0x674C /* A5 + (-B5) * B4. */, -+ 0x5A37 /* A6 + (-B6) * B4. */, -+ 0xFC00 /* A7 + (-B7) * B4. */ }; -+ -+VECT_VAR_DECL (expected5_fms_laneq_static, hfloat, 16, 8) [] -+ = { 0x725C /* A0 + (-B0) * B5. */, -+ 0xEA41 /* A1 + (-B1) * B5. */, -+ 0x6CCA /* A2 + (-B2) * B5. */, -+ 0x6853 /* A3 + (-B3) * B5. */, -+ 0x7C00 /* A4 + (-B4) * B5. */, -+ 0xFC00 /* A5 + (-B5) * B5. */, -+ 0xF441 /* A6 + (-B6) * B5. */, -+ 0x7C00 /* A7 + (-B7) * B5. */ }; -+ -+VECT_VAR_DECL (expected6_fms_laneq_static, hfloat, 16, 8) [] -+ = { 0x62C7 /* A0 + (-B0) * B6. */, -+ 0xD9F2 /* A1 + (-B1) * B6. */, -+ 0x5C6C /* A2 + (-B2) * B6. */, -+ 0x584A /* A3 + (-B3) * B6. */, -+ 0x7C00 /* A4 + (-B4) * B6. */, -+ 0xF447 /* A5 + (-B5) * B6. */, -+ 0xE330 /* A6 + (-B6) * B6. */, -+ 0x7C00 /* A7 + (-B7) * B6. */ }; -+ -+VECT_VAR_DECL (expected7_fms_laneq_static, hfloat, 16, 8) [] -+ = { 0xFC00 /* A0 + (-B0) * B7. */, -+ 0x7C00 /* A1 + (-B1) * B7. */, -+ 0xFC00 /* A2 + (-B2) * B7. */, -+ 0xFC00 /* A3 + (-B3) * B7. */, -+ 0x7C00 /* A4 + (-B4) * B7. */, -+ 0x7C00 /* A5 + (-B5) * B7. */, -+ 0x7C00 /* A6 + (-B6) * B7. */, -+ 0xFC00 /* A7 + (-B7) * B7. */ }; -+ -+void exec_vfmas_lane_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VFMA_LANE (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 4); -+ DECL_VARIABLE(vsrc_2, float, 16, 4); -+ VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A0, A1, A2, A3}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {B0, B1, B2, B3}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vfma_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 0); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMAQ_LANE (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 8); -+ DECL_VARIABLE(vsrc_2, float, 16, 8); -+ VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A0, A1, A2, A3, A4, A5, A6, A7}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {B0, B1, B2, B3, B4, B5, B6, B7}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 
8); -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vfmaq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMA_LANEQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_3, float, 16, 8); -+ VECT_VAR_DECL (buf_src_3, float, 16, 8) [] = {B0, B1, B2, B3, B4, B5, B6, B7}; -+ VLOAD (vsrc_3, buf_src_3, q, float, f, 16, 8); -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 0); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 4); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected4_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 5); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 
16, 4, PRIx16, expected5_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 6); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected6_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 7); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected7_laneq_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMAQ_LANEQ (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 4); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected4_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 5); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected5_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 6); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected6_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 7); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected7_laneq_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMS_LANE 
(FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 0); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_fms_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_fms_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_fms_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_fms_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMSQ_LANE (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_fms_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_fms_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_fms_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_fms_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMS_LANEQ (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 0); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_fms_laneq_static, ""); -+ -+ 
VECT_VAR (vector_res, float, 16, 4) -+ = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 4); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected4_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 5); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected5_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 6); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected6_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), -+ VECT_VAR (vsrc_3, float, 16, 8), 7); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected7_fms_laneq_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMSQ_LANEQ (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_laneq_f16 
(VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 4); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected4_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 5); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected5_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 6); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected6_fms_laneq_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), -+ VECT_VAR (vsrc_3, float, 16, 8), 7); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected7_fms_laneq_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vfmas_lane_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmas_n_f16_1.c -@@ -0,0 +1,469 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A0 FP16_C (123.4) -+#define A1 FP16_C (-5.8) -+#define A2 FP16_C (-0.0) -+#define A3 FP16_C (10) -+#define A4 FP16_C (123412.43) -+#define A5 FP16_C (-5.8) -+#define A6 FP16_C (90.8) -+#define A7 FP16_C (24) -+ -+#define B0 FP16_C (23.4) -+#define B1 FP16_C (-5.8) -+#define B2 FP16_C (8.9) -+#define B3 FP16_C (4.0) -+#define B4 FP16_C (3.4) -+#define B5 FP16_C (-550.8) -+#define B6 FP16_C (-31.8) -+#define B7 FP16_C (20000.0) -+ -+/* Expected results for vfma_n. */ -+VECT_VAR_DECL (expected_fma0_static, hfloat, 16, 4) [] -+ = { 0x613E /* A0 + B0 * B0. */, -+ 0xD86D /* A1 + B1 * B0. */, -+ 0x5A82 /* A2 + B2 * B0. */, -+ 0x567A /* A3 + B3 * B0. */ }; -+ -+VECT_VAR_DECL (expected_fma1_static, hfloat, 16, 4) [] -+ = { 0xCA33 /* A0 + B0 * B1. */, -+ 0x4EF6 /* A1 + B1 * B1. */, -+ 0xD274 /* A2 + B2 * B1. */, -+ 0xCA9A /* A3 + B3 * B1. */ }; -+ -+VECT_VAR_DECL (expected_fma2_static, hfloat, 16, 4) [] -+ = { 0x5D2F /* A0 + B0 * B2. */, -+ 0xD32D /* A1 + B1 * B2. */, -+ 0x54F3 /* A2 + B2 * B2. */, -+ 0x51B3 /* A3 + B3 * B2. */ }; -+ -+VECT_VAR_DECL (expected_fma3_static, hfloat, 16, 4) [] -+ = { 0x5AC8 /* A0 + B0 * B3. */, -+ 0xCF40 /* A1 + B1 * B3. */, -+ 0x5073 /* A2 + B2 * B3. */, -+ 0x4E80 /* A3 + B3 * B3. */ }; -+ -+VECT_VAR_DECL (expected_fma0_static, hfloat, 16, 8) [] -+ = { 0x613E /* A0 + B0 * B0. */, -+ 0xD86D /* A1 + B1 * B0. */, -+ 0x5A82 /* A2 + B2 * B0. */, -+ 0x567A /* A3 + B3 * B0. */, -+ 0x7C00 /* A4 + B4 * B0. */, -+ 0xF24D /* A5 + B5 * B0. */, -+ 0xE11B /* A6 + B6 * B0. */, -+ 0x7C00 /* A7 + B7 * B0. */ }; -+ -+VECT_VAR_DECL (expected_fma1_static, hfloat, 16, 8) [] -+ = { 0xCA33 /* A0 + B0 * B1. */, -+ 0x4EF6 /* A1 + B1 * B1. */, -+ 0xD274 /* A2 + B2 * B1. */, -+ 0xCA9A /* A3 + B3 * B1. */, -+ 0x7C00 /* A4 + B4 * B1. 
*/, -+ 0x6A3B /* A5 + B5 * B1. */, -+ 0x5C4D /* A6 + B6 * B1. */, -+ 0xFC00 /* A7 + B7 * B1. */ }; -+ -+VECT_VAR_DECL (expected_fma2_static, hfloat, 16, 8) [] -+ = { 0x5D2F /* A0 + B0 * B2. */, -+ 0xD32D /* A1 + B1 * B2. */, -+ 0x54F3 /* A2 + B2 * B2. */, -+ 0x51B3 /* A3 + B3 * B2. */, -+ 0x7C00 /* A4 + B4 * B2. */, -+ 0xECCB /* A5 + B5 * B2. */, -+ 0xDA01 /* A6 + B6 * B2. */, -+ 0x7C00 /* A7 + B7 * B2. */ }; -+ -+VECT_VAR_DECL (expected_fma3_static, hfloat, 16, 8) [] -+ = { 0x5AC8 /* A0 + B0 * B3. */, -+ 0xCF40 /* A1 + B1 * B3. */, -+ 0x5073 /* A2 + B2 * B3. */, -+ 0x4E80 /* A3 + B3 * B3. */, -+ 0x7C00 /* A4 + B4 * B3. */, -+ 0xE851 /* A5 + B5 * B3. */, -+ 0xD08C /* A6 + B6 * B3. */, -+ 0x7C00 /* A7 + B7 * B3. */ }; -+ -+VECT_VAR_DECL (expected_fma4_static, hfloat, 16, 8) [] -+ = { 0x5A58 /* A0 + B0 * B4. */, -+ 0xCE62 /* A1 + B1 * B4. */, -+ 0x4F91 /* A2 + B2 * B4. */, -+ 0x4DE6 /* A3 + B3 * B4. */, -+ 0x7C00 /* A4 + B4 * B4. */, -+ 0xE757 /* A5 + B5 * B4. */, -+ 0xCC54 /* A6 + B6 * B4. */, -+ 0x7C00 /* A7 + B7 * B4. */ }; -+ -+VECT_VAR_DECL (expected_fma5_static, hfloat, 16, 8) [] -+ = { 0xF23D /* A0 + B0 * B5. */, -+ 0x6A3B /* A1 + B1 * B5. */, -+ 0xECCA /* A2 + B2 * B5. */, -+ 0xE849 /* A3 + B3 * B5. */, -+ 0x7C00 /* A4 + B4 * B5. */, -+ 0x7C00 /* A5 + B5 * B5. */, -+ 0x744D /* A6 + B6 * B5. */, -+ 0xFC00 /* A7 + B7 * B5. */ }; -+ -+VECT_VAR_DECL (expected_fma6_static, hfloat, 16, 8) [] -+ = { 0xE0DA /* A0 + B0 * B6. */, -+ 0x5995 /* A1 + B1 * B6. */, -+ 0xDC6C /* A2 + B2 * B6. */, -+ 0xD753 /* A3 + B3 * B6. */, -+ 0x7C00 /* A4 + B4 * B6. */, -+ 0x7447 /* A5 + B5 * B6. */, -+ 0x644E /* A6 + B6 * B6. */, -+ 0xFC00 /* A7 + B7 * B6. */ }; -+ -+VECT_VAR_DECL (expected_fma7_static, hfloat, 16, 8) [] -+ = { 0x7C00 /* A0 + B0 * B7. */, -+ 0xFC00 /* A1 + B1 * B7. */, -+ 0x7C00 /* A2 + B2 * B7. */, -+ 0x7C00 /* A3 + B3 * B7. */, -+ 0x7C00 /* A4 + B4 * B7. */, -+ 0xFC00 /* A5 + B5 * B7. */, -+ 0xFC00 /* A6 + B6 * B7. */, -+ 0x7C00 /* A7 + B7 * B7. */ }; -+ -+/* Expected results for vfms_n. */ -+VECT_VAR_DECL (expected_fms0_static, hfloat, 16, 4) [] -+ = { 0xDEA2 /* A0 + (-B0) * B0. */, -+ 0x5810 /* A1 + (-B1) * B0. */, -+ 0xDA82 /* A2 + (-B2) * B0. */, -+ 0xD53A /* A3 + (-B3) * B0. */ }; -+ -+VECT_VAR_DECL (expected_fms1_static, hfloat, 16, 4) [] -+ = { 0x5C0D /* A0 + (-B0) * B1. */, -+ 0xD0EE /* A1 + (-B1) * B1. */, -+ 0x5274 /* A2 + (-B2) * B1. */, -+ 0x5026 /* A3 + (-B3) * B1. */ }; -+ -+VECT_VAR_DECL (expected_fms2_static, hfloat, 16, 4) [] -+ = { 0xD54E /* A0 + (-B0) * B2. */, -+ 0x51BA /* A1 + (-B1) * B2. */, -+ 0xD4F3 /* A2 + (-B2) * B2. */, -+ 0xCE66 /* A3 + (-B3) * B2. */ }; -+ -+VECT_VAR_DECL (expected_fms3_static, hfloat, 16, 4) [] -+ = { 0x4F70 /* A0 + (-B0) * B3. */, -+ 0x4C5A /* A1 + (-B1) * B3. */, -+ 0xD073 /* A2 + (-B2) * B3. */, -+ 0xC600 /* A3 + (-B3) * B3. */ }; -+ -+VECT_VAR_DECL (expected_fms0_static, hfloat, 16, 8) [] -+ = { 0xDEA2 /* A0 + (-B0) * B0. */, -+ 0x5810 /* A1 + (-B1) * B0. */, -+ 0xDA82 /* A2 + (-B2) * B0. */, -+ 0xD53A /* A3 + (-B3) * B0. */, -+ 0x7C00 /* A4 + (-B4) * B0. */, -+ 0x724B /* A5 + (-B5) * B0. */, -+ 0x6286 /* A6 + (-B6) * B0. */, -+ 0xFC00 /* A7 + (-B7) * B0. */ }; -+ -+VECT_VAR_DECL (expected_fms1_static, hfloat, 16, 8) [] -+ = { 0x5C0D /* A0 + (-B0) * B1. */, -+ 0xD0EE /* A1 + (-B1) * B1. */, -+ 0x5274 /* A2 + (-B2) * B1. */, -+ 0x5026 /* A3 + (-B3) * B1. */, -+ 0x7C00 /* A4 + (-B4) * B1. */, -+ 0xEA41 /* A5 + (-B5) * B1. */, -+ 0xD5DA /* A6 + (-B6) * B1. */, -+ 0x7C00 /* A7 + (-B7) * B1. 
*/ }; -+ -+VECT_VAR_DECL (expected_fms2_static, hfloat, 16, 8) [] -+ = { 0xD54E /* A0 + (-B0) * B2. */, -+ 0x51BA /* A1 + (-B1) * B2. */, -+ 0xD4F3 /* A2 + (-B2) * B2. */, -+ 0xCE66 /* A3 + (-B3) * B2. */, -+ 0x7C00 /* A4 + (-B4) * B2. */, -+ 0x6CC8 /* A5 + (-B5) * B2. */, -+ 0x5DD7 /* A6 + (-B6) * B2. */, -+ 0xFC00 /* A7 + (-B7) * B2. */ }; -+ -+VECT_VAR_DECL (expected_fms3_static, hfloat, 16, 8) [] -+ = { 0x4F70 /* A0 + (-B0) * B3. */, -+ 0x4C5A /* A1 + (-B1) * B3. */, -+ 0xD073 /* A2 + (-B2) * B3. */, -+ 0xC600 /* A3 + (-B3) * B3. */, -+ 0x7C00 /* A4 + (-B4) * B3. */, -+ 0x684B /* A5 + (-B5) * B3. */, -+ 0x5AD0 /* A6 + (-B6) * B3. */, -+ 0xFC00 /* A7 + (-B7) * B3. */ }; -+ -+VECT_VAR_DECL (expected_fms4_static, hfloat, 16, 8) [] -+ = { 0x5179 /* A0 + (-B0) * B4. */, -+ 0x4AF6 /* A1 + (-B1) * B4. */, -+ 0xCF91 /* A2 + (-B2) * B4. */, -+ 0xC334 /* A3 + (-B3) * B4. */, -+ 0x7C00 /* A4 + (-B4) * B4. */, -+ 0x674C /* A5 + (-B5) * B4. */, -+ 0x5A37 /* A6 + (-B6) * B4. */, -+ 0xFC00 /* A7 + (-B7) * B4. */ }; -+ -+VECT_VAR_DECL (expected_fms5_static, hfloat, 16, 8) [] -+ = { 0x725C /* A0 + (-B0) * B5. */, -+ 0xEA41 /* A1 + (-B1) * B5. */, -+ 0x6CCA /* A2 + (-B2) * B5. */, -+ 0x6853 /* A3 + (-B3) * B5. */, -+ 0x7C00 /* A4 + (-B4) * B5. */, -+ 0xFC00 /* A5 + (-B5) * B5. */, -+ 0xF441 /* A6 + (-B6) * B5. */, -+ 0x7C00 /* A7 + (-B7) * B5. */ }; -+ -+VECT_VAR_DECL (expected_fms6_static, hfloat, 16, 8) [] -+ = { 0x62C7 /* A0 + (-B0) * B6. */, -+ 0xD9F2 /* A1 + (-B1) * B6. */, -+ 0x5C6C /* A2 + (-B2) * B6. */, -+ 0x584A /* A3 + (-B3) * B6. */, -+ 0x7C00 /* A4 + (-B4) * B6. */, -+ 0xF447 /* A5 + (-B5) * B6. */, -+ 0xE330 /* A6 + (-B6) * B6. */, -+ 0x7C00 /* A7 + (-B7) * B6. */ }; -+ -+VECT_VAR_DECL (expected_fms7_static, hfloat, 16, 8) [] -+ = { 0xFC00 /* A0 + (-B0) * B7. */, -+ 0x7C00 /* A1 + (-B1) * B7. */, -+ 0xFC00 /* A2 + (-B2) * B7. */, -+ 0xFC00 /* A3 + (-B3) * B7. */, -+ 0x7C00 /* A4 + (-B4) * B7. */, -+ 0x7C00 /* A5 + (-B5) * B7. */, -+ 0x7C00 /* A6 + (-B6) * B7. */, -+ 0xFC00 /* A7 + (-B7) * B7. 
*/ }; -+ -+void exec_vfmas_n_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VFMA_N (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 4); -+ DECL_VARIABLE(vsrc_2, float, 16, 4); -+ VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A0, A1, A2, A3}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {B0, B1, B2, B3}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vfma_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), B0); -+ -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fma0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), B1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fma1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), B2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fma2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfma_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), B3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fma3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMAQ_N (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 8); -+ DECL_VARIABLE(vsrc_2, float, 16, 8); -+ VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A0, A1, A2, A3, A4, A5, A6, A7}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {B0, B1, B2, B3, B4, B5, B6, B7}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma3_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B4); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma4_static, ""); -+ -+ VECT_VAR 
(vector_res, float, 16, 8) -+ = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B5); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma5_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B6); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma6_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmaq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B7); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fma7_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMS_N (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), B0); -+ -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fms0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), B1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fms1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), B2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fms2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vfms_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), B3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_fms3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMSQ_N (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms3_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B4); -+ vst1q_f16 
(VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms4_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B5); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms5_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B6); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms6_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vfmsq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), B7); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_fms7_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vfmas_n_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmash_lane_f16_1.c -@@ -0,0 +1,143 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A0 FP16_C (123.4) -+#define B0 FP16_C (-5.8) -+#define C0 FP16_C (-3.8) -+#define D0 FP16_C (10) -+ -+#define A1 FP16_C (12.4) -+#define B1 FP16_C (-5.8) -+#define C1 FP16_C (90.8) -+#define D1 FP16_C (24) -+ -+#define A2 FP16_C (23.4) -+#define B2 FP16_C (-5.8) -+#define C2 FP16_C (8.9) -+#define D2 FP16_C (4) -+ -+#define E0 FP16_C (3.4) -+#define F0 FP16_C (-55.8) -+#define G0 FP16_C (-31.8) -+#define H0 FP16_C (2) -+ -+#define E1 FP16_C (123.4) -+#define F1 FP16_C (-5.8) -+#define G1 FP16_C (-3.8) -+#define H1 FP16_C (102) -+ -+#define E2 FP16_C (4.9) -+#define F2 FP16_C (-15.8) -+#define G2 FP16_C (39.8) -+#define H2 FP16_C (49) -+ -+extern void abort (); -+ -+float16_t src1[8] = { A0, B0, C0, D0, E0, F0, G0, H0 }; -+float16_t src2[8] = { A1, B1, C1, D1, E1, F1, G1, H1 }; -+VECT_VAR_DECL (src3, float, 16, 4) [] = { A2, B2, C2, D2 }; -+VECT_VAR_DECL (src3, float, 16, 8) [] = { A2, B2, C2, D2, E2, F2, G2, H2 }; -+ -+/* Expected results for vfmah_lane_f16. */ -+uint16_t expected[4] = { 0x5E76 /* A0 + A1 * A2. */, -+ 0x4EF6 /* B0 + B1 * B2. */, -+ 0x6249 /* C0 + C1 * C2. */, -+ 0x56A0 /* D0 + D1 * D2. */ }; -+ -+/* Expected results for vfmah_laneq_f16. */ -+uint16_t expected_laneq[8] = { 0x5E76 /* A0 + A1 * A2. */, -+ 0x4EF6 /* B0 + B1 * B2. */, -+ 0x6249 /* C0 + C1 * C2. */, -+ 0x56A0 /* D0 + D1 * D2. */, -+ 0x60BF /* E0 + E1 * E2. */, -+ 0x507A /* F0 + F1 * F2. */, -+ 0xD9B9 /* G0 + G1 * G2. */, -+ 0x6CE2 /* H0 + H1 * H2. */ }; -+ -+/* Expected results for vfmsh_lane_f16. */ -+uint16_t expected_fms[4] = { 0xD937 /* A0 + -A1 * A2. */, -+ 0xD0EE /* B0 + -B1 * B2. */, -+ 0xE258 /* C0 + -C1 * C2. */, -+ 0xD560 /* D0 + -D1 * D2. */ }; -+ -+/* Expected results for vfmsh_laneq_f16. */ -+uint16_t expected_fms_laneq[8] = { 0xD937 /* A0 + -A1 * A2. */, -+ 0xD0EE /* B0 + -B1 * B2. */, -+ 0xE258 /* C0 + -C1 * C2. */, -+ 0xD560 /* D0 + -D1 * D2. */, -+ 0xE0B2 /* E0 + -E1 * E2. */, -+ 0xD89C /* F0 + -F1 * F2. */, -+ 0x5778 /* G0 + -G1 * G2. */, -+ 0xECE1 /* H0 + -H1 * H2. 
*/ }; -+ -+void exec_vfmash_lane_f16 (void) -+{ -+#define CHECK_LANE(N) \ -+ ret = vfmah_lane_f16 (src1[N], src2[N], VECT_VAR (vsrc3, float, 16, 4), N);\ -+ if (*(uint16_t *) &ret != expected[N])\ -+ abort (); -+ -+ DECL_VARIABLE(vsrc3, float, 16, 4); -+ VLOAD (vsrc3, src3, , float, f, 16, 4); -+ float16_t ret; -+ CHECK_LANE(0) -+ CHECK_LANE(1) -+ CHECK_LANE(2) -+ CHECK_LANE(3) -+ -+#undef CHECK_LANE -+#define CHECK_LANE(N) \ -+ ret = vfmah_laneq_f16 (src1[N], src2[N], VECT_VAR (vsrc3, float, 16, 8), N);\ -+ if (*(uint16_t *) &ret != expected_laneq[N]) \ -+ abort (); -+ -+ DECL_VARIABLE(vsrc3, float, 16, 8); -+ VLOAD (vsrc3, src3, q, float, f, 16, 8); -+ CHECK_LANE(0) -+ CHECK_LANE(1) -+ CHECK_LANE(2) -+ CHECK_LANE(3) -+ CHECK_LANE(4) -+ CHECK_LANE(5) -+ CHECK_LANE(6) -+ CHECK_LANE(7) -+ -+#undef CHECK_LANE -+#define CHECK_LANE(N) \ -+ ret = vfmsh_lane_f16 (src1[N], src2[N], VECT_VAR (vsrc3, float, 16, 4), N);\ -+ if (*(uint16_t *) &ret != expected_fms[N])\ -+ abort (); -+ -+ CHECK_LANE(0) -+ CHECK_LANE(1) -+ CHECK_LANE(2) -+ -+#undef CHECK_LANE -+#define CHECK_LANE(N) \ -+ ret = vfmsh_laneq_f16 (src1[N], src2[N], VECT_VAR (vsrc3, float, 16, 8), N);\ -+ if (*(uint16_t *) &ret != expected_fms_laneq[N]) \ -+ abort (); -+ -+ CHECK_LANE(0) -+ CHECK_LANE(1) -+ CHECK_LANE(2) -+ CHECK_LANE(3) -+ CHECK_LANE(4) -+ CHECK_LANE(5) -+ CHECK_LANE(6) -+ CHECK_LANE(7) -+} -+ -+int -+main (void) -+{ -+ exec_vfmash_lane_f16 (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms.c -@@ -4,10 +4,17 @@ - - #ifdef __ARM_FEATURE_FMA - /* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xe206, 0xe204, 0xe202, 0xe200 }; -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xe455, 0xe454, 0xe453, 0xe452, -+ 0xe451, 0xe450, 0xe44f, 0xe44e }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc440ca3d, 0xc4408a3d }; --VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc48a9eb8, 0xc48a7eb8, 0xc48a5eb8, 0xc48a3eb8 }; -+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc48a9eb8, 0xc48a7eb8, -+ 0xc48a5eb8, 0xc48a3eb8 }; - #ifdef __aarch64__ --VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0xc08a06e1532b8520, 0xc089fee1532b8520 }; -+VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0xc08a06e1532b8520, -+ 0xc089fee1532b8520 }; - #endif - - #define TEST_MSG "VFMS/VFMSQ" -@@ -44,6 +51,18 @@ void exec_vfms (void) - DECL_VARIABLE(VAR, float, 32, 4); - #endif - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector1, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector3, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ -+ DECL_VARIABLE(vector1, float, 16, 8); -+ DECL_VARIABLE(vector2, float, 16, 8); -+ DECL_VARIABLE(vector3, float, 16, 8); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif -+ - DECL_VFMS_VAR(vector1); - DECL_VFMS_VAR(vector2); - DECL_VFMS_VAR(vector3); -@@ -52,6 +71,10 @@ void exec_vfms (void) - clean_results (); - - /* Initialize input "vector1" from "buffer". */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector1, buffer, , float, f, 16, 4); -+ VLOAD(vector1, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector1, buffer, , float, f, 32, 2); - VLOAD(vector1, buffer, q, float, f, 32, 4); - #ifdef __aarch64__ -@@ -59,13 +82,21 @@ void exec_vfms (void) - #endif - - /* Choose init value arbitrarily. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, 9.3f); -+ VDUP(vector2, q, float, f, 16, 8, 29.7f); -+#endif - VDUP(vector2, , float, f, 32, 2, 9.3f); - VDUP(vector2, q, float, f, 32, 4, 29.7f); - #ifdef __aarch64__ - VDUP(vector2, q, float, f, 64, 2, 15.8f); - #endif -- -+ - /* Choose init value arbitrarily. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector3, , float, f, 16, 4, 81.2f); -+ VDUP(vector3, q, float, f, 16, 8, 36.8f); -+#endif - VDUP(vector3, , float, f, 32, 2, 81.2f); - VDUP(vector3, q, float, f, 32, 4, 36.8f); - #ifdef __aarch64__ -@@ -73,12 +104,20 @@ void exec_vfms (void) - #endif - - /* Execute the tests. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VFMS(, float, f, 16, 4); -+ TEST_VFMS(q, float, f, 16, 8); -+#endif - TEST_VFMS(, float, f, 32, 2); - TEST_VFMS(q, float, f, 32, 4); - #ifdef __aarch64__ - TEST_VFMS(q, float, f, 64, 2); - #endif - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); -+#endif - CHECK_VFMS_RESULTS (TEST_MSG, ""); - } - #endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfms_vfma_n.c -@@ -0,0 +1,490 @@ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#if defined(__aarch64__) && defined(__ARM_FEATURE_FMA) -+ -+#define A0 123.4f -+#define A1 -3.8f -+#define A2 -29.4f -+#define A3 (__builtin_inff ()) -+#define A4 0.0f -+#define A5 24.0f -+#define A6 124.0f -+#define A7 1024.0f -+ -+#define B0 -5.8f -+#define B1 -0.0f -+#define B2 -10.8f -+#define B3 10.0f -+#define B4 23.4f -+#define B5 -1234.8f -+#define B6 8.9f -+#define B7 4.0f -+ -+#define E0 9.8f -+#define E1 -1024.0f -+#define E2 (-__builtin_inff ()) -+#define E3 479.0f -+float32_t elem0 = E0; -+float32_t elem1 = E1; -+float32_t elem2 = E2; -+float32_t elem3 = E3; -+ -+#define DA0 1231234.4 -+#define DA1 -3.8 -+#define DA2 -2980.4 -+#define DA3 -5.8 -+#define DA4 0.01123 -+#define DA5 24.0 -+#define DA6 124.12345 -+#define DA7 1024.0 -+ -+#define DB0 -5.8 -+#define DB1 (__builtin_inf ()) -+#define DB2 -105.8 -+#define DB3 10.0 -+#define DB4 (-__builtin_inf ()) -+#define DB5 -1234.8 -+#define DB6 848.9 -+#define DB7 44444.0 -+ -+#define DE0 9.8 -+#define DE1 -1024.0 -+#define DE2 105.8 -+#define DE3 479.0 -+float64_t delem0 = DE0; -+float64_t delem1 = DE1; -+float64_t delem2 = DE2; -+float64_t delem3 = DE3; -+ -+/* Expected results for vfms_n. 
*/ -+ -+VECT_VAR_DECL(expectedfms0, float, 32, 2) [] = {A0 + -B0 * E0, A1 + -B1 * E0}; -+VECT_VAR_DECL(expectedfms1, float, 32, 2) [] = {A2 + -B2 * E1, A3 + -B3 * E1}; -+VECT_VAR_DECL(expectedfms2, float, 32, 2) [] = {A4 + -B4 * E2, A5 + -B5 * E2}; -+VECT_VAR_DECL(expectedfms3, float, 32, 2) [] = {A6 + -B6 * E3, A7 + -B7 * E3}; -+VECT_VAR_DECL(expectedfma0, float, 32, 2) [] = {A0 + B0 * E0, A1 + B1 * E0}; -+VECT_VAR_DECL(expectedfma1, float, 32, 2) [] = {A2 + B2 * E1, A3 + B3 * E1}; -+VECT_VAR_DECL(expectedfma2, float, 32, 2) [] = {A4 + B4 * E2, A5 + B5 * E2}; -+VECT_VAR_DECL(expectedfma3, float, 32, 2) [] = {A6 + B6 * E3, A7 + B7 * E3}; -+ -+hfloat32_t * VECT_VAR (expectedfms0_static, hfloat, 32, 2) = -+ (hfloat32_t *) VECT_VAR (expectedfms0, float, 32, 2); -+hfloat32_t * VECT_VAR (expectedfms1_static, hfloat, 32, 2) = -+ (hfloat32_t *) VECT_VAR (expectedfms1, float, 32, 2); -+hfloat32_t * VECT_VAR (expectedfms2_static, hfloat, 32, 2) = -+ (hfloat32_t *) VECT_VAR (expectedfms2, float, 32, 2); -+hfloat32_t * VECT_VAR (expectedfms3_static, hfloat, 32, 2) = -+ (hfloat32_t *) VECT_VAR (expectedfms3, float, 32, 2); -+hfloat32_t * VECT_VAR (expectedfma0_static, hfloat, 32, 2) = -+ (hfloat32_t *) VECT_VAR (expectedfma0, float, 32, 2); -+hfloat32_t * VECT_VAR (expectedfma1_static, hfloat, 32, 2) = -+ (hfloat32_t *) VECT_VAR (expectedfma1, float, 32, 2); -+hfloat32_t * VECT_VAR (expectedfma2_static, hfloat, 32, 2) = -+ (hfloat32_t *) VECT_VAR (expectedfma2, float, 32, 2); -+hfloat32_t * VECT_VAR (expectedfma3_static, hfloat, 32, 2) = -+ (hfloat32_t *) VECT_VAR (expectedfma3, float, 32, 2); -+ -+ -+VECT_VAR_DECL(expectedfms0, float, 32, 4) [] = {A0 + -B0 * E0, A1 + -B1 * E0, -+ A2 + -B2 * E0, A3 + -B3 * E0}; -+VECT_VAR_DECL(expectedfms1, float, 32, 4) [] = {A4 + -B4 * E1, A5 + -B5 * E1, -+ A6 + -B6 * E1, A7 + -B7 * E1}; -+VECT_VAR_DECL(expectedfms2, float, 32, 4) [] = {A0 + -B0 * E2, A2 + -B2 * E2, -+ A4 + -B4 * E2, A6 + -B6 * E2}; -+VECT_VAR_DECL(expectedfms3, float, 32, 4) [] = {A1 + -B1 * E3, A3 + -B3 * E3, -+ A5 + -B5 * E3, A7 + -B7 * E3}; -+VECT_VAR_DECL(expectedfma0, float, 32, 4) [] = {A0 + B0 * E0, A1 + B1 * E0, -+ A2 + B2 * E0, A3 + B3 * E0}; -+VECT_VAR_DECL(expectedfma1, float, 32, 4) [] = {A4 + B4 * E1, A5 + B5 * E1, -+ A6 + B6 * E1, A7 + B7 * E1}; -+VECT_VAR_DECL(expectedfma2, float, 32, 4) [] = {A0 + B0 * E2, A2 + B2 * E2, -+ A4 + B4 * E2, A6 + B6 * E2}; -+VECT_VAR_DECL(expectedfma3, float, 32, 4) [] = {A1 + B1 * E3, A3 + B3 * E3, -+ A5 + B5 * E3, A7 + B7 * E3}; -+ -+hfloat32_t * VECT_VAR (expectedfms0_static, hfloat, 32, 4) = -+ (hfloat32_t *) VECT_VAR (expectedfms0, float, 32, 4); -+hfloat32_t * VECT_VAR (expectedfms1_static, hfloat, 32, 4) = -+ (hfloat32_t *) VECT_VAR (expectedfms1, float, 32, 4); -+hfloat32_t * VECT_VAR (expectedfms2_static, hfloat, 32, 4) = -+ (hfloat32_t *) VECT_VAR (expectedfms2, float, 32, 4); -+hfloat32_t * VECT_VAR (expectedfms3_static, hfloat, 32, 4) = -+ (hfloat32_t *) VECT_VAR (expectedfms3, float, 32, 4); -+hfloat32_t * VECT_VAR (expectedfma0_static, hfloat, 32, 4) = -+ (hfloat32_t *) VECT_VAR (expectedfma0, float, 32, 4); -+hfloat32_t * VECT_VAR (expectedfma1_static, hfloat, 32, 4) = -+ (hfloat32_t *) VECT_VAR (expectedfma1, float, 32, 4); -+hfloat32_t * VECT_VAR (expectedfma2_static, hfloat, 32, 4) = -+ (hfloat32_t *) VECT_VAR (expectedfma2, float, 32, 4); -+hfloat32_t * VECT_VAR (expectedfma3_static, hfloat, 32, 4) = -+ (hfloat32_t *) VECT_VAR (expectedfma3, float, 32, 4); -+ -+VECT_VAR_DECL(expectedfms0, float, 64, 2) [] = {DA0 + -DB0 * DE0, -+ DA1 + 
-DB1 * DE0}; -+VECT_VAR_DECL(expectedfms1, float, 64, 2) [] = {DA2 + -DB2 * DE1, -+ DA3 + -DB3 * DE1}; -+VECT_VAR_DECL(expectedfms2, float, 64, 2) [] = {DA4 + -DB4 * DE2, -+ DA5 + -DB5 * DE2}; -+VECT_VAR_DECL(expectedfms3, float, 64, 2) [] = {DA6 + -DB6 * DE3, -+ DA7 + -DB7 * DE3}; -+VECT_VAR_DECL(expectedfma0, float, 64, 2) [] = {DA0 + DB0 * DE0, -+ DA1 + DB1 * DE0}; -+VECT_VAR_DECL(expectedfma1, float, 64, 2) [] = {DA2 + DB2 * DE1, -+ DA3 + DB3 * DE1}; -+VECT_VAR_DECL(expectedfma2, float, 64, 2) [] = {DA4 + DB4 * DE2, -+ DA5 + DB5 * DE2}; -+VECT_VAR_DECL(expectedfma3, float, 64, 2) [] = {DA6 + DB6 * DE3, -+ DA7 + DB7 * DE3}; -+hfloat64_t * VECT_VAR (expectedfms0_static, hfloat, 64, 2) = -+ (hfloat64_t *) VECT_VAR (expectedfms0, float, 64, 2); -+hfloat64_t * VECT_VAR (expectedfms1_static, hfloat, 64, 2) = -+ (hfloat64_t *) VECT_VAR (expectedfms1, float, 64, 2); -+hfloat64_t * VECT_VAR (expectedfms2_static, hfloat, 64, 2) = -+ (hfloat64_t *) VECT_VAR (expectedfms2, float, 64, 2); -+hfloat64_t * VECT_VAR (expectedfms3_static, hfloat, 64, 2) = -+ (hfloat64_t *) VECT_VAR (expectedfms3, float, 64, 2); -+hfloat64_t * VECT_VAR (expectedfma0_static, hfloat, 64, 2) = -+ (hfloat64_t *) VECT_VAR (expectedfma0, float, 64, 2); -+hfloat64_t * VECT_VAR (expectedfma1_static, hfloat, 64, 2) = -+ (hfloat64_t *) VECT_VAR (expectedfma1, float, 64, 2); -+hfloat64_t * VECT_VAR (expectedfma2_static, hfloat, 64, 2) = -+ (hfloat64_t *) VECT_VAR (expectedfma2, float, 64, 2); -+hfloat64_t * VECT_VAR (expectedfma3_static, hfloat, 64, 2) = -+ (hfloat64_t *) VECT_VAR (expectedfma3, float, 64, 2); -+ -+VECT_VAR_DECL(expectedfms0, float, 64, 1) [] = {DA0 + -DB0 * DE0}; -+VECT_VAR_DECL(expectedfms1, float, 64, 1) [] = {DA2 + -DB2 * DE1}; -+VECT_VAR_DECL(expectedfms2, float, 64, 1) [] = {DA4 + -DB4 * DE2}; -+VECT_VAR_DECL(expectedfms3, float, 64, 1) [] = {DA6 + -DB6 * DE3}; -+VECT_VAR_DECL(expectedfma0, float, 64, 1) [] = {DA0 + DB0 * DE0}; -+VECT_VAR_DECL(expectedfma1, float, 64, 1) [] = {DA2 + DB2 * DE1}; -+VECT_VAR_DECL(expectedfma2, float, 64, 1) [] = {DA4 + DB4 * DE2}; -+VECT_VAR_DECL(expectedfma3, float, 64, 1) [] = {DA6 + DB6 * DE3}; -+ -+hfloat64_t * VECT_VAR (expectedfms0_static, hfloat, 64, 1) = -+ (hfloat64_t *) VECT_VAR (expectedfms0, float, 64, 1); -+hfloat64_t * VECT_VAR (expectedfms1_static, hfloat, 64, 1) = -+ (hfloat64_t *) VECT_VAR (expectedfms1, float, 64, 1); -+hfloat64_t * VECT_VAR (expectedfms2_static, hfloat, 64, 1) = -+ (hfloat64_t *) VECT_VAR (expectedfms2, float, 64, 1); -+hfloat64_t * VECT_VAR (expectedfms3_static, hfloat, 64, 1) = -+ (hfloat64_t *) VECT_VAR (expectedfms3, float, 64, 1); -+hfloat64_t * VECT_VAR (expectedfma0_static, hfloat, 64, 1) = -+ (hfloat64_t *) VECT_VAR (expectedfma0, float, 64, 1); -+hfloat64_t * VECT_VAR (expectedfma1_static, hfloat, 64, 1) = -+ (hfloat64_t *) VECT_VAR (expectedfma1, float, 64, 1); -+hfloat64_t * VECT_VAR (expectedfma2_static, hfloat, 64, 1) = -+ (hfloat64_t *) VECT_VAR (expectedfma2, float, 64, 1); -+hfloat64_t * VECT_VAR (expectedfma3_static, hfloat, 64, 1) = -+ (hfloat64_t *) VECT_VAR (expectedfma3, float, 64, 1); -+ -+void exec_vfma_vfms_n (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VFMS_VFMA_N (FP32)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 32, 2); -+ DECL_VARIABLE(vsrc_2, float, 32, 2); -+ VECT_VAR_DECL (buf_src_1, float, 32, 2) [] = {A0, A1}; -+ VECT_VAR_DECL (buf_src_2, float, 32, 2) [] = {B0, B1}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 32, 2); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 32, 2); -+ DECL_VARIABLE (vector_res, 
float, 32, 2) = -+ vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), -+ VECT_VAR (vsrc_2, float, 32, 2), elem0); -+ vst1_f32 (VECT_VAR (result, float, 32, 2), -+ VECT_VAR (vector_res, float, 32, 2)); -+ CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms0_static, ""); -+ VECT_VAR (vector_res, float, 32, 2) = -+ vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), -+ VECT_VAR (vsrc_2, float, 32, 2), elem0); -+ vst1_f32 (VECT_VAR (result, float, 32, 2), -+ VECT_VAR (vector_res, float, 32, 2)); -+ CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma0_static, ""); -+ -+ VECT_VAR_DECL (buf_src_3, float, 32, 2) [] = {A2, A3}; -+ VECT_VAR_DECL (buf_src_4, float, 32, 2) [] = {B2, B3}; -+ VLOAD (vsrc_1, buf_src_3, , float, f, 32, 2); -+ VLOAD (vsrc_2, buf_src_4, , float, f, 32, 2); -+ VECT_VAR (vector_res, float, 32, 2) = -+ vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), -+ VECT_VAR (vsrc_2, float, 32, 2), elem1); -+ vst1_f32 (VECT_VAR (result, float, 32, 2), -+ VECT_VAR (vector_res, float, 32, 2)); -+ CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms1_static, ""); -+ VECT_VAR (vector_res, float, 32, 2) = -+ vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), -+ VECT_VAR (vsrc_2, float, 32, 2), elem1); -+ vst1_f32 (VECT_VAR (result, float, 32, 2), -+ VECT_VAR (vector_res, float, 32, 2)); -+ CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma1_static, ""); -+ -+ VECT_VAR_DECL (buf_src_5, float, 32, 2) [] = {A4, A5}; -+ VECT_VAR_DECL (buf_src_6, float, 32, 2) [] = {B4, B5}; -+ VLOAD (vsrc_1, buf_src_5, , float, f, 32, 2); -+ VLOAD (vsrc_2, buf_src_6, , float, f, 32, 2); -+ VECT_VAR (vector_res, float, 32, 2) = -+ vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), -+ VECT_VAR (vsrc_2, float, 32, 2), elem2); -+ vst1_f32 (VECT_VAR (result, float, 32, 2), -+ VECT_VAR (vector_res, float, 32, 2)); -+ CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms2_static, ""); -+ VECT_VAR (vector_res, float, 32, 2) = -+ vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), -+ VECT_VAR (vsrc_2, float, 32, 2), elem2); -+ vst1_f32 (VECT_VAR (result, float, 32, 2), -+ VECT_VAR (vector_res, float, 32, 2)); -+ CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma2_static, ""); -+ -+ VECT_VAR_DECL (buf_src_7, float, 32, 2) [] = {A6, A7}; -+ VECT_VAR_DECL (buf_src_8, float, 32, 2) [] = {B6, B7}; -+ VLOAD (vsrc_1, buf_src_7, , float, f, 32, 2); -+ VLOAD (vsrc_2, buf_src_8, , float, f, 32, 2); -+ VECT_VAR (vector_res, float, 32, 2) = -+ vfms_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), -+ VECT_VAR (vsrc_2, float, 32, 2), elem3); -+ vst1_f32 (VECT_VAR (result, float, 32, 2), -+ VECT_VAR (vector_res, float, 32, 2)); -+ CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfms3_static, ""); -+ VECT_VAR (vector_res, float, 32, 2) = -+ vfma_n_f32 (VECT_VAR (vsrc_1, float, 32, 2), -+ VECT_VAR (vsrc_2, float, 32, 2), elem3); -+ vst1_f32 (VECT_VAR (result, float, 32, 2), -+ VECT_VAR (vector_res, float, 32, 2)); -+ CHECK_FP (TEST_MSG, float, 32, 2, PRIx16, expectedfma3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMSQ_VFMAQ_N (FP32)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 32, 4); -+ DECL_VARIABLE(vsrc_2, float, 32, 4); -+ VECT_VAR_DECL (buf_src_1, float, 32, 4) [] = {A0, A1, A2, A3}; -+ VECT_VAR_DECL (buf_src_2, float, 32, 4) [] = {B0, B1, B2, B3}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 32, 4); -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 32, 4); -+ DECL_VARIABLE (vector_res, float, 32, 4) = -+ vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), -+ VECT_VAR (vsrc_2, float, 32, 4), elem0); -+ vst1q_f32 (VECT_VAR (result, float, 32, 4), -+ VECT_VAR 
(vector_res, float, 32, 4)); -+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms0_static, ""); -+ VECT_VAR (vector_res, float, 32, 4) = -+ vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), -+ VECT_VAR (vsrc_2, float, 32, 4), elem0); -+ vst1q_f32 (VECT_VAR (result, float, 32, 4), -+ VECT_VAR (vector_res, float, 32, 4)); -+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma0_static, ""); -+ -+ VECT_VAR_DECL (buf_src_3, float, 32, 4) [] = {A4, A5, A6, A7}; -+ VECT_VAR_DECL (buf_src_4, float, 32, 4) [] = {B4, B5, B6, B7}; -+ VLOAD (vsrc_1, buf_src_3, q, float, f, 32, 4); -+ VLOAD (vsrc_2, buf_src_4, q, float, f, 32, 4); -+ VECT_VAR (vector_res, float, 32, 4) = -+ vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), -+ VECT_VAR (vsrc_2, float, 32, 4), elem1); -+ vst1q_f32 (VECT_VAR (result, float, 32, 4), -+ VECT_VAR (vector_res, float, 32, 4)); -+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms1_static, ""); -+ VECT_VAR (vector_res, float, 32, 4) = -+ vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), -+ VECT_VAR (vsrc_2, float, 32, 4), elem1); -+ vst1q_f32 (VECT_VAR (result, float, 32, 4), -+ VECT_VAR (vector_res, float, 32, 4)); -+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma1_static, ""); -+ -+ VECT_VAR_DECL (buf_src_5, float, 32, 4) [] = {A0, A2, A4, A6}; -+ VECT_VAR_DECL (buf_src_6, float, 32, 4) [] = {B0, B2, B4, B6}; -+ VLOAD (vsrc_1, buf_src_5, q, float, f, 32, 4); -+ VLOAD (vsrc_2, buf_src_6, q, float, f, 32, 4); -+ VECT_VAR (vector_res, float, 32, 4) = -+ vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), -+ VECT_VAR (vsrc_2, float, 32, 4), elem2); -+ vst1q_f32 (VECT_VAR (result, float, 32, 4), -+ VECT_VAR (vector_res, float, 32, 4)); -+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms2_static, ""); -+ VECT_VAR (vector_res, float, 32, 4) = -+ vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), -+ VECT_VAR (vsrc_2, float, 32, 4), elem2); -+ vst1q_f32 (VECT_VAR (result, float, 32, 4), -+ VECT_VAR (vector_res, float, 32, 4)); -+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma2_static, ""); -+ -+ VECT_VAR_DECL (buf_src_7, float, 32, 4) [] = {A1, A3, A5, A7}; -+ VECT_VAR_DECL (buf_src_8, float, 32, 4) [] = {B1, B3, B5, B7}; -+ VLOAD (vsrc_1, buf_src_7, q, float, f, 32, 4); -+ VLOAD (vsrc_2, buf_src_8, q, float, f, 32, 4); -+ VECT_VAR (vector_res, float, 32, 4) = -+ vfmsq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), -+ VECT_VAR (vsrc_2, float, 32, 4), elem3); -+ vst1q_f32 (VECT_VAR (result, float, 32, 4), -+ VECT_VAR (vector_res, float, 32, 4)); -+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfms3_static, ""); -+ VECT_VAR (vector_res, float, 32, 4) = -+ vfmaq_n_f32 (VECT_VAR (vsrc_1, float, 32, 4), -+ VECT_VAR (vsrc_2, float, 32, 4), elem3); -+ vst1q_f32 (VECT_VAR (result, float, 32, 4), -+ VECT_VAR (vector_res, float, 32, 4)); -+ CHECK_FP (TEST_MSG, float, 32, 4, PRIx16, expectedfma3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMSQ_VFMAQ_N (FP64)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 64, 2); -+ DECL_VARIABLE(vsrc_2, float, 64, 2); -+ VECT_VAR_DECL (buf_src_1, float, 64, 2) [] = {DA0, DA1}; -+ VECT_VAR_DECL (buf_src_2, float, 64, 2) [] = {DB0, DB1}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 64, 2); -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 64, 2); -+ DECL_VARIABLE (vector_res, float, 64, 2) = -+ vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), -+ VECT_VAR (vsrc_2, float, 64, 2), delem0); -+ vst1q_f64 (VECT_VAR (result, float, 64, 2), -+ VECT_VAR (vector_res, float, 64, 2)); -+ CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfms0_static, ""); -+ VECT_VAR 
(vector_res, float, 64, 2) = -+ vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), -+ VECT_VAR (vsrc_2, float, 64, 2), delem0); -+ vst1q_f64 (VECT_VAR (result, float, 64, 2), -+ VECT_VAR (vector_res, float, 64, 2)); -+ CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfma0_static, ""); -+ -+ VECT_VAR_DECL (buf_src_3, float, 64, 2) [] = {DA2, DA3}; -+ VECT_VAR_DECL (buf_src_4, float, 64, 2) [] = {DB2, DB3}; -+ VLOAD (vsrc_1, buf_src_3, q, float, f, 64, 2); -+ VLOAD (vsrc_2, buf_src_4, q, float, f, 64, 2); -+ VECT_VAR (vector_res, float, 64, 2) = -+ vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), -+ VECT_VAR (vsrc_2, float, 64, 2), delem1); -+ vst1q_f64 (VECT_VAR (result, float, 64, 2), -+ VECT_VAR (vector_res, float, 64, 2)); -+ CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfms1_static, ""); -+ VECT_VAR (vector_res, float, 64, 2) = -+ vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), -+ VECT_VAR (vsrc_2, float, 64, 2), delem1); -+ vst1q_f64 (VECT_VAR (result, float, 64, 2), -+ VECT_VAR (vector_res, float, 64, 2)); -+ CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfma1_static, ""); -+ -+ VECT_VAR_DECL (buf_src_5, float, 64, 2) [] = {DA4, DA5}; -+ VECT_VAR_DECL (buf_src_6, float, 64, 2) [] = {DB4, DB5}; -+ VLOAD (vsrc_1, buf_src_5, q, float, f, 64, 2); -+ VLOAD (vsrc_2, buf_src_6, q, float, f, 64, 2); -+ VECT_VAR (vector_res, float, 64, 2) = -+ vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), -+ VECT_VAR (vsrc_2, float, 64, 2), delem2); -+ vst1q_f64 (VECT_VAR (result, float, 64, 2), -+ VECT_VAR (vector_res, float, 64, 2)); -+ CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfms2_static, ""); -+ VECT_VAR (vector_res, float, 64, 2) = -+ vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), -+ VECT_VAR (vsrc_2, float, 64, 2), delem2); -+ vst1q_f64 (VECT_VAR (result, float, 64, 2), -+ VECT_VAR (vector_res, float, 64, 2)); -+ CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfma2_static, ""); -+ -+ VECT_VAR_DECL (buf_src_7, float, 64, 2) [] = {DA6, DA7}; -+ VECT_VAR_DECL (buf_src_8, float, 64, 2) [] = {DB6, DB7}; -+ VLOAD (vsrc_1, buf_src_7, q, float, f, 64, 2); -+ VLOAD (vsrc_2, buf_src_8, q, float, f, 64, 2); -+ VECT_VAR (vector_res, float, 64, 2) = -+ vfmsq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), -+ VECT_VAR (vsrc_2, float, 64, 2), delem3); -+ vst1q_f64 (VECT_VAR (result, float, 64, 2), -+ VECT_VAR (vector_res, float, 64, 2)); -+ CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfms3_static, ""); -+ VECT_VAR (vector_res, float, 64, 2) = -+ vfmaq_n_f64 (VECT_VAR (vsrc_1, float, 64, 2), -+ VECT_VAR (vsrc_2, float, 64, 2), delem3); -+ vst1q_f64 (VECT_VAR (result, float, 64, 2), -+ VECT_VAR (vector_res, float, 64, 2)); -+ CHECK_FP (TEST_MSG, float, 64, 2, PRIx64, expectedfma3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VFMS_VFMA_N (FP64)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 64, 1); -+ DECL_VARIABLE(vsrc_2, float, 64, 1); -+ VECT_VAR_DECL (buf_src_1, float, 64, 1) [] = {DA0}; -+ VECT_VAR_DECL (buf_src_2, float, 64, 1) [] = {DB0}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 64, 1); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 64, 1); -+ DECL_VARIABLE (vector_res, float, 64, 1) = -+ vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), -+ VECT_VAR (vsrc_2, float, 64, 1), delem0); -+ vst1_f64 (VECT_VAR (result, float, 64, 1), -+ VECT_VAR (vector_res, float, 64, 1)); -+ CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfms0_static, ""); -+ VECT_VAR (vector_res, float, 64, 1) = -+ vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), -+ VECT_VAR (vsrc_2, float, 64, 1), delem0); -+ vst1_f64 (VECT_VAR (result, 
float, 64, 1), -+ VECT_VAR (vector_res, float, 64, 1)); -+ CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfma0_static, ""); -+ -+ VECT_VAR_DECL (buf_src_3, float, 64, 1) [] = {DA2}; -+ VECT_VAR_DECL (buf_src_4, float, 64, 1) [] = {DB2}; -+ VLOAD (vsrc_1, buf_src_3, , float, f, 64, 1); -+ VLOAD (vsrc_2, buf_src_4, , float, f, 64, 1); -+ VECT_VAR (vector_res, float, 64, 1) = -+ vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), -+ VECT_VAR (vsrc_2, float, 64, 1), delem1); -+ vst1_f64 (VECT_VAR (result, float, 64, 1), -+ VECT_VAR (vector_res, float, 64, 1)); -+ CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfms1_static, ""); -+ VECT_VAR (vector_res, float, 64, 1) = -+ vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), -+ VECT_VAR (vsrc_2, float, 64, 1), delem1); -+ vst1_f64 (VECT_VAR (result, float, 64, 1), -+ VECT_VAR (vector_res, float, 64, 1)); -+ CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfma1_static, ""); -+ -+ VECT_VAR_DECL (buf_src_5, float, 64, 1) [] = {DA4}; -+ VECT_VAR_DECL (buf_src_6, float, 64, 1) [] = {DB4}; -+ VLOAD (vsrc_1, buf_src_5, , float, f, 64, 1); -+ VLOAD (vsrc_2, buf_src_6, , float, f, 64, 1); -+ VECT_VAR (vector_res, float, 64, 1) = -+ vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), -+ VECT_VAR (vsrc_2, float, 64, 1), delem2); -+ vst1_f64 (VECT_VAR (result, float, 64, 1), -+ VECT_VAR (vector_res, float, 64, 1)); -+ CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfms2_static, ""); -+ VECT_VAR (vector_res, float, 64, 1) = -+ vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), -+ VECT_VAR (vsrc_2, float, 64, 1), delem2); -+ vst1_f64 (VECT_VAR (result, float, 64, 1), -+ VECT_VAR (vector_res, float, 64, 1)); -+ CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfma2_static, ""); -+ -+ VECT_VAR_DECL (buf_src_7, float, 64, 1) [] = {DA6}; -+ VECT_VAR_DECL (buf_src_8, float, 64, 1) [] = {DB6}; -+ VLOAD (vsrc_1, buf_src_7, , float, f, 64, 1); -+ VLOAD (vsrc_2, buf_src_8, , float, f, 64, 1); -+ VECT_VAR (vector_res, float, 64, 1) = -+ vfms_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), -+ VECT_VAR (vsrc_2, float, 64, 1), delem3); -+ vst1_f64 (VECT_VAR (result, float, 64, 1), -+ VECT_VAR (vector_res, float, 64, 1)); -+ CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfms3_static, ""); -+ VECT_VAR (vector_res, float, 64, 1) = -+ vfma_n_f64 (VECT_VAR (vsrc_1, float, 64, 1), -+ VECT_VAR (vsrc_2, float, 64, 1), delem3); -+ vst1_f64 (VECT_VAR (result, float, 64, 1), -+ VECT_VAR (vector_res, float, 64, 1)); -+ CHECK_FP (TEST_MSG, float, 64, 1, PRIx64, expectedfma3_static, ""); -+} -+#endif -+ -+int -+main (void) -+{ -+#if defined(__aarch64__) && defined(__ARM_FEATURE_FMA) -+ exec_vfma_vfms_n (); -+#endif -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vfmsh_f16_1.c -@@ -0,0 +1,40 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Expected results (16-bit hexadecimal representation). 
*/
-+uint16_t expected[] =
-+{
-+  0x0000 /* 0.000000 */,
-+  0x8000 /* -0.000000 */,
-+  0x42af /* 3.341797 */,
-+  0x5043 /* 34.093750 */,
-+  0xccd2 /* -19.281250 */,
-+  0x3712 /* 0.441895 */,
-+  0x3acc /* 0.849609 */,
-+  0x4848 /* 8.562500 */,
-+  0xcc43 /* -17.046875 */,
-+  0xd65c /* -101.750000 */,
-+  0x4185 /* 2.759766 */,
-+  0xcd39 /* -20.890625 */,
-+  0xd45b /* -69.687500 */,
-+  0x5241 /* 50.031250 */,
-+  0xc675 /* -6.457031 */,
-+  0x4d07 /* 20.109375 */,
-+  0x7c00 /* inf */,
-+  0xfc00 /* -inf */
-+};
-+
-+#define TEST_MSG "VFMSH_F16"
-+#define INSN_NAME vfmsh_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for ternary scalar operations. */
-+#include "ternary_scalar_op.inc"
---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_high.c
-@@ -63,8 +63,8 @@ void exec_vget_high (void)
-   CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, "");
-   CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, "");
-   CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, "");
--  CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
--  CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
-+  CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected, "");
-+  CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected, "");
-   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, "");
- }
- 
---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_lane.c
-@@ -13,6 +13,7 @@ uint32_t expected_u32 = 0xfffffff1;
- uint64_t expected_u64 = 0xfffffffffffffff0;
- poly8_t expected_p8 = 0xf6;
- poly16_t expected_p16 = 0xfff2;
-+hfloat16_t expected_f16 = 0xcb80;
- hfloat32_t expected_f32 = 0xc1700000;
- 
- int8_t expectedq_s8 = 0xff;
-@@ -25,6 +26,7 @@ uint32_t expectedq_u32 = 0xfffffff2;
- uint64_t expectedq_u64 = 0xfffffffffffffff1;
- poly8_t expectedq_p8 = 0xfe;
- poly16_t expectedq_p16 = 0xfff6;
-+hfloat16_t expectedq_f16 = 0xca80;
- hfloat32_t expectedq_f32 = 0xc1500000;
- 
- int error_found = 0;
-@@ -52,6 +54,12 @@ void exec_vget_lane (void)
-     uint32_t var_int32;
-     float32_t var_float32;
-   } var_int32_float32;
-+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
-+  union {
-+    uint16_t var_int16;
-+    float16_t var_float16;
-+  } var_int16_float16;
-+#endif
- 
- #define TEST_VGET_LANE_FP(Q, T1, T2, W, N, L) \
-   VAR(var, T1, W) = vget##Q##_lane_##T2##W(VECT_VAR(vector, T1, W, N), L); \
-@@ -81,10 +89,17 @@ void exec_vget_lane (void)
-   VAR_DECL(var, uint, 64);
-   VAR_DECL(var, poly, 8);
-   VAR_DECL(var, poly, 16);
-+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
-+  VAR_DECL(var, float, 16);
-+#endif
-   VAR_DECL(var, float, 32);
- 
-   /* Initialize input values. 
*/ - TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, float, f, 32, 4); - -@@ -99,6 +114,9 @@ void exec_vget_lane (void) - TEST_VGET_LANE(, uint, u, 64, 1, 0); - TEST_VGET_LANE(, poly, p, 8, 8, 6); - TEST_VGET_LANE(, poly, p, 16, 4, 2); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VGET_LANE_FP(, float, f, 16, 4, 1); -+#endif - TEST_VGET_LANE_FP(, float, f, 32, 2, 1); - - TEST_VGET_LANE(q, int, s, 8, 16, 15); -@@ -111,6 +129,9 @@ void exec_vget_lane (void) - TEST_VGET_LANE(q, uint, u, 64, 2, 1); - TEST_VGET_LANE(q, poly, p, 8, 16, 14); - TEST_VGET_LANE(q, poly, p, 16, 8, 6); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VGET_LANE_FP(q, float, f, 16, 8, 3); -+#endif - TEST_VGET_LANE_FP(q, float, f, 32, 4, 3); - } - ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vget_low.c -@@ -63,8 +63,8 @@ void exec_vget_low (void) - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); - CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) - CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); - #endif ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - float16x4x2_t - f_vld2_lane_f16 (float16_t * p, float16x4x2_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld2q_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - float16x8x2_t - f_vld2q_lane_f16 (float16_t * p, float16x8x2_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - float16x4x3_t - f_vld3_lane_f16 (float16_t * p, float16x4x3_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld3q_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - float16x8x3_t - f_vld3q_lane_f16 (float16_t * p, float16x8x3_t v) ---- 
a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - float16x4x4_t - f_vld4_lane_f16 (float16_t * p, float16x4x4_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vld4q_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - float16x8x4_t - f_vld4q_lane_f16 (float16_t * p, float16x8x4_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX.c -@@ -528,8 +528,8 @@ void exec_vldX (void) - CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ - CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ -- CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ - \ - CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ -@@ -538,8 +538,8 @@ void exec_vldX (void) - CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \ - CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ -- CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment) - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c -@@ -270,8 +270,8 @@ void exec_vldX_dup (void) - CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ - CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment); \ -- CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment) - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vldX_lane.c -@@ -451,14 +451,14 @@ void exec_vldX_lane (void) - CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ - CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ -- CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); 
\ -+ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ - CHECK(test_name, int, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK(test_name, int, 32, 4, PRIx32, EXPECTED, comment); \ - CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment) - - #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmax.c -@@ -7,6 +7,10 @@ - - #define HAS_FLOAT_VARIANT - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+#define HAS_FLOAT16_VARIANT -+#endif -+ - /* Expected results. */ - VECT_VAR_DECL(expected,int,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, - 0xf4, 0xf5, 0xf6, 0xf7 }; -@@ -16,6 +20,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf3, 0xf3, 0xf3, 0xf3, - 0xf4, 0xf5, 0xf6, 0xf7 }; - VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff1, 0xfff2, 0xfff3 }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcbc0, 0xcb80, 0xcb00, 0xca80 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1780000, 0xc1700000 }; - VECT_VAR_DECL(expected,int,8,16) [] = { 0xf4, 0xf4, 0xf4, 0xf4, - 0xf4, 0xf5, 0xf6, 0xf7, -@@ -33,10 +40,36 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff2, 0xfff2, 0xfff2, 0xfff3, - 0xfff4, 0xfff5, 0xfff6, 0xfff7 }; - VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff1, 0xfffffff1, - 0xfffffff2, 0xfffffff3 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcb40, 0xcb40, 0xcb00, 0xca80, -+ 0xca00, 0xc980, 0xc900, 0xc880 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1680000, 0xc1680000, - 0xc1600000, 0xc1500000 }; - - /* Expected results with special FP values. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+VECT_VAR_DECL(expected_mnan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+VECT_VAR_DECL(expected_inf, hfloat, 16, 8) [] = { 0x7c00, 0x7c00, -+ 0x7c00, 0x7c00, -+ 0x7c00, 0x7c00, -+ 0x7c00, 0x7c00 }; -+VECT_VAR_DECL(expected_minf, hfloat, 16, 8) [] = { 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00 }; -+VECT_VAR_DECL(expected_zero1, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_zero2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+#endif - VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, - 0x7fc00000, 0x7fc00000 }; - VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxh_f16_1.c -@@ -0,0 +1,34 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. 
*/
-+#define A 123.4
-+#define B -567.8
-+#define C -34.8
-+#define D 1024
-+#define E 663.1
-+#define F 169.1
-+#define G -4.8
-+#define H 77
-+
-+float16_t input_1[] = { A, B, C, D };
-+float16_t input_2[] = { E, F, G, H };
-+float16_t expected[] = { E, F, G, D };
-+
-+#define TEST_MSG "VMAXH_F16"
-+#define INSN_NAME vmaxh_f16
-+
-+#define INPUT_1 input_1
-+#define INPUT_2 input_2
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for binary scalar operations. */
-+#include "binary_scalar_op.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnm_1.c
-@@ -0,0 +1,47 @@
-+/* This file tests an intrinsic which currently has only an f16 variant and that
-+   is only available when FP16 arithmetic instructions are supported. */
-+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+#include "compute-ref-data.h"
-+
-+#define INSN_NAME vmaxnm
-+#define TEST_MSG "VMAXNM/VMAXNMQ"
-+
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+#define HAS_FLOAT16_VARIANT
-+#endif
-+
-+/* Expected results. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcbc0, 0xcb80, 0xcb00, 0xca80 };
-+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcb40, 0xcb40, 0xcb00, 0xca80,
-+                                              0xca00, 0xc980, 0xc900, 0xc880 };
-+#endif
-+
-+/* Expected results with special FP values. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
-+                                                  0x3c00, 0x3c00,
-+                                                  0x3c00, 0x3c00,
-+                                                  0x3c00, 0x3c00 };
-+VECT_VAR_DECL(expected_mnan, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
-+                                                   0x3c00, 0x3c00,
-+                                                   0x3c00, 0x3c00,
-+                                                   0x3c00, 0x3c00 };
-+VECT_VAR_DECL(expected_inf, hfloat, 16, 8) [] = { 0x7c00, 0x7c00,
-+                                                  0x7c00, 0x7c00,
-+                                                  0x7c00, 0x7c00,
-+                                                  0x7c00, 0x7c00 };
-+VECT_VAR_DECL(expected_minf, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
-+                                                   0x3c00, 0x3c00,
-+                                                   0x3c00, 0x3c00,
-+                                                   0x3c00, 0x3c00 };
-+VECT_VAR_DECL(expected_zero1, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
-+                                                    0x0, 0x0, 0x0, 0x0 };
-+VECT_VAR_DECL(expected_zero2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0,
-+                                                    0x0, 0x0, 0x0, 0x0 };
-+#endif
-+
-+#include "binary_op_float.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmh_f16_1.c
-@@ -0,0 +1,42 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+
-+#include <arm_fp16.h>
-+
-+#define INFF __builtin_inf ()
-+
-+/* Expected results (16-bit hexadecimal representation). */
-+uint16_t expected[] =
-+{
-+  0x3c00 /* 1.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0x4000 /* 2.000000 */,
-+  0x5640 /* 100.000000 */,
-+  0x4f80 /* 30.000000 */,
-+  0x3666 /* 0.399902 */,
-+  0x3800 /* 0.500000 */,
-+  0x3d52 /* 1.330078 */,
-+  0xc64d /* -6.300781 */,
-+  0x4d00 /* 20.000000 */,
-+  0x355d /* 0.335205 */,
-+  0x409a /* 2.300781 */,
-+  0x3c00 /* 1.000000 */,
-+  0x4a91 /* 13.132812 */,
-+  0x34f6 /* 0.310059 */,
-+  0x4d00 /* 20.000000 */,
-+  0x7c00 /* inf */,
-+  0x7c00 /* inf */
-+};
-+
-+#define TEST_MSG "VMAXNMH_F16"
-+#define INSN_NAME vmaxnmh_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for binary scalar operations. 
*/ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxnmv_f16_1.c -@@ -0,0 +1,131 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A0 FP16_C (34.8) -+#define B0 FP16_C (__builtin_nanf ("")) -+#define C0 FP16_C (-__builtin_nanf ("")) -+#define D0 FP16_C (0.0) -+ -+#define A1 FP16_C (1025.8) -+#define B1 FP16_C (13.4) -+#define C1 FP16_C (__builtin_nanf ("")) -+#define D1 FP16_C (10) -+#define E1 FP16_C (-0.0) -+#define F1 FP16_C (-__builtin_nanf ("")) -+#define G1 FP16_C (0.0) -+#define H1 FP16_C (10) -+ -+/* Expected results for vmaxnmv. */ -+uint16_t expect = 0x505A /* A0. */; -+uint16_t expect_alt = 0x6402 /* A1. */; -+ -+void exec_vmaxnmv_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VMAXNMV (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 4); -+ VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A0, B0, C0, D0}; -+ VLOAD (vsrc, buf_src, , float, f, 16, 4); -+ float16_t vector_res = vmaxnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src1, float, 16, 4) [] = {B0, A0, C0, D0}; -+ VLOAD (vsrc, buf_src1, , float, f, 16, 4); -+ vector_res = vmaxnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src2, float, 16, 4) [] = {B0, C0, A0, D0}; -+ VLOAD (vsrc, buf_src2, , float, f, 16, 4); -+ vector_res = vmaxnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src3, float, 16, 4) [] = {B0, C0, D0, A0}; -+ VLOAD (vsrc, buf_src3, , float, f, 16, 4); -+ vector_res = vmaxnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMAXNMVQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 8); -+ VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A1, B1, C1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src, q, float, f, 16, 8); -+ vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src1, float, 16, 8) [] = {B1, A1, C1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src1, q, float, f, 16, 8); -+ vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src2, float, 16, 8) [] = {B1, C1, A1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src2, q, float, f, 16, 8); -+ vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src3, float, 16, 8) [] = {B1, C1, D1, A1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src3, q, float, f, 16, 8); -+ vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src4, float, 16, 8) [] = {B1, C1, D1, E1, A1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src4, q, float, f, 16, 8); -+ vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src5, float, 16, 8) [] = {B1, C1, D1, E1, F1, A1, G1, H1}; -+ VLOAD 
(vsrc, buf_src5, q, float, f, 16, 8); -+ vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src6, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, A1, H1}; -+ VLOAD (vsrc, buf_src6, q, float, f, 16, 8); -+ vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src7, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, H1, A1}; -+ VLOAD (vsrc, buf_src7, q, float, f, 16, 8); -+ vector_res = vmaxnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+} -+ -+int -+main (void) -+{ -+ exec_vmaxnmv_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmaxv_f16_1.c -@@ -0,0 +1,131 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A0 FP16_C (123.4) -+#define B0 FP16_C (-567.8) -+#define C0 FP16_C (34.8) -+#define D0 FP16_C (0.0) -+ -+#define A1 FP16_C (1025.8) -+#define B1 FP16_C (13.4) -+#define C1 FP16_C (-567.8) -+#define D1 FP16_C (10) -+#define E1 FP16_C (-0.0) -+#define F1 FP16_C (567.8) -+#define G1 FP16_C (0.0) -+#define H1 FP16_C (10) -+ -+/* Expected results for vmaxv. */ -+uint16_t expect = 0x57B6 /* A0. */; -+uint16_t expect_alt = 0x6402 /* A1. */; -+ -+void exec_vmaxv_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VMAXV (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 4); -+ VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A0, B0, C0, D0}; -+ VLOAD (vsrc, buf_src, , float, f, 16, 4); -+ float16_t vector_res = vmaxv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src1, float, 16, 4) [] = {B0, A0, C0, D0}; -+ VLOAD (vsrc, buf_src1, , float, f, 16, 4); -+ vector_res = vmaxv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src2, float, 16, 4) [] = {B0, C0, A0, D0}; -+ VLOAD (vsrc, buf_src2, , float, f, 16, 4); -+ vector_res = vmaxv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src3, float, 16, 4) [] = {B0, C0, D0, A0}; -+ VLOAD (vsrc, buf_src3, , float, f, 16, 4); -+ vector_res = vmaxv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMAXVQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 8); -+ VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A1, B1, C1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src, q, float, f, 16, 8); -+ vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src1, float, 16, 8) [] = {B1, A1, C1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src1, q, float, f, 16, 8); -+ vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src2, float, 16, 8) [] = {B1, C1, A1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src2, q, float, f, 16, 8); -+ vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != 
expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src3, float, 16, 8) [] = {B1, C1, D1, A1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src3, q, float, f, 16, 8); -+ vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src4, float, 16, 8) [] = {B1, C1, D1, E1, A1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src4, q, float, f, 16, 8); -+ vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src5, float, 16, 8) [] = {B1, C1, D1, E1, F1, A1, G1, H1}; -+ VLOAD (vsrc, buf_src5, q, float, f, 16, 8); -+ vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src6, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, A1, H1}; -+ VLOAD (vsrc, buf_src6, q, float, f, 16, 8); -+ vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src7, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, H1, A1}; -+ VLOAD (vsrc, buf_src7, q, float, f, 16, 8); -+ vector_res = vmaxvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+} -+ -+int -+main (void) -+{ -+ exec_vmaxv_f16 (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmin.c -@@ -7,6 +7,10 @@ - - #define HAS_FLOAT_VARIANT - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+#define HAS_FLOAT16_VARIANT -+#endif -+ - /* Expected results. */ - VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0xf3, 0xf3, 0xf3, 0xf3 }; -@@ -16,6 +20,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0xf3, 0xf3, 0xf3, 0xf3 }; - VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff1, 0xfff1 }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcc00, 0xcbc0, 0xcbc0, 0xcbc0 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1780000 }; - VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0xf4, 0xf4, 0xf4, 0xf4, -@@ -31,11 +38,41 @@ VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0xf9, 0xf9, 0xf9, 0xf9 }; - VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff2, - 0xfff2, 0xfff2, 0xfff2, 0xfff2 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, 0xcb40, 0xcb40, -+ 0xcb40, 0xcb40, 0xcb40, 0xcb40 }; -+#endif - VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, - 0xfffffff1, 0xfffffff1 }; - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, - 0xc1680000, 0xc1680000 }; - /* Expected results with special FP values. 
*/
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
-+                                                  0x7e00, 0x7e00,
-+                                                  0x7e00, 0x7e00,
-+                                                  0x7e00, 0x7e00 };
-+VECT_VAR_DECL(expected_mnan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00,
-+                                                   0x7e00, 0x7e00,
-+                                                   0x7e00, 0x7e00,
-+                                                   0x7e00, 0x7e00 };
-+VECT_VAR_DECL(expected_inf, hfloat, 16, 8) [] = { 0x3c00, 0x3c00,
-+                                                  0x3c00, 0x3c00,
-+                                                  0x3c00, 0x3c00,
-+                                                  0x3c00, 0x3c00 };
-+VECT_VAR_DECL(expected_minf, hfloat, 16, 8) [] = { 0xfc00, 0xfc00,
-+                                                   0xfc00, 0xfc00,
-+                                                   0xfc00, 0xfc00,
-+                                                   0xfc00, 0xfc00 };
-+VECT_VAR_DECL(expected_zero1, hfloat, 16, 8) [] = { 0x8000, 0x8000,
-+                                                    0x8000, 0x8000,
-+                                                    0x8000, 0x8000,
-+                                                    0x8000, 0x8000 };
-+VECT_VAR_DECL(expected_zero2, hfloat, 16, 8) [] = { 0x8000, 0x8000,
-+                                                    0x8000, 0x8000,
-+                                                    0x8000, 0x8000,
-+                                                    0x8000, 0x8000 };
-+#endif
- VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
-                                                0x7fc00000, 0x7fc00000 };
- VECT_VAR_DECL(expected_mnan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000,
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminh_f16_1.c
-@@ -0,0 +1,34 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+/* { dg-skip-if "" { arm*-*-* } } */
-+
-+#include <arm_fp16.h>
-+
-+/* Input values. */
-+#define A 123.4
-+#define B -567.8
-+#define C -34.8
-+#define D 1024
-+#define E 663.1
-+#define F 169.1
-+#define G -4.8
-+#define H 77
-+
-+float16_t input_1[] = { A, B, C, D };
-+float16_t input_2[] = { E, F, G, H };
-+float16_t expected[] = { A, B, C, H };
-+
-+#define TEST_MSG "VMINH_F16"
-+#define INSN_NAME vminh_f16
-+
-+#define INPUT_1 input_1
-+#define INPUT_2 input_2
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for binary scalar operations. */
-+#include "binary_scalar_op.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnm_1.c
-@@ -0,0 +1,51 @@
-+/* This file tests an intrinsic which currently has only an f16 variant and that
-+   is only available when FP16 arithmetic instructions are supported. */
-+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+#include "compute-ref-data.h"
-+
-+#define INSN_NAME vminnm
-+#define TEST_MSG "VMINNM/VMINNMQ"
-+
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+#define HAS_FLOAT16_VARIANT
-+#endif
-+
-+/* Expected results. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcc00, 0xcbc0, 0xcbc0, 0xcbc0 };
-+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, 0xcb40, 0xcb40,
-+                                              0xcb40, 0xcb40, 0xcb40, 0xcb40 };
-+#endif
-+
-+/* Expected results with special FP values. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00 }; -+VECT_VAR_DECL(expected_mnan, hfloat, 16, 8) [] = { 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00 }; -+VECT_VAR_DECL(expected_inf, hfloat, 16, 8) [] = { 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00, -+ 0x3c00, 0x3c00 }; -+VECT_VAR_DECL(expected_minf, hfloat, 16, 8) [] = { 0xfc00, 0xfc00, -+ 0xfc00, 0xfc00, -+ 0xfc00, 0xfc00, -+ 0xfc00, 0xfc00 }; -+VECT_VAR_DECL(expected_zero1, hfloat, 16, 8) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+VECT_VAR_DECL(expected_zero2, hfloat, 16, 8) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+#endif -+ -+#include "binary_op_float.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmh_f16_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+#define INFF __builtin_inf () -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0xc454 /* -4.328125 */, -+ 0x4233 /* 3.099609 */, -+ 0x4d00 /* 20.000000 */, -+ 0xa51f /* -0.020004 */, -+ 0xc09a /* -2.300781 */, -+ 0xc73b /* -7.230469 */, -+ 0xc79a /* -7.601562 */, -+ 0x34f6 /* 0.310059 */, -+ 0xc73b /* -7.230469 */, -+ 0x3800 /* 0.500000 */, -+ 0xc79a /* -7.601562 */, -+ 0x451a /* 5.101562 */, -+ 0xc64d /* -6.300781 */, -+ 0x3556 /* 0.333496 */, -+ 0xfc00 /* -inf */, -+ 0xfc00 /* -inf */ -+}; -+ -+#define TEST_MSG "VMINNMH_F16" -+#define INSN_NAME vminnmh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminnmv_f16_1.c -@@ -0,0 +1,131 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A0 FP16_C (-567.8) -+#define B0 FP16_C (__builtin_nanf ("")) -+#define C0 FP16_C (34.8) -+#define D0 FP16_C (-__builtin_nanf ("")) -+ -+#define A1 FP16_C (-567.8) -+#define B1 FP16_C (1025.8) -+#define C1 FP16_C (-__builtin_nanf ("")) -+#define D1 FP16_C (10) -+#define E1 FP16_C (-0.0) -+#define F1 FP16_C (__builtin_nanf ("")) -+#define G1 FP16_C (0.0) -+#define H1 FP16_C (10) -+ -+/* Expected results for vminnmv. */ -+uint16_t expect = 0xE070 /* A0. */; -+uint16_t expect_alt = 0xE070 /* A1. 
*/; -+ -+void exec_vminnmv_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VMINNMV (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 4); -+ VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A0, B0, C0, D0}; -+ VLOAD (vsrc, buf_src, , float, f, 16, 4); -+ float16_t vector_res = vminnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src1, float, 16, 4) [] = {B0, A0, C0, D0}; -+ VLOAD (vsrc, buf_src1, , float, f, 16, 4); -+ vector_res = vminnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src2, float, 16, 4) [] = {B0, C0, A0, D0}; -+ VLOAD (vsrc, buf_src2, , float, f, 16, 4); -+ vector_res = vminnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src3, float, 16, 4) [] = {B0, C0, D0, A0}; -+ VLOAD (vsrc, buf_src3, , float, f, 16, 4); -+ vector_res = vminnmv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMINNMVQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 8); -+ VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A1, B1, C1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src, q, float, f, 16, 8); -+ vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src1, float, 16, 8) [] = {B1, A1, C1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src1, q, float, f, 16, 8); -+ vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src2, float, 16, 8) [] = {B1, C1, A1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src2, q, float, f, 16, 8); -+ vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src3, float, 16, 8) [] = {B1, C1, D1, A1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src3, q, float, f, 16, 8); -+ vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src4, float, 16, 8) [] = {B1, C1, D1, E1, A1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src4, q, float, f, 16, 8); -+ vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src5, float, 16, 8) [] = {B1, C1, D1, E1, F1, A1, G1, H1}; -+ VLOAD (vsrc, buf_src5, q, float, f, 16, 8); -+ vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src6, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, A1, H1}; -+ VLOAD (vsrc, buf_src6, q, float, f, 16, 8); -+ vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src7, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, H1, A1}; -+ VLOAD (vsrc, buf_src7, q, float, f, 16, 8); -+ vector_res = vminnmvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+} -+ -+int -+main (void) -+{ -+ exec_vminnmv_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vminv_f16_1.c -@@ -0,0 +1,131 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ 
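The FP16 reduction tests in these files (vminnmv above, vminv below, and their vmaxv/vmaxnmv counterparts) compare results as raw IEEE-754 binary16 bit patterns; 0xE070, for instance, encodes -568, the nearest binary16 value to the -567.8 input. The following is a minimal standalone decoder offered only as a reference sketch: the helper name fp16_bits_to_double is made up for illustration and is not part of the patch or the testsuite.

#include <math.h>
#include <stdio.h>

/* Decode a raw IEEE-754 binary16 bit pattern into a double.
   Hypothetical helper, for illustration only.  */
static double
fp16_bits_to_double (unsigned short bits)
{
  int sign = (bits >> 15) & 0x1;
  int expo = (bits >> 10) & 0x1f;
  int frac = bits & 0x3ff;
  double mag;

  if (expo == 0x1f)      /* All-ones exponent: inf (frac == 0) or NaN.  */
    mag = frac ? NAN : INFINITY;
  else if (expo == 0)    /* Subnormal or zero: no implicit leading 1.  */
    mag = ldexp (frac / 1024.0, -14);
  else                   /* Normal: (1 + frac/1024) * 2^(expo - 15).  */
    mag = ldexp (1.0 + frac / 1024.0, expo - 15);
  return sign ? -mag : mag;
}

int
main (void)
{
  /* 0xE070: sign 1, exponent 0x18, fraction 0x070,
     so -(1 + 112/1024) * 2^9 = -568.  */
  printf ("%g\n", fp16_bits_to_double (0xe070));
  return 0;
}

Since -567.8 rounds to -568 in binary16, both A0 and A1 in the surrounding tests check against the same pattern 0xE070, which is why expect and expect_alt share one value.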
-+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A0 FP16_C (-567.8) -+#define B0 FP16_C (123.4) -+#define C0 FP16_C (34.8) -+#define D0 FP16_C (0.0) -+ -+#define A1 FP16_C (-567.8) -+#define B1 FP16_C (1025.8) -+#define C1 FP16_C (13.4) -+#define D1 FP16_C (10) -+#define E1 FP16_C (-0.0) -+#define F1 FP16_C (567.8) -+#define G1 FP16_C (0.0) -+#define H1 FP16_C (10) -+ -+/* Expected results for vminv. */ -+uint16_t expect = 0xE070 /* A0. */; -+uint16_t expect_alt = 0xE070 /* A1. */; -+ -+void exec_vminv_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VMINV (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 4); -+ VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A0, B0, C0, D0}; -+ VLOAD (vsrc, buf_src, , float, f, 16, 4); -+ float16_t vector_res = vminv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src1, float, 16, 4) [] = {B0, A0, C0, D0}; -+ VLOAD (vsrc, buf_src1, , float, f, 16, 4); -+ vector_res = vminv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src2, float, 16, 4) [] = {B0, C0, A0, D0}; -+ VLOAD (vsrc, buf_src2, , float, f, 16, 4); -+ vector_res = vminv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src3, float, 16, 4) [] = {B0, C0, D0, A0}; -+ VLOAD (vsrc, buf_src3, , float, f, 16, 4); -+ vector_res = vminv_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ -+ if (* (uint16_t *) &vector_res != expect) -+ abort (); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMINVQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 8); -+ VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A1, B1, C1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src, q, float, f, 16, 8); -+ vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src1, float, 16, 8) [] = {B1, A1, C1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src1, q, float, f, 16, 8); -+ vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src2, float, 16, 8) [] = {B1, C1, A1, D1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src2, q, float, f, 16, 8); -+ vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src3, float, 16, 8) [] = {B1, C1, D1, A1, E1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src3, q, float, f, 16, 8); -+ vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src4, float, 16, 8) [] = {B1, C1, D1, E1, A1, F1, G1, H1}; -+ VLOAD (vsrc, buf_src4, q, float, f, 16, 8); -+ vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src5, float, 16, 8) [] = {B1, C1, D1, E1, F1, A1, G1, H1}; -+ VLOAD (vsrc, buf_src5, q, float, f, 16, 8); -+ vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src6, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, A1, H1}; -+ VLOAD (vsrc, buf_src6, q, float, f, 16, 8); -+ vector_res = vminvq_f16 
(VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+ -+ VECT_VAR_DECL (buf_src7, float, 16, 8) [] = {B1, C1, D1, E1, F1, G1, H1, A1}; -+ VLOAD (vsrc, buf_src7, q, float, f, 16, 8); -+ vector_res = vminvq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ -+ if (* (uint16_t *) &vector_res != expect_alt) -+ abort (); -+} -+ -+int -+main (void) -+{ -+ exec_vminv_f16 (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovn.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmovn.c -@@ -35,11 +35,11 @@ void exec_vmovn (void) - TEST_VMOVN(uint, u, 32, 16, 4); - TEST_VMOVN(uint, u, 64, 32, 2); - -- CHECK(TEST_MSG, int, 8, 8, PRIx32, expected, ""); -- CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 8, 8, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); - } - ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul.c -@@ -13,6 +13,10 @@ VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff9a0, 0xfffffa06 }; - VECT_VAR_DECL(expected,poly,8,8) [] = { 0xc0, 0x84, 0x48, 0xc, - 0xd0, 0x94, 0x58, 0x1c }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xe02a, 0xdfcf, -+ 0xdf4a, 0xdec4 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc4053333, 0xc3f9c000 }; - VECT_VAR_DECL(expected,int,8,16) [] = { 0x90, 0x7, 0x7e, 0xf5, - 0x6c, 0xe3, 0x5a, 0xd1, -@@ -34,13 +38,15 @@ VECT_VAR_DECL(expected,poly,8,16) [] = { 0x60, 0xca, 0x34, 0x9e, - 0xc8, 0x62, 0x9c, 0x36, - 0x30, 0x9a, 0x64, 0xce, - 0x98, 0x32, 0xcc, 0x66 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xe63a, 0xe5d6, 0xe573, 0xe50f, -+ 0xe4ac, 0xe448, 0xe3c8, 0xe301 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4c73333, 0xc4bac000, - 0xc4ae4ccd, 0xc4a1d999 }; - --#ifndef INSN_NAME - #define INSN_NAME vmul - #define TEST_MSG "VMUL" --#endif - - #define FNNAME1(NAME) exec_ ## NAME - #define FNNAME(NAME) FNNAME1(NAME) -@@ -80,6 +86,17 @@ void FNNAME (INSN_NAME) (void) - DECL_VMUL(poly, 8, 16); - DECL_VMUL(float, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector1, float, 16, 4); -+ DECL_VARIABLE(vector1, float, 16, 8); -+ -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 8); -+ -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif -+ - clean_results (); - - /* Initialize input "vector1" from "buffer". */ -@@ -99,6 +116,10 @@ void FNNAME (INSN_NAME) (void) - VLOAD(vector1, buffer, q, uint, u, 32, 4); - VLOAD(vector1, buffer, q, poly, p, 8, 16); - VLOAD(vector1, buffer, q, float, f, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector1, buffer, , float, f, 16, 4); -+ VLOAD(vector1, buffer, q, float, f, 16, 8); -+#endif - - /* Choose init value arbitrarily. 
*/ - VDUP(vector2, , int, s, 8, 8, 0x11); -@@ -117,6 +138,10 @@ void FNNAME (INSN_NAME) (void) - VDUP(vector2, q, uint, u, 32, 4, 0xCC); - VDUP(vector2, q, poly, p, 8, 16, 0xAA); - VDUP(vector2, q, float, f, 32, 4, 99.6f); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, 33.3f); -+ VDUP(vector2, q, float, f, 16, 8, 99.6f); -+#endif - - /* Execute the tests. */ - TEST_VMUL(INSN_NAME, , int, s, 8, 8); -@@ -135,6 +160,10 @@ void FNNAME (INSN_NAME) (void) - TEST_VMUL(INSN_NAME, q, uint, u, 32, 4); - TEST_VMUL(INSN_NAME, q, poly, p, 8, 16); - TEST_VMUL(INSN_NAME, q, float, f, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VMUL(INSN_NAME, , float, f, 16, 4); -+ TEST_VMUL(INSN_NAME, q, float, f, 16, 8); -+#endif - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); - CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); -@@ -142,7 +171,7 @@ void FNNAME (INSN_NAME) (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); -@@ -150,8 +179,12 @@ void FNNAME (INSN_NAME) (void) - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); -+#endif - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane.c -@@ -7,6 +7,9 @@ VECT_VAR_DECL(expected,int,16,4) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc }; - VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 }; - VECT_VAR_DECL(expected,uint,16,4) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffface0, 0xffffb212 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xddb3, 0xdd58, 0xdcfd, 0xdca1 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b66666, 0xc3ab0000 }; - VECT_VAR_DECL(expected,int,16,8) [] = { 0xffc0, 0xffc4, 0xffc8, 0xffcc, - 0xffd0, 0xffd4, 0xffd8, 0xffdc }; -@@ -16,6 +19,10 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xbbc0, 0xc004, 0xc448, 0xc88c, - 0xccd0, 0xd114, 0xd558, 0xd99c }; - VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffface0, 0xffffb212, - 0xffffb744, 0xffffbc76 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xddb3, 0xdd58, 0xdcfd, 0xdca1, -+ 0xdc46, 0xdbd6, 0xdb20, 0xda69 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc3b66666, 0xc3ab0000, - 0xc39f9999, 0xc3943333 }; - -@@ -45,11 +52,20 @@ void exec_vmul_lane (void) - - DECL_VMUL(vector); - DECL_VMUL(vector_res); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 
8); -+#endif - - DECL_VARIABLE(vector2, int, 16, 4); - DECL_VARIABLE(vector2, int, 32, 2); - DECL_VARIABLE(vector2, uint, 16, 4); - DECL_VARIABLE(vector2, uint, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector2, float, 16, 4); -+#endif - DECL_VARIABLE(vector2, float, 32, 2); - - clean_results (); -@@ -59,11 +75,17 @@ void exec_vmul_lane (void) - VLOAD(vector, buffer, , int, s, 32, 2); - VLOAD(vector, buffer, , uint, u, 16, 4); - VLOAD(vector, buffer, , uint, u, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, int, s, 16, 8); - VLOAD(vector, buffer, q, int, s, 32, 4); - VLOAD(vector, buffer, q, uint, u, 16, 8); - VLOAD(vector, buffer, q, uint, u, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector, buffer, q, float, f, 32, 4); - - /* Initialize vector2. */ -@@ -71,6 +93,9 @@ void exec_vmul_lane (void) - VDUP(vector2, , int, s, 32, 2, 0x22); - VDUP(vector2, , uint, u, 16, 4, 0x444); - VDUP(vector2, , uint, u, 32, 2, 0x532); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, 22.8f); -+#endif - VDUP(vector2, , float, f, 32, 2, 22.8f); - - /* Choose lane arbitrarily. */ -@@ -78,22 +103,34 @@ void exec_vmul_lane (void) - TEST_VMUL_LANE(, int, s, 32, 2, 2, 1); - TEST_VMUL_LANE(, uint, u, 16, 4, 4, 2); - TEST_VMUL_LANE(, uint, u, 32, 2, 2, 1); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VMUL_LANE(, float, f, 16, 4, 4, 1); -+#endif - TEST_VMUL_LANE(, float, f, 32, 2, 2, 1); - TEST_VMUL_LANE(q, int, s, 16, 8, 4, 2); - TEST_VMUL_LANE(q, int, s, 32, 4, 2, 0); - TEST_VMUL_LANE(q, uint, u, 16, 8, 4, 2); - TEST_VMUL_LANE(q, uint, u, 32, 4, 2, 1); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VMUL_LANE(q, float, f, 16, 8, 4, 0); -+#endif - TEST_VMUL_LANE(q, float, f, 32, 4, 2, 0); - -- CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); -+#endif - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); - } - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_lane_f16_1.c -@@ -0,0 +1,454 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (13.4) -+#define B FP16_C (-56.8) -+#define C FP16_C (-34.8) -+#define D FP16_C (12) -+#define 
E FP16_C (63.1) -+#define F FP16_C (19.1) -+#define G FP16_C (-4.8) -+#define H FP16_C (77) -+ -+#define I FP16_C (0.7) -+#define J FP16_C (-78) -+#define K FP16_C (11.23) -+#define L FP16_C (98) -+#define M FP16_C (87.1) -+#define N FP16_C (-8) -+#define O FP16_C (-1.1) -+#define P FP16_C (-9.7) -+ -+/* Expected results for vmul_lane. */ -+VECT_VAR_DECL (expected0_static, hfloat, 16, 4) [] -+ = { 0x629B /* A * E. */, -+ 0xEB00 /* B * E. */, -+ 0xE84A /* C * E. */, -+ 0x61EA /* D * E. */ }; -+ -+VECT_VAR_DECL (expected1_static, hfloat, 16, 4) [] -+ = { 0x5BFF /* A * F. */, -+ 0xE43D /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0x5B29 /* D * F. */ }; -+ -+VECT_VAR_DECL (expected2_static, hfloat, 16, 4) [] -+ = { 0xD405 /* A * G. */, -+ 0x5C43 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0xD334 /* D * G. */ }; -+ -+VECT_VAR_DECL (expected3_static, hfloat, 16, 4) [] -+ = { 0x6408 /* A * H. */, -+ 0xEC46 /* B * H. */, -+ 0xE93C /* C * H. */, -+ 0x6338 /* D * H. */ }; -+ -+/* Expected results for vmulq_lane. */ -+VECT_VAR_DECL (expected0_static, hfloat, 16, 8) [] -+ = { 0x629B /* A * E. */, -+ 0xEB00 /* B * E. */, -+ 0xE84A /* C * E. */, -+ 0x61EA /* D * E. */, -+ 0x5186 /* I * E. */, -+ 0xECCE /* J * E. */, -+ 0x6189 /* K * E. */, -+ 0x6E0A /* L * E. */ }; -+ -+VECT_VAR_DECL (expected1_static, hfloat, 16, 8) [] -+ = { 0x5BFF /* A * F. */, -+ 0xE43D /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0x5B29 /* D * F. */, -+ 0x4AAF /* I * F. */, -+ 0xE5D1 /* J * F. */, -+ 0x5AB3 /* K * F. */, -+ 0x674F /* L * F. */ }; -+ -+VECT_VAR_DECL (expected2_static, hfloat, 16, 8) [] -+ = { 0xD405 /* A * G. */, -+ 0x5C43 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0xD334 /* D * G. */, -+ 0xC2B9 /* I * G. */, -+ 0x5DDA /* J * G. */, -+ 0xD2BD /* K * G. */, -+ 0xDF5A /* L * G. */ }; -+ -+VECT_VAR_DECL (expected3_static, hfloat, 16, 8) [] -+ = { 0x6408 /* A * H. */, -+ 0xEC46 /* B * H. */, -+ 0xE93C /* C * H. */, -+ 0x6338 /* D * H. */, -+ 0x52BD /* I * H. */, -+ 0xEDDE /* J * H. */, -+ 0x62C1 /* K * H. */, -+ 0x6F5E /* L * H. */ }; -+ -+/* Expected results for vmul_laneq. */ -+VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 4) [] -+ = { 0x629B /* A * E. */, -+ 0xEB00 /* B * E. */, -+ 0xE84A /* C * E. */, -+ 0x61EA /* D * E. */ }; -+ -+VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 4) [] -+ = { 0x5BFF /* A * F. */, -+ 0xE43D /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0x5B29 /* D * F. */ }; -+ -+VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 4) [] -+ = { 0xD405 /* A * G. */, -+ 0x5C43 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0xD334 /* D * G. */ }; -+ -+VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 4) [] -+ = { 0x6408 /* A * H. */, -+ 0xEC46 /* B * H. */, -+ 0xE93C /* C * H. */, -+ 0x6338 /* D * H. */ }; -+ -+VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 4) [] -+ = { 0x648F /* A * M. */, -+ 0xECD5 /* B * M. */, -+ 0xE9ED /* C * M. */, -+ 0x6416 /* D * M. */ }; -+ -+VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 4) [] -+ = { 0xD6B3 /* A * N. */, -+ 0x5F1A /* B * N. */, -+ 0x5C5A /* C * N. */, -+ 0xD600 /* D * N. */ }; -+ -+VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 4) [] -+ = { 0xCB5E /* A * O. */, -+ 0x53CF /* B * O. */, -+ 0x50C9 /* C * O. */, -+ 0xCA99 /* D * O. */ }; -+ -+VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 4) [] -+ = { 0xD810 /* A * P. */, -+ 0x604F /* B * P. */, -+ 0x5D47 /* C * P. */, -+ 0xD747 /* D * P. */ }; -+ -+/* Expected results for vmulq_laneq. */ -+VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 8) [] -+ = { 0x629B /* A * E. */, -+ 0xEB00 /* B * E. 
*/, -+ 0xE84A /* C * E. */, -+ 0x61EA /* D * E. */, -+ 0x5186 /* I * E. */, -+ 0xECCE /* J * E. */, -+ 0x6189 /* K * E. */, -+ 0x6E0A /* L * E. */ }; -+ -+VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 8) [] -+ = { 0x5BFF /* A * F. */, -+ 0xE43D /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0x5B29 /* D * F. */, -+ 0x4AAF /* I * F. */, -+ 0xE5D1 /* J * F. */, -+ 0x5AB3 /* K * F. */, -+ 0x674F /* L * F. */ }; -+ -+VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 8) [] -+ = { 0xD405 /* A * G. */, -+ 0x5C43 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0xD334 /* D * G. */, -+ 0xC2B9 /* I * G. */, -+ 0x5DDA /* J * G. */, -+ 0xD2BD /* K * G. */, -+ 0xDF5A /* L * G. */ }; -+ -+VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 8) [] -+ = { 0x6408 /* A * H. */, -+ 0xEC46 /* B * H. */, -+ 0xE93C /* C * H. */, -+ 0x6338 /* D * H. */, -+ 0x52BD /* I * H. */, -+ 0xEDDE /* J * H. */, -+ 0x62C1 /* K * H. */, -+ 0x6F5E /* L * H. */ }; -+ -+VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 8) [] -+ = { 0x648F /* A * M. */, -+ 0xECD5 /* B * M. */, -+ 0xE9ED /* C * M. */, -+ 0x6416 /* D * M. */, -+ 0x53A0 /* I * M. */, -+ 0xEEA3 /* J * M. */, -+ 0x63A4 /* K * M. */, -+ 0x702B /* L * M. */ }; -+ -+VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 8) [] -+ = { 0xD6B3 /* A * N. */, -+ 0x5F1A /* B * N. */, -+ 0x5C5A /* C * N. */, -+ 0xD600 /* D * N. */, -+ 0xC59A /* I * N. */, -+ 0x60E0 /* J * N. */, -+ 0xD59D /* K * N. */, -+ 0xE220 /* L * N. */ }; -+ -+VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 8) [] -+ = { 0xCB5E /* A * O. */, -+ 0x53CF /* B * O. */, -+ 0x50C9 /* C * O. */, -+ 0xCA99 /* D * O. */, -+ 0xBA29 /* I * O. */, -+ 0x555C /* J * O. */, -+ 0xCA2C /* K * O. */, -+ 0xD6BC /* L * O. */ }; -+ -+VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 8) [] -+ = { 0xD810 /* A * P. */, -+ 0x604F /* B * P. */, -+ 0x5D47 /* C * P. */, -+ 0xD747 /* D * P. */, -+ 0xC6CB /* I * P. */, -+ 0x61EA /* J * P. */, -+ 0xD6CF /* K * P. */, -+ 0xE36E /* L * P. 
*/ }; -+ -+void exec_vmul_lane_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VMUL_LANE (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 4); -+ DECL_VARIABLE(vsrc_2, float, 16, 4); -+ VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 0); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMULQ_LANE (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 8); -+ VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 0); -+ -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMUL_LANEQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_2, float, 16, 8); -+ VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 0); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, 
expected_laneq0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq3_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 4); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq4_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 5); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq5_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 6); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq6_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmul_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 7); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq7_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMULQ_LANEQ (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq3_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 
4); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq4_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 5); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq5_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 6); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq6_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 7); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq7_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vmul_lane_f16 (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmul_n.c -@@ -7,6 +7,9 @@ VECT_VAR_DECL(expected,int,16,4) [] = { 0xfef0, 0xff01, 0xff12, 0xff23 }; - VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffde0, 0xfffffe02 }; - VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfcd0, 0xfd03, 0xfd36, 0xfd69 }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffbc0, 0xfffffc04 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xdd93, 0xdd3a, 0xdce1, 0xdc87 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc3b26666, 0xc3a74000 }; - VECT_VAR_DECL(expected,int,16,8) [] = { 0xfab0, 0xfb05, 0xfb5a, 0xfbaf, - 0xfc04, 0xfc59, 0xfcae, 0xfd03 }; -@@ -16,6 +19,10 @@ VECT_VAR_DECL(expected,uint,16,8) [] = { 0xf890, 0xf907, 0xf97e, 0xf9f5, - 0xfa6c, 0xfae3, 0xfb5a, 0xfbd1 }; - VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffff780, 0xfffff808, - 0xfffff890, 0xfffff918 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xe58e, 0xe535, 0xe4dc, 0xe483, -+ 0xe42a, 0xe3a3, 0xe2f2, 0xe240 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc4b1cccd, 0xc4a6b000, - 0xc49b9333, 0xc4907667 }; - -@@ -50,6 +57,13 @@ void FNNAME (INSN_NAME) (void) - DECL_VMUL(vector); - DECL_VMUL(vector_res); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif -+ - clean_results (); - - /* Initialize vector from pre-initialized values. */ -@@ -57,11 +71,17 @@ void FNNAME (INSN_NAME) (void) - VLOAD(vector, buffer, , int, s, 32, 2); - VLOAD(vector, buffer, , uint, u, 16, 4); - VLOAD(vector, buffer, , uint, u, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, int, s, 16, 8); - VLOAD(vector, buffer, q, int, s, 32, 4); - VLOAD(vector, buffer, q, uint, u, 16, 8); - VLOAD(vector, buffer, q, uint, u, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector, buffer, q, float, f, 32, 4); - - /* Choose multiplier arbitrarily. 
*/ -@@ -69,22 +89,34 @@ void FNNAME (INSN_NAME) (void) - TEST_VMUL_N(, int, s, 32, 2, 0x22); - TEST_VMUL_N(, uint, u, 16, 4, 0x33); - TEST_VMUL_N(, uint, u, 32, 2, 0x44); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VMUL_N(, float, f, 16, 4, 22.3f); -+#endif - TEST_VMUL_N(, float, f, 32, 2, 22.3f); - TEST_VMUL_N(q, int, s, 16, 8, 0x55); - TEST_VMUL_N(q, int, s, 32, 4, 0x66); - TEST_VMUL_N(q, uint, u, 16, 8, 0x77); - TEST_VMUL_N(q, uint, u, 32, 4, 0x88); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VMUL_N(q, float, f, 16, 8, 88.9f); -+#endif - TEST_VMUL_N(q, float, f, 32, 4, 88.9f); - -- CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, ""); -+#endif - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, ""); - } - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_f16_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+#define INFF __builtin_inf () -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0xc854 /* -8.656250 */, -+ 0x5cd8 /* 310.000000 */, -+ 0x60b0 /* 600.000000 */, -+ 0xa019 /* -0.008003 */, -+ 0xbc9a /* -1.150391 */, -+ 0xc8cf /* -9.617188 */, -+ 0x51fd /* 47.906250 */, -+ 0x4634 /* 6.203125 */, -+ 0xc0d9 /* -2.423828 */, -+ 0x3c9a /* 1.150391 */, -+ 0xc79a /* -7.601562 */, -+ 0x5430 /* 67.000000 */, -+ 0xbfd0 /* -1.953125 */, -+ 0x46ac /* 6.671875 */, -+ 0xfc00 /* -inf */, -+ 0xfc00 /* -inf */ -+}; -+ -+#define TEST_MSG "VMULH_F16" -+#define INSN_NAME vmulh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. 
*/ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulh_lane_f16_1.c -@@ -0,0 +1,90 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (13.4) -+#define B FP16_C (-56.8) -+#define C FP16_C (-34.8) -+#define D FP16_C (12) -+#define E FP16_C (63.1) -+#define F FP16_C (19.1) -+#define G FP16_C (-4.8) -+#define H FP16_C (77) -+ -+#define I FP16_C (0.7) -+#define J FP16_C (-78) -+#define K FP16_C (11.23) -+#define L FP16_C (98) -+#define M FP16_C (87.1) -+#define N FP16_C (-8) -+#define O FP16_C (-1.1) -+#define P FP16_C (-9.7) -+ -+extern void abort (); -+ -+float16_t src1[8] = { A, B, C, D, I, J, K, L }; -+VECT_VAR_DECL (src2, float, 16, 4) [] = { E, F, G, H }; -+VECT_VAR_DECL (src2, float, 16, 8) [] = { E, F, G, H, M, N, O, P }; -+ -+/* Expected results for vmulh_lane. */ -+uint16_t expected[4] = { 0x629B /* A * E. */, 0xE43D /* B * F. */, -+ 0x5939 /* C * G. */, 0x6338 /* D * H. */ }; -+ -+ -+/* Expected results for vmulh_laneq. */ -+uint16_t expected_laneq[8] = { 0x629B /* A * E. */, -+ 0xE43D /* B * F. */, -+ 0x5939 /* C * G. */, -+ 0x6338 /* D * H. */, -+ 0x53A0 /* I * M. */, -+ 0x60E0 /* J * N. */, -+ 0xCA2C /* K * O. */, -+ 0xE36E /* L * P. */ }; -+ -+void exec_vmulh_lane_f16 (void) -+{ -+#define CHECK_LANE(N)\ -+ ret = vmulh_lane_f16 (src1[N], VECT_VAR (vsrc2, float, 16, 4), N);\ -+ if (*(uint16_t *) &ret != expected[N])\ -+ abort (); -+ -+ DECL_VARIABLE(vsrc2, float, 16, 4); -+ VLOAD (vsrc2, src2, , float, f, 16, 4); -+ float16_t ret; -+ -+ CHECK_LANE(0) -+ CHECK_LANE(1) -+ CHECK_LANE(2) -+ CHECK_LANE(3) -+ -+#undef CHECK_LANE -+#define CHECK_LANE(N)\ -+ ret = vmulh_laneq_f16 (src1[N], VECT_VAR (vsrc2, float, 16, 8), N);\ -+ if (*(uint16_t *) &ret != expected_laneq[N])\ -+ abort (); -+ -+ DECL_VARIABLE(vsrc2, float, 16, 8); -+ VLOAD (vsrc2, src2, q, float, f, 16, 8); -+ -+ CHECK_LANE(0) -+ CHECK_LANE(1) -+ CHECK_LANE(2) -+ CHECK_LANE(3) -+ CHECK_LANE(4) -+ CHECK_LANE(5) -+ CHECK_LANE(6) -+ CHECK_LANE(7) -+} -+ -+int -+main (void) -+{ -+ exec_vmulh_lane_f16 (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull.c -@@ -59,13 +59,13 @@ void exec_vmull (void) - TEST_VMULL(uint, u, 32, 64, 2); - TEST_VMULL(poly, p, 8, 16, 8); - -- CHECK(TEST_MSG, int, 16, 8, PRIx64, expected, ""); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); -- CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 16, 8, PRIx64, expected, ""); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmull_lane.c -@@ -54,9 +54,9 @@ void exec_vmull_lane (void) - TEST_VMULL_LANE(uint, u, 32, 64, 2, 1); - - CHECK(TEST_MSG, int, 32, 
4, PRIx32, expected, ""); -- CHECK(TEST_MSG, int, 64, 2, PRIx32, expected, ""); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 64, 2, PRIx32, expected, ""); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected, ""); - } - - int main (void) ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_f16_1.c -@@ -0,0 +1,84 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (13.4) -+#define B FP16_C (__builtin_inff ()) -+#define C FP16_C (-34.8) -+#define D FP16_C (-__builtin_inff ()) -+#define E FP16_C (63.1) -+#define F FP16_C (0.0) -+#define G FP16_C (-4.8) -+#define H FP16_C (0.0) -+ -+#define I FP16_C (0.7) -+#define J FP16_C (-__builtin_inff ()) -+#define K FP16_C (11.23) -+#define L FP16_C (98) -+#define M FP16_C (87.1) -+#define N FP16_C (-0.0) -+#define O FP16_C (-1.1) -+#define P FP16_C (7) -+ -+/* Expected results for vmulx. */ -+VECT_VAR_DECL (expected_static, hfloat, 16, 4) [] -+ = { 0x629B /* A * E. */, 0x4000 /* FP16_C (2.0f). */, -+ 0x5939 /* C * G. */, 0xC000 /* FP16_C (-2.0f). */ }; -+ -+VECT_VAR_DECL (expected_static, hfloat, 16, 8) [] -+ = { 0x629B /* A * E. */, 0x4000 /* FP16_C (2.0f). */, -+ 0x5939 /* C * G. */, 0xC000 /* FP16_C (-2.0f). */, -+ 0x53A0 /* I * M. */, 0x4000 /* FP16_C (2.0f). */, -+ 0xCA2C /* K * O. */, 0x615C /* L * P. */ }; -+ -+void exec_vmulx_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VMULX (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 4); -+ DECL_VARIABLE(vsrc_2, float, 16, 4); -+ VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vmulx_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4)); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMULXQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 8); -+ DECL_VARIABLE(vsrc_2, float, 16, 8); -+ VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vmulxq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8)); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vmulx_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_lane_f16_1.c -@@ -0,0 +1,452 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (13.4) 
-+#define B FP16_C (__builtin_inff ()) -+#define C FP16_C (-34.8) -+#define D FP16_C (-__builtin_inff ()) -+#define E FP16_C (-0.0) -+#define F FP16_C (19.1) -+#define G FP16_C (-4.8) -+#define H FP16_C (0.0) -+ -+#define I FP16_C (0.7) -+#define J FP16_C (-78) -+#define K FP16_C (-__builtin_inff ()) -+#define L FP16_C (98) -+#define M FP16_C (87.1) -+#define N FP16_C (-8) -+#define O FP16_C (-1.1) -+#define P FP16_C (-0.0) -+ -+/* Expected results for vmulx_lane. */ -+VECT_VAR_DECL (expected0_static, hfloat, 16, 4) [] -+ = { 0x8000 /* A * E. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* C * E. */, -+ 0x4000 /* FP16_C (2.0f). */ }; -+ -+VECT_VAR_DECL (expected1_static, hfloat, 16, 4) [] -+ = { 0x5BFF /* A * F. */, -+ 0x7C00 /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0xFC00 /* D * F. */ }; -+ -+VECT_VAR_DECL (expected2_static, hfloat, 16, 4) [] -+ = { 0xD405 /* A * G. */, -+ 0xFC00 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0x7C00 /* D * G. */ }; -+ -+VECT_VAR_DECL (expected3_static, hfloat, 16, 4) [] -+ = { 0x0000 /* A * H. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* C * H. */, -+ 0xC000 /* FP16_C (-2.0f). */ }; -+ -+/* Expected results for vmulxq_lane. */ -+VECT_VAR_DECL (expected0_static, hfloat, 16, 8) [] -+ = { 0x8000 /* A * E. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* C * E. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* I * E. */, -+ 0x0000 /* J * E. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* L * E. */ }; -+ -+VECT_VAR_DECL (expected1_static, hfloat, 16, 8) [] -+ = { 0x5BFF /* A * F. */, -+ 0x7C00 /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0xFC00 /* D * F. */, -+ 0x4AAF /* I * F. */, -+ 0xE5D1 /* J * F. */, -+ 0xFC00 /* K * F. */, -+ 0x674F /* L * F. */ }; -+ -+VECT_VAR_DECL (expected2_static, hfloat, 16, 8) [] -+ = { 0xD405 /* A * G. */, -+ 0xFC00 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0x7C00 /* D * G. */, -+ 0xC2B9 /* I * G. */, -+ 0x5DDA /* J * G. */, -+ 0x7C00 /* K * G. */, -+ 0xDF5A /* L * G. */ }; -+ -+VECT_VAR_DECL (expected3_static, hfloat, 16, 8) [] -+ = { 0x0000 /* A * H. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* C * H. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* I * H. */, -+ 0x8000 /* J * H. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* L * H. */}; -+ -+/* Expected results for vmulx_laneq. */ -+VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 4) [] -+ = { 0x8000 /* A * E. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* C * E. */, -+ 0x4000 /* FP16_C (2.0f). */ }; -+ -+VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 4) [] -+ = { 0x5BFF /* A * F. */, -+ 0x7C00 /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0xFC00 /* D * F. */ }; -+ -+VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 4) [] -+ = { 0xD405 /* A * G. */, -+ 0xFC00 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0x7C00 /* D * G. */ }; -+ -+VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 4) [] -+ = { 0x0000 /* A * H. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* C * H. */, -+ 0xC000 /* FP16_C (-2.0f). */ }; -+ -+VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 4) [] -+ = { 0x648F /* A * M. */, -+ 0x7C00 /* B * M. */, -+ 0xE9ED /* C * M. */, -+ 0xFC00 /* D * M. */ }; -+ -+VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 4) [] -+ = { 0xD6B3 /* A * N. */, -+ 0xFC00 /* B * N. */, -+ 0x5C5A /* C * N. */, -+ 0x7C00 /* D * N. */ }; -+ -+VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 4) [] -+ = { 0xCB5E /* A * O. */, -+ 0xFC00 /* B * O. */, -+ 0x50C9 /* C * O. */, -+ 0x7C00 /* D * O. */ }; -+ -+VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 4) [] -+ = { 0x8000 /* A * P. 
*/, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* C * P. */, -+ 0x4000 /* FP16_C (2.0f). */ }; -+ -+VECT_VAR_DECL (expected_laneq0_static, hfloat, 16, 8) [] -+ = { 0x8000 /* A * E. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* C * E. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* I * E. */, -+ 0x0000 /* J * E. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* L * E. */ }; -+ -+VECT_VAR_DECL (expected_laneq1_static, hfloat, 16, 8) [] -+ = { 0x5BFF /* A * F. */, -+ 0x7C00 /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0xFC00 /* D * F. */, -+ 0x4AAF /* I * F. */, -+ 0xE5D1 /* J * F. */, -+ 0xFC00 /* K * F. */, -+ 0x674F /* L * F. */ }; -+ -+VECT_VAR_DECL (expected_laneq2_static, hfloat, 16, 8) [] -+ = { 0xD405 /* A * G. */, -+ 0xFC00 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0x7C00 /* D * G. */, -+ 0xC2B9 /* I * G. */, -+ 0x5DDA /* J * G. */, -+ 0x7C00 /* K * G. */, -+ 0xDF5A /* L * G. */ }; -+ -+VECT_VAR_DECL (expected_laneq3_static, hfloat, 16, 8) [] -+ = { 0x0000 /* A * H. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* C * H. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* I * H. */, -+ 0x8000 /* J * H. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* L * H. */ }; -+ -+VECT_VAR_DECL (expected_laneq4_static, hfloat, 16, 8) [] -+ = { 0x648F /* A * M. */, -+ 0x7C00 /* B * M. */, -+ 0xE9ED /* C * M. */, -+ 0xFC00 /* D * M. */, -+ 0x53A0 /* I * M. */, -+ 0xEEA3 /* J * M. */, -+ 0xFC00 /* K * M. */, -+ 0x702B /* L * M. */ }; -+ -+VECT_VAR_DECL (expected_laneq5_static, hfloat, 16, 8) [] -+ = { 0xD6B3 /* A * N. */, -+ 0xFC00 /* B * N. */, -+ 0x5C5A /* C * N. */, -+ 0x7C00 /* D * N. */, -+ 0xC59A /* I * N. */, -+ 0x60E0 /* J * N. */, -+ 0x7C00 /* K * N. */, -+ 0xE220 /* L * N. */ }; -+ -+VECT_VAR_DECL (expected_laneq6_static, hfloat, 16, 8) [] -+ = { 0xCB5E /* A * O. */, -+ 0xFC00 /* B * O. */, -+ 0x50C9 /* C * O. */, -+ 0x7C00 /* D * O. */, -+ 0xBA29 /* I * O. */, -+ 0x555C /* J * O. */, -+ 0x7C00 /* K * O. */, -+ 0xD6BC /* L * O. */ }; -+ -+VECT_VAR_DECL (expected_laneq7_static, hfloat, 16, 8) [] -+ = { 0x8000 /* A * P. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* C * P. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* I * P. */, -+ 0x0000 /* J * P. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* L * P. 
*/ }; -+ -+void exec_vmulx_lane_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VMULX_LANE (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 4); -+ DECL_VARIABLE(vsrc_2, float, 16, 4); -+ VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vmulx_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 0); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_lane_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4), 3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMULXQ_LANE (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 8); -+ VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vmulxq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_lane_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 4), 3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMULX_LANEQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_2, float, 16, 8); -+ VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 0); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, 
PRIx16, expected_laneq0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 1); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 2); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 3); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq3_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 4); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq4_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 5); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq5_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 6); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq6_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 8), 7); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_laneq7_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMULXQ_LANEQ (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 0); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 1); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 2); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 3); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq3_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR 
(vsrc_2, float, 16, 8), 4); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq4_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 5); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq5_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 6); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq6_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_laneq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8), 7); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_laneq7_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vmulx_lane_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulx_n_f16_1.c -@@ -0,0 +1,177 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (13.4) -+#define B FP16_C (__builtin_inff ()) -+#define C FP16_C (-34.8) -+#define D FP16_C (-__builtin_inff ()) -+#define E FP16_C (-0.0) -+#define F FP16_C (19.1) -+#define G FP16_C (-4.8) -+#define H FP16_C (0.0) -+ -+float16_t elemE = E; -+float16_t elemF = F; -+float16_t elemG = G; -+float16_t elemH = H; -+ -+#define I FP16_C (0.7) -+#define J FP16_C (-78) -+#define K FP16_C (11.23) -+#define L FP16_C (98) -+#define M FP16_C (87.1) -+#define N FP16_C (-8) -+#define O FP16_C (-1.1) -+#define P FP16_C (-9.7) -+ -+/* Expected results for vmulx_n. */ -+VECT_VAR_DECL (expected0_static, hfloat, 16, 4) [] -+ = { 0x8000 /* A * E. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* C * E. */, -+ 0x4000 /* FP16_C (2.0f). */ }; -+ -+VECT_VAR_DECL (expected1_static, hfloat, 16, 4) [] -+ = { 0x5BFF /* A * F. */, -+ 0x7C00 /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0xFC00 /* D * F. */ }; -+ -+VECT_VAR_DECL (expected2_static, hfloat, 16, 4) [] -+ = { 0xD405 /* A * G. */, -+ 0xFC00 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0x7C00 /* D * G. */ }; -+ -+VECT_VAR_DECL (expected3_static, hfloat, 16, 4) [] -+ = { 0x0000 /* A * H. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* C * H. */, -+ 0xC000 /* FP16_C (-2.0f). */ }; -+ -+VECT_VAR_DECL (expected0_static, hfloat, 16, 8) [] -+ = { 0x8000 /* A * E. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* C * E. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* I * E. */, -+ 0x0000 /* J * E. */, -+ 0x8000 /* K * E. */, -+ 0x8000 /* L * E. */ }; -+ -+VECT_VAR_DECL (expected1_static, hfloat, 16, 8) [] -+ = { 0x5BFF /* A * F. */, -+ 0x7C00 /* B * F. */, -+ 0xE131 /* C * F. */, -+ 0xFC00 /* D * F. */, -+ 0x4AAF /* I * F. */, -+ 0xE5D1 /* J * F. */, -+ 0x5AB3 /* K * F. */, -+ 0x674F /* L * F. */ }; -+ -+VECT_VAR_DECL (expected2_static, hfloat, 16, 8) [] -+ = { 0xD405 /* A * G. */, -+ 0xFC00 /* B * G. */, -+ 0x5939 /* C * G. */, -+ 0x7C00 /* D * G. */, -+ 0xC2B9 /* I * G. */, -+ 0x5DDA /* J * G. 
*/, -+ 0xD2BD /* K * G. */, -+ 0xDF5A /* L * G. */ }; -+ -+VECT_VAR_DECL (expected3_static, hfloat, 16, 8) [] -+ = { 0x0000 /* A * H. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x8000 /* C * H. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x0000 /* I * H. */, -+ 0x8000 /* J * H. */, -+ 0x0000 /* K * H. */, -+ 0x0000 /* L * H. */ }; -+ -+void exec_vmulx_n_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VMULX_N (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE (vsrc_1, float, 16, 4); -+ VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vmulx_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), elemE); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), elemF); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), elemG); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vmulx_n_f16 (VECT_VAR (vsrc_1, float, 16, 4), elemH); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected3_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VMULXQ_N (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE (vsrc_1, float, 16, 8); -+ VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vmulxq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), elemE); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected0_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), elemF); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected1_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), elemG); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected2_static, ""); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vmulxq_n_f16 (VECT_VAR (vsrc_1, float, 16, 8), elemH); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected3_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vmulx_n_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_f16_1.c -@@ -0,0 +1,50 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. 
*/ -+#define A 13.4 -+#define B __builtin_inff () -+#define C -34.8 -+#define D -__builtin_inff () -+#define E 63.1 -+#define F 0.0 -+#define G -4.8 -+#define H 0.0 -+ -+#define I 0.7 -+#define J -__builtin_inff () -+#define K 11.23 -+#define L 98 -+#define M 87.1 -+#define N -0.0 -+#define O -1.1 -+#define P 7 -+ -+float16_t input_1[] = { A, B, C, D, I, J, K, L }; -+float16_t input_2[] = { E, F, G, H, M, N, O, P }; -+uint16_t expected[] = { 0x629B /* A * E. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x5939 /* C * G. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x53A0 /* I * M. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0xCA2C /* K * O. */, -+ 0x615C /* L * P. */ }; -+ -+#define TEST_MSG "VMULXH_F16" -+#define INSN_NAME vmulxh_f16 -+ -+#define INPUT_1 input_1 -+#define INPUT_2 input_2 -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmulxh_lane_f16_1.c -@@ -0,0 +1,91 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (13.4) -+#define B FP16_C (__builtin_inff ()) -+#define C FP16_C (-34.8) -+#define D FP16_C (-__builtin_inff ()) -+#define E FP16_C (63.1) -+#define F FP16_C (0.0) -+#define G FP16_C (-4.8) -+#define H FP16_C (0.0) -+ -+#define I FP16_C (0.7) -+#define J FP16_C (-__builtin_inff ()) -+#define K FP16_C (11.23) -+#define L FP16_C (98) -+#define M FP16_C (87.1) -+#define N FP16_C (-0.0) -+#define O FP16_C (-1.1) -+#define P FP16_C (7) -+ -+extern void abort (); -+ -+float16_t src1[8] = { A, B, C, D, I, J, K, L }; -+VECT_VAR_DECL (src2, float, 16, 4) [] = { E, F, G, H }; -+VECT_VAR_DECL (src2, float, 16, 8) [] = { E, F, G, H, M, N, O, P }; -+ -+/* Expected results for vmulxh_lane. */ -+uint16_t expected[4] = { 0x629B /* A * E. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x5939 /* C * G. */, -+ 0xC000 /* FP16_C (-2.0f). */ }; -+ -+/* Expected results for vmulxh_laneq. */ -+uint16_t expected_laneq[8] = { 0x629B /* A * E. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0x5939 /* C * G. */, -+ 0xC000 /* FP16_C (-2.0f). */, -+ 0x53A0 /* I * M. */, -+ 0x4000 /* FP16_C (2.0f). */, -+ 0xCA2C /* K * O. */, -+ 0x615C /* L * P. 
*/ }; -+ -+void exec_vmulxh_lane_f16 (void) -+{ -+#define CHECK_LANE(N)\ -+ ret = vmulxh_lane_f16 (src1[N], VECT_VAR (vsrc2, float, 16, 4), N);\ -+ if (*(uint16_t *) &ret != expected[N])\ -+ abort (); -+ -+ DECL_VARIABLE(vsrc2, float, 16, 4); -+ VLOAD (vsrc2, src2, , float, f, 16, 4); -+ float16_t ret; -+ -+ CHECK_LANE(0) -+ CHECK_LANE(1) -+ CHECK_LANE(2) -+ CHECK_LANE(3) -+ -+#undef CHECK_LANE -+#define CHECK_LANE(N)\ -+ ret = vmulxh_laneq_f16 (src1[N], VECT_VAR (vsrc2, float, 16, 8), N);\ -+ if (*(uint16_t *) &ret != expected_laneq[N])\ -+ abort (); -+ -+ DECL_VARIABLE(vsrc2, float, 16, 8); -+ VLOAD (vsrc2, src2, q, float, f, 16, 8); -+ -+ CHECK_LANE(0) -+ CHECK_LANE(1) -+ CHECK_LANE(2) -+ CHECK_LANE(3) -+ CHECK_LANE(4) -+ CHECK_LANE(5) -+ CHECK_LANE(6) -+ CHECK_LANE(7) -+} -+ -+int -+main (void) -+{ -+ exec_vmulxh_lane_f16 (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmvn.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmvn.c -@@ -120,14 +120,14 @@ FNNAME (INSN_NAME) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vneg.c -@@ -21,24 +21,53 @@ VECT_VAR_DECL(expected,int,32,4) [] = { 0x10, 0xf, 0xe, 0xd }; - /* Expected results for float32 variants. Needs to be separated since - the generic test function does not test floating-point - versions. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_float16, hfloat, 16, 4) [] = { 0xc09a, 0xc09a, -+ 0xc09a, 0xc09a }; -+VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0xc2cd, 0xc2cd, -+ 0xc2cd, 0xc2cd, -+ 0xc2cd, 0xc2cd, -+ 0xc2cd, 0xc2cd }; -+#endif - VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0xc0133333, 0xc0133333 }; - VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0xc059999a, 0xc059999a, - 0xc059999a, 0xc059999a }; - - void exec_vneg_f32(void) - { -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+#endif - DECL_VARIABLE(vector, float, 32, 2); - DECL_VARIABLE(vector, float, 32, 4); - -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif - DECL_VARIABLE(vector_res, float, 32, 2); - DECL_VARIABLE(vector_res, float, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, 2.3f); -+ VDUP(vector, q, float, f, 16, 8, 3.4f); -+#endif - VDUP(vector, , float, f, 32, 2, 2.3f); - VDUP(vector, q, float, f, 32, 4, 3.4f); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_UNARY_OP(INSN_NAME, , float, f, 16, 4); -+ TEST_UNARY_OP(INSN_NAME, q, float, f, 16, 8); -+#endif - TEST_UNARY_OP(INSN_NAME, , float, f, 32, 2); - TEST_UNARY_OP(INSN_NAME, q, float, f, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_float16, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, ""); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, ""); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, ""); - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vnegh_f16_1.c -@@ -0,0 +1,39 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+uint16_t expected[] = -+{ -+ 0x8000 /* -0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0xc000 /* -2.000000 */, -+ 0xc233 /* -3.099609 */, -+ 0xcd00 /* -20.000000 */, -+ 0xb666 /* -0.399902 */, -+ 0x409a /* 2.300781 */, -+ 0xbd52 /* -1.330078 */, -+ 0x479a /* 7.601562 */, -+ 0xb4f6 /* -0.310059 */, -+ 0xb55d /* -0.335205 */, -+ 0xb800 /* -0.500000 */, -+ 0xbc00 /* -1.000000 */, -+ 0xca91 /* -13.132812 */, -+ 0x464d /* 6.300781 */, -+ 0xcd00 /* -20.000000 */, -+ 0xfc00 /* -inf */, -+ 0x7c00 /* inf */ -+}; -+ -+#define TEST_MSG "VNEGH_F16" -+#define INSN_NAME vnegh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpXXX.inc -@@ -21,6 +21,9 @@ void FNNAME (INSN_NAME) (void) - DECL_VARIABLE(vector, uint, 8, 8); - DECL_VARIABLE(vector, uint, 16, 4); - DECL_VARIABLE(vector, uint, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+#endif - DECL_VARIABLE(vector, float, 32, 2); - - DECL_VARIABLE(vector_res, int, 8, 8); -@@ -29,6 +32,9 @@ void FNNAME (INSN_NAME) (void) - DECL_VARIABLE(vector_res, uint, 8, 8); - DECL_VARIABLE(vector_res, uint, 16, 4); - DECL_VARIABLE(vector_res, uint, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, float, 16, 4); -+#endif - DECL_VARIABLE(vector_res, float, 32, 2); - - clean_results (); -@@ -40,6 +46,9 @@ void FNNAME (INSN_NAME) (void) - VLOAD(vector, buffer, , uint, u, 8, 8); - VLOAD(vector, buffer, , uint, u, 16, 4); - VLOAD(vector, buffer, , uint, u, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - - /* Apply a binary operator named INSN_NAME. */ -@@ -49,14 +58,20 @@ void FNNAME (INSN_NAME) (void) - TEST_VPXXX(INSN_NAME, uint, u, 8, 8); - TEST_VPXXX(INSN_NAME, uint, u, 16, 4); - TEST_VPXXX(INSN_NAME, uint, u, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VPXXX(INSN_NAME, float, f, 16, 4); -+#endif - TEST_VPXXX(INSN_NAME, float, f, 32, 2); - -- CHECK(TEST_MSG, int, 8, 8, PRIx32, expected, ""); -- CHECK(TEST_MSG, int, 16, 4, PRIx64, expected, ""); -+ CHECK(TEST_MSG, int, 8, 8, PRIx8, expected, ""); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 2, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 8, 8, PRIx32, expected, ""); -- CHECK(TEST_MSG, uint, 16, 4, PRIx64, expected, ""); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected, ""); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, ""); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, ""); - } - ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpadd.c -@@ -14,6 +14,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe1, 0xe5, 0xe9, 0xed, - 0xe1, 0xe5, 0xe9, 0xed }; - VECT_VAR_DECL(expected,uint,16,4) [] = { 0xffe1, 0xffe5, 0xffe1, 0xffe5 }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe1, 0xffffffe1 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcfc0, 0xcec0, 0xcfc0, 0xcec0 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1f80000, 0xc1f80000 }; - - #include "vpXXX.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmax.c -@@ -15,6 +15,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, - 0xf1, 0xf3, 0xf5, 0xf7 }; - VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff1, 0xfff3, 0xfff1, 0xfff3 }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff1, 0xfffffff1 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcb80, 0xca80, 0xcb80, 0xca80 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1700000, 0xc1700000 }; - 
- #include "vpXXX.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmin.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpmin.c -@@ -15,6 +15,9 @@ VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, - 0xf0, 0xf2, 0xf4, 0xf6 }; - VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0xfff0, 0xfff2 }; - VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff0 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, 0xcc00, 0xcb00 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1800000 }; - - #include "vpXXX.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vpminmaxnm_f16_1.c -@@ -0,0 +1,114 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (123.4) -+#define B FP16_C (__builtin_nanf ("")) /* NaN */ -+#define C FP16_C (-34.8) -+#define D FP16_C (1024) -+#define E FP16_C (663.1) -+#define F FP16_C (169.1) -+#define G FP16_C (-4.8) -+#define H FP16_C (-__builtin_nanf ("")) /* NaN */ -+ -+#define I FP16_C (0.7) -+#define J FP16_C (-78) -+#define K FP16_C (101.23) -+#define L FP16_C (-1098) -+#define M FP16_C (870.1) -+#define N FP16_C (-8781) -+#define O FP16_C (__builtin_inff ()) /* +Inf */ -+#define P FP16_C (-__builtin_inff ()) /* -Inf */ -+ -+ -+/* Expected results for vpminnm. */ -+VECT_VAR_DECL (expected_min_static, hfloat, 16, 4) [] -+ = { 0x57B6 /* A. */, 0xD05A /* C. */, 0x5949 /* F. */, 0xC4CD /* G. */ }; -+ -+VECT_VAR_DECL (expected_min_static, hfloat, 16, 8) [] -+ = { 0x57B6 /* A. */, 0xD05A /* C. */, 0xD4E0 /* J. */, 0xE44A /* L. */, -+ 0x5949 /* F. */, 0xC4CD /* G. */, 0xF04A /* N. */, 0xFC00 /* P. */ }; -+ -+/* expected_max results for vpmaxnm. */ -+VECT_VAR_DECL (expected_max_static, hfloat, 16, 4) [] -+ = { 0x57B6 /* A. */, 0x6400 /* D. */, 0x612E /* E. */, 0xC4CD /* G. */ }; -+ -+VECT_VAR_DECL (expected_max_static, hfloat, 16, 8) [] -+ = { 0x57B6 /* A. */, 0x6400 /* D. */, 0x399A /* I. */, 0x5654 /* K. */, -+ 0x612E /* E. */, 0xC4CD /* G. */, 0x62CC /* M. */, 0x7C00 /* O. 
*/ }; -+ -+void exec_vpminmaxnm_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VPMINNM (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 4); -+ DECL_VARIABLE(vsrc_2, float, 16, 4); -+ VECT_VAR_DECL (buf_src_1, float, 16, 4) [] = {A, B, C, D}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 4) [] = {E, F, G, H}; -+ VLOAD (vsrc_1, buf_src_1, , float, f, 16, 4); -+ VLOAD (vsrc_2, buf_src_2, , float, f, 16, 4); -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vpminnm_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4)); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_min_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VPMINNMQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc_1, float, 16, 8); -+ DECL_VARIABLE(vsrc_2, float, 16, 8); -+ VECT_VAR_DECL (buf_src_1, float, 16, 8) [] = {A, B, C, D, I, J, K, L}; -+ VECT_VAR_DECL (buf_src_2, float, 16, 8) [] = {E, F, G, H, M, N, O, P}; -+ VLOAD (vsrc_1, buf_src_1, q, float, f, 16, 8); -+ VLOAD (vsrc_2, buf_src_2, q, float, f, 16, 8); -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vpminnmq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8)); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_min_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VPMAXNM (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 4) -+ = vpmaxnm_f16 (VECT_VAR (vsrc_1, float, 16, 4), -+ VECT_VAR (vsrc_2, float, 16, 4)); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_max_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VPMAXNMQ (FP16)" -+ clean_results (); -+ -+ VECT_VAR (vector_res, float, 16, 8) -+ = vpmaxnmq_f16 (VECT_VAR (vsrc_1, float, 16, 8), -+ VECT_VAR (vsrc_2, float, 16, 8)); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_max_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vpminmaxnm_f16 (); -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqabs.c -@@ -90,9 +90,9 @@ void vqabs_extra() - TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat_min_neg, MSG); - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_neg, MSG); -- CHECK(TEST_MSG, int, 16, 4, PRIx8, expected_min_neg, MSG); -- CHECK(TEST_MSG, int, 32, 2, PRIx8, expected_min_neg, MSG); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_min_neg, MSG); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_min_neg, MSG); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_neg, MSG); -- CHECK(TEST_MSG, int, 16, 8, PRIx8, expected_min_neg, MSG); -- CHECK(TEST_MSG, int, 32, 4, PRIx8, expected_min_neg, MSG); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_min_neg, MSG); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_min_neg, MSG); - } ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqdmull.c -@@ -63,8 +63,8 @@ void FNNAME (INSN_NAME) (void) - TEST_VQDMULL(int, s, 16, 32, 4, expected_cumulative_sat, ""); - TEST_VQDMULL(int, s, 32, 64, 2, expected_cumulative_sat, ""); - -- CHECK (TEST_MSG, int, 32, 4, PRIx16, expected, ""); -- CHECK (TEST_MSG, int, 64, 2, 
PRIx32, expected, ""); -+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, ""); -+ CHECK (TEST_MSG, int, 64, 2, PRIx64, expected, ""); - - VDUP(vector, , int, s, 16, 4, 0x8000); - VDUP(vector2, , int, s, 16, 4, 0x8000); -@@ -75,8 +75,8 @@ void FNNAME (INSN_NAME) (void) - TEST_VQDMULL(int, s, 16, 32, 4, expected_cumulative_sat2, TEST_MSG2); - TEST_VQDMULL(int, s, 32, 64, 2, expected_cumulative_sat2, TEST_MSG2); - -- CHECK (TEST_MSG, int, 32, 4, PRIx16, expected2, TEST_MSG2); -- CHECK (TEST_MSG, int, 64, 2, PRIx32, expected2, TEST_MSG2); -+ CHECK (TEST_MSG, int, 32, 4, PRIx32, expected2, TEST_MSG2); -+ CHECK (TEST_MSG, int, 64, 2, PRIx64, expected2, TEST_MSG2); - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqneg.c -@@ -90,9 +90,9 @@ void vqneg_extra() - TEST_UNARY_SAT_OP(INSN_NAME, q, int, s, 32, 4, expected_cumulative_sat_min_neg, MSG); - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_min_neg, MSG); -- CHECK(TEST_MSG, int, 16, 4, PRIx8, expected_min_neg, MSG); -- CHECK(TEST_MSG, int, 32, 2, PRIx8, expected_min_neg, MSG); -+ CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_min_neg, MSG); -+ CHECK(TEST_MSG, int, 32, 2, PRIx32, expected_min_neg, MSG); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_min_neg, MSG); -- CHECK(TEST_MSG, int, 16, 8, PRIx8, expected_min_neg, MSG); -- CHECK(TEST_MSG, int, 32, 4, PRIx8, expected_min_neg, MSG); -+ CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_min_neg, MSG); -+ CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_min_neg, MSG); - } ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqtbX.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqtbX.c -@@ -318,13 +318,13 @@ void exec_vqtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vqtbl1, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vqtbl1, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbl1, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbl1, ""); - - #undef TEST_MSG - #define TEST_MSG "VQTBL1Q" - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vqtbl1q, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vqtbl1q, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbl1q, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbl1q, ""); - - /* Check vqtbl2. */ - clean_results (); -@@ -334,13 +334,13 @@ void exec_vqtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vqtbl2, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vqtbl2, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbl2, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbl2, ""); - - #undef TEST_MSG - #define TEST_MSG "VQTBL2Q" - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vqtbl2q, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vqtbl2q, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbl2q, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbl2q, ""); - - /* Check vqtbl3. 
*/ - clean_results (); -@@ -350,13 +350,13 @@ void exec_vqtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vqtbl3, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vqtbl3, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbl3, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbl3, ""); - - #undef TEST_MSG - #define TEST_MSG "VQTBL3Q" - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vqtbl3q, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vqtbl3q, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbl3q, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbl3q, ""); - - /* Check vqtbl4. */ - clean_results (); -@@ -366,13 +366,13 @@ void exec_vqtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vqtbl4, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vqtbl4, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbl4, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbl4, ""); - - #undef TEST_MSG - #define TEST_MSG "VQTBL4Q" - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vqtbl4q, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vqtbl4q, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbl4q, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbl4q, ""); - - - /* Now test VQTBX. */ -@@ -455,13 +455,13 @@ void exec_vqtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vqtbx1, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vqtbx1, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbx1, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbx1, ""); - - #undef TEST_MSG - #define TEST_MSG "VQTBX1Q" - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vqtbx1q, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vqtbx1q, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbx1q, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbx1q, ""); - - /* Check vqtbx2. */ - clean_results (); -@@ -471,13 +471,13 @@ void exec_vqtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vqtbx2, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vqtbx2, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbx2, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbx2, ""); - - #undef TEST_MSG - #define TEST_MSG "VQTBX2Q" - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vqtbx2q, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vqtbx2q, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbx2q, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbx2q, ""); - - /* Check vqtbx3. */ - clean_results (); -@@ -487,13 +487,13 @@ void exec_vqtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vqtbx3, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vqtbx3, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbx3, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbx3, ""); - - #undef TEST_MSG - #define TEST_MSG "VQTBX3Q" - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vqtbx3q, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vqtbx3q, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbx3q, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbx3q, ""); - - /* Check vqtbx4. 
*/ - clean_results (); -@@ -503,13 +503,13 @@ void exec_vqtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vqtbx4, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vqtbx4, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbx4, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vqtbx4, ""); - - #undef TEST_MSG - #define TEST_MSG "VQTBX4Q" - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vqtbx4q, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vqtbx4q, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbx4q, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vqtbx4q, ""); - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpe.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpe.c -@@ -7,6 +7,14 @@ - VECT_VAR_DECL(expected_positive,uint,32,2) [] = { 0xffffffff, 0xffffffff }; - VECT_VAR_DECL(expected_positive,uint,32,4) [] = { 0xbf000000, 0xbf000000, - 0xbf000000, 0xbf000000 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_positive, hfloat, 16, 4) [] = { 0x3834, 0x3834, -+ 0x3834, 0x3834 }; -+VECT_VAR_DECL(expected_positive, hfloat, 16, 8) [] = { 0x2018, 0x2018, -+ 0x2018, 0x2018, -+ 0x2018, 0x2018, -+ 0x2018, 0x2018 }; -+#endif - VECT_VAR_DECL(expected_positive,hfloat,32,2) [] = { 0x3f068000, 0x3f068000 }; - VECT_VAR_DECL(expected_positive,hfloat,32,4) [] = { 0x3c030000, 0x3c030000, - 0x3c030000, 0x3c030000 }; -@@ -15,24 +23,56 @@ VECT_VAR_DECL(expected_positive,hfloat,32,4) [] = { 0x3c030000, 0x3c030000, - VECT_VAR_DECL(expected_negative,uint,32,2) [] = { 0x80000000, 0x80000000 }; - VECT_VAR_DECL(expected_negative,uint,32,4) [] = { 0xee800000, 0xee800000, - 0xee800000, 0xee800000 }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_negative, hfloat, 16, 4) [] = { 0xae64, 0xae64, -+ 0xae64, 0xae64 }; -+VECT_VAR_DECL(expected_negative, hfloat, 16, 8) [] = { 0xa018, 0xa018, -+ 0xa018, 0xa018, -+ 0xa018, 0xa018, -+ 0xa018, 0xa018 }; -+#endif - VECT_VAR_DECL(expected_negative,hfloat,32,2) [] = { 0xbdcc8000, 0xbdcc8000 }; - VECT_VAR_DECL(expected_negative,hfloat,32,4) [] = { 0xbc030000, 0xbc030000, - 0xbc030000, 0xbc030000 }; - - /* Expected results with FP special values (NaN, infinity). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+#endif - VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; - VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Expected results with FP special values (zero, large value). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0x7c00, 0x7c00, -+ 0x7c00, 0x7c00 }; -+VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; -+#endif - VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7f800000, 0x7f800000 }; - VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Expected results with FP special values (-0, -infinity). 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp3, hfloat, 16, 4) [] = { 0xfc00, 0xfc00, -+ 0xfc00, 0xfc00}; -+VECT_VAR_DECL(expected_fp3, hfloat, 16, 8) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+#endif - VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; - VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x80000000, 0x80000000, - 0x80000000, 0x80000000 }; - - /* Expected results with FP special large negative value. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp4, hfloat, 16, 4) [] = { 0x8000, 0x8000, -+ 0x8000, 0x8000 }; -+#endif - VECT_VAR_DECL(expected_fp4,hfloat,32,2) [] = { 0x80000000, 0x80000000 }; - - #define TEST_MSG "VRECPE/VRECPEQ" -@@ -50,11 +90,19 @@ void exec_vrecpe(void) - /* No need for 64 bits variants. */ - DECL_VARIABLE(vector, uint, 32, 2); - DECL_VARIABLE(vector, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+#endif - DECL_VARIABLE(vector, float, 32, 2); - DECL_VARIABLE(vector, float, 32, 4); - - DECL_VARIABLE(vector_res, uint, 32, 2); - DECL_VARIABLE(vector_res, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif - DECL_VARIABLE(vector_res, float, 32, 2); - DECL_VARIABLE(vector_res, float, 32, 4); - -@@ -62,88 +110,165 @@ void exec_vrecpe(void) - - /* Choose init value arbitrarily, positive. */ - VDUP(vector, , uint, u, 32, 2, 0x12345678); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, 1.9f); -+#endif - VDUP(vector, , float, f, 32, 2, 1.9f); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, q, float, f, 16, 8, 125.0f); -+#endif - VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); - VDUP(vector, q, float, f, 32, 4, 125.0f); - - /* Apply the operator. */ - TEST_VRECPE(, uint, u, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPE(, float, f, 16, 4); -+#endif - TEST_VRECPE(, float, f, 32, 2); - TEST_VRECPE(q, uint, u, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPE(q, float, f, 16, 8); -+#endif - TEST_VRECPE(q, float, f, 32, 4); - - #define CMT " (positive input)" - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_positive, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_positive, CMT); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_positive, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_positive, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_positive, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_positive, CMT); - - /* Choose init value arbitrarily,negative. */ - VDUP(vector, , uint, u, 32, 2, 0xFFFFFFFF); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, -10.0f); -+#endif - VDUP(vector, , float, f, 32, 2, -10.0f); - VDUP(vector, q, uint, u, 32, 4, 0x89081234); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, q, float, f, 16, 8, -125.0f); -+#endif - VDUP(vector, q, float, f, 32, 4, -125.0f); - - /* Apply the operator. 
*/ - TEST_VRECPE(, uint, u, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPE(, float, f, 16, 4); -+#endif - TEST_VRECPE(, float, f, 32, 2); - TEST_VRECPE(q, uint, u, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPE(q, float, f, 16, 8); -+#endif - TEST_VRECPE(q, float, f, 32, 4); - - #undef CMT - #define CMT " (negative input)" - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_negative, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_negative, CMT); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_negative, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_negative, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_negative, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_negative, CMT); - - /* Test FP variants with special input values (NaN, infinity). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, NAN); -+ VDUP(vector, q, float, f, 16, 8, HUGE_VALF); -+#endif - VDUP(vector, , float, f, 32, 2, NAN); - VDUP(vector, q, float, f, 32, 4, HUGE_VALF); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPE(, float, f, 16, 4); -+ TEST_VRECPE(q, float, f, 16, 8); -+#endif - TEST_VRECPE(, float, f, 32, 2); - TEST_VRECPE(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (NaN, infinity)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); - - /* Test FP variants with special input values (zero, large value). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, 0.0f); -+ VDUP(vector, q, float, f, 16, 8, 8.97229e37f /*9.0e37f*/); -+#endif - VDUP(vector, , float, f, 32, 2, 0.0f); - VDUP(vector, q, float, f, 32, 4, 8.97229e37f /*9.0e37f*/); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPE(, float, f, 16, 4); -+ TEST_VRECPE(q, float, f, 16, 8); -+#endif - TEST_VRECPE(, float, f, 32, 2); - TEST_VRECPE(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (zero, large value)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); - - /* Test FP variants with special input values (-0, -infinity). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, -0.0f); -+ VDUP(vector, q, float, f, 16, 8, -HUGE_VALF); -+#endif - VDUP(vector, , float, f, 32, 2, -0.0f); - VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); - - /* Apply the operator. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPE(, float, f, 16, 4); -+ TEST_VRECPE(q, float, f, 16, 8); -+#endif - TEST_VRECPE(, float, f, 32, 2); - TEST_VRECPE(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (-0, -infinity)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp3, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp3, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); - - /* Test FP variants with special input values (large negative value). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, -9.0e37f); -+#endif - VDUP(vector, , float, f, 32, 2, -9.0e37f); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPE(, float, f, 16, 4); -+#endif - TEST_VRECPE(, float, f, 32, 2); - - #undef CMT - #define CMT " FP special (large negative value)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp4, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp4, CMT); - } - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpeh_f16_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+#define A 123.4 -+#define B 567.8 -+#define C 34.8 -+#define D 1024 -+#define E 663.1 -+#define F 144.0 -+#define G 4.8 -+#define H 77 -+ -+#define RECP_A 0x2028 /* 1/A. */ -+#define RECP_B 0x1734 /* 1/B. */ -+#define RECP_C 0x275C /* 1/C. */ -+#define RECP_D 0x13FC /* 1/D. */ -+#define RECP_E 0x162C /* 1/E. */ -+#define RECP_F 0x1F18 /* 1/F. */ -+#define RECP_G 0x32A8 /* 1/G. */ -+#define RECP_H 0x22A4 /* 1/H. */ -+ -+float16_t input[] = { A, B, C, D, E, F, G, H }; -+uint16_t expected[] = { RECP_A, RECP_B, RECP_C, RECP_D, -+ RECP_E, RECP_F, RECP_G, RECP_H }; -+ -+#define TEST_MSG "VRECPEH_F16" -+#define INSN_NAME vrecpeh_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecps.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecps.c -@@ -4,22 +4,51 @@ - #include <math.h> - - /* Expected results with positive input. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xd70c, 0xd70c, 0xd70c, 0xd70c }; -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xcedc, 0xcedc, 0xcedc, 0xcedc, -+ 0xcedc, 0xcedc, 0xcedc, 0xcedc }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2e19eb7, 0xc2e19eb7 }; - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1db851f, 0xc1db851f, - 0xc1db851f, 0xc1db851f }; - - /* Expected results with FP special values (NaN). 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+#endif - VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; - VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, - 0x7fc00000, 0x7fc00000 }; - - /* Expected results with FP special values (infinity, 0) and normal - values. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0xfc00, 0xfc00, -+ 0xfc00, 0xfc00 }; -+VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x4000, 0x4000, -+ 0x4000, 0x4000, -+ 0x4000, 0x4000, -+ 0x4000, 0x4000 }; -+#endif - VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; - VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x40000000, 0x40000000, - 0x40000000, 0x40000000 }; - - /* Expected results with FP special values (infinity, 0). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp3, hfloat, 16, 4) [] = { 0x4000, 0x4000, -+ 0x4000, 0x4000 }; -+VECT_VAR_DECL(expected_fp3, hfloat, 16, 8) [] = { 0x4000, 0x4000, -+ 0x4000, 0x4000, -+ 0x4000, 0x4000, -+ 0x4000, 0x4000 }; -+#endif - VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0x40000000, 0x40000000 }; - VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x40000000, 0x40000000, - 0x40000000, 0x40000000 }; -@@ -38,74 +67,143 @@ void exec_vrecps(void) - VECT_VAR(vector_res, T1, W, N)) - - /* No need for integer variants. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+#endif - DECL_VARIABLE(vector, float, 32, 2); - DECL_VARIABLE(vector, float, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 8); -+#endif - DECL_VARIABLE(vector2, float, 32, 2); - DECL_VARIABLE(vector2, float, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif - DECL_VARIABLE(vector_res, float, 32, 2); - DECL_VARIABLE(vector_res, float, 32, 4); - - clean_results (); - - /* Choose init value arbitrarily. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, 12.9f); -+ VDUP(vector, q, float, f, 16, 8, 9.2f); -+#endif - VDUP(vector, , float, f, 32, 2, 12.9f); - VDUP(vector, q, float, f, 32, 4, 9.2f); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, 8.9f); -+ VDUP(vector2, q, float, f, 16, 8, 3.2f); -+#endif - VDUP(vector2, , float, f, 32, 2, 8.9f); - VDUP(vector2, q, float, f, 32, 4, 3.2f); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPS(, float, f, 16, 4); -+ TEST_VRECPS(q, float, f, 16, 8); -+#endif - TEST_VRECPS(, float, f, 32, 2); - TEST_VRECPS(q, float, f, 32, 4); - - #define CMT " (positive input)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); - - - /* Test FP variants with special input values (NaN). 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, NAN); -+ VDUP(vector2, q, float, f, 16, 8, NAN); -+#endif - VDUP(vector, , float, f, 32, 2, NAN); - VDUP(vector2, q, float, f, 32, 4, NAN); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPS(, float, f, 16, 4); -+ TEST_VRECPS(q, float, f, 16, 8); -+#endif - TEST_VRECPS(, float, f, 32, 2); - TEST_VRECPS(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (NaN)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); - - - /* Test FP variants with special input values (infinity, 0). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, HUGE_VALF); -+ VDUP(vector, q, float, f, 16, 8, 0.0f); -+ VDUP(vector2, q, float, f, 16, 8, 3.2f); /* Restore a normal value. */ -+#endif - VDUP(vector, , float, f, 32, 2, HUGE_VALF); - VDUP(vector, q, float, f, 32, 4, 0.0f); - VDUP(vector2, q, float, f, 32, 4, 3.2f); /* Restore a normal value. */ - -+ - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPS(, float, f, 16, 4); -+ TEST_VRECPS(q, float, f, 16, 8); -+#endif - TEST_VRECPS(, float, f, 32, 2); - TEST_VRECPS(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (infinity, 0) and normal value" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); - - - /* Test FP variants with only special input values (infinity, 0). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, HUGE_VALF); -+ VDUP(vector, q, float, f, 16, 8, 0.0f); -+ VDUP(vector2, , float, f, 16, 4, 0.0f); -+ VDUP(vector2, q, float, f, 16, 8, HUGE_VALF); -+#endif - VDUP(vector, , float, f, 32, 2, HUGE_VALF); - VDUP(vector, q, float, f, 32, 4, 0.0f); - VDUP(vector2, , float, f, 32, 2, 0.0f); - VDUP(vector2, q, float, f, 32, 4, HUGE_VALF); - -+ - /* Apply the operator */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRECPS(, float, f, 16, 4); -+ TEST_VRECPS(q, float, f, 16, 8); -+#endif - TEST_VRECPS(, float, f, 32, 2); - TEST_VRECPS(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (infinity, 0)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp3, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp3, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpsh_f16_1.c -@@ -0,0 +1,50 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. 
*/ -+#define A 12.4 -+#define B -5.8 -+#define C -3.8 -+#define D 10 -+#define E 66.1 -+#define F 16.1 -+#define G -4.8 -+#define H -77 -+ -+#define I 0.7 -+#define J -78 -+#define K 10.23 -+#define L 98 -+#define M 87 -+#define N -87.81 -+#define O -1.1 -+#define P 47.8 -+ -+float16_t input_1[] = { A, B, C, D, I, J, K, L }; -+float16_t input_2[] = { E, F, G, H, M, N, O, P }; -+uint16_t expected[] = { 0xE264 /* 2.0f - A * E. */, -+ 0x55F6 /* 2.0f - B * F. */, -+ 0xCC10 /* 2.0f - C * G. */, -+ 0x6208 /* 2.0f - D * H. */, -+ 0xD35D /* 2.0f - I * M. */, -+ 0xEEB0 /* 2.0f - J * N. */, -+ 0x4A9F /* 2.0f - K * O. */, -+ 0xEC93 /* 2.0f - L * P. */ }; -+ -+#define TEST_MSG "VRECPSH_F16" -+#define INSN_NAME vrecpsh_f16 -+ -+#define INPUT_1 input_1 -+#define INPUT_2 input_2 -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "binary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrecpxh_f16_1.c -@@ -0,0 +1,32 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+ -+float16_t input[] = { 123.4, 567.8, 34.8, 1024, 663.1, 144.0, 4.8, 77 }; -+/* Expected results are calculated by: -+ for (index = 0; index < 8; index++) -+ { -+ uint16_t src_cast = * (uint16_t *) &src[index]; -+ * (uint16_t *) &expected[index] = -+ (src_cast & 0x8000) | (~src_cast & 0x7C00); -+ } */ -+uint16_t expected[8] = { 0x2800, 0x1C00, 0x2C00, 0x1800, -+ 0x1C00, 0x2400, 0x3800, 0x2800 }; -+ -+#define TEST_MSG "VRECPXH_F16" -+#define INSN_NAME vrecpxh_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret.c -@@ -21,6 +21,8 @@ VECT_VAR_DECL(expected_s8_8,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0xf4, 0xf5, 0xf6, 0xf7 }; - VECT_VAR_DECL(expected_s8_9,int,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, - 0xf2, 0xff, 0xf3, 0xff }; -+VECT_VAR_DECL(expected_s8_10,int,8,8) [] = { 0x00, 0xcc, 0x80, 0xcb, -+ 0x00, 0xcb, 0x80, 0xca }; - - /* Expected results for vreinterpret_s16_xx. */ - VECT_VAR_DECL(expected_s16_1,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; -@@ -32,6 +34,7 @@ VECT_VAR_DECL(expected_s16_6,int,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; - VECT_VAR_DECL(expected_s16_7,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; - VECT_VAR_DECL(expected_s16_8,int,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; - VECT_VAR_DECL(expected_s16_9,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; -+VECT_VAR_DECL(expected_s16_10,int,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; - - /* Expected results for vreinterpret_s32_xx. 
*/ - VECT_VAR_DECL(expected_s32_1,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; -@@ -43,6 +46,7 @@ VECT_VAR_DECL(expected_s32_6,int,32,2) [] = { 0xfffffff0, 0xfffffff1 }; - VECT_VAR_DECL(expected_s32_7,int,32,2) [] = { 0xfffffff0, 0xffffffff }; - VECT_VAR_DECL(expected_s32_8,int,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; - VECT_VAR_DECL(expected_s32_9,int,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; -+VECT_VAR_DECL(expected_s32_10,int,32,2) [] = { 0xcb80cc00, 0xca80cb00 }; - - /* Expected results for vreinterpret_s64_xx. */ - VECT_VAR_DECL(expected_s64_1,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; -@@ -54,6 +58,7 @@ VECT_VAR_DECL(expected_s64_6,int,64,1) [] = { 0xfffffff1fffffff0 }; - VECT_VAR_DECL(expected_s64_7,int,64,1) [] = { 0xfffffffffffffff0 }; - VECT_VAR_DECL(expected_s64_8,int,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; - VECT_VAR_DECL(expected_s64_9,int,64,1) [] = { 0xfff3fff2fff1fff0 }; -+VECT_VAR_DECL(expected_s64_10,int,64,1) [] = { 0xca80cb00cb80cc00 }; - - /* Expected results for vreinterpret_u8_xx. */ - VECT_VAR_DECL(expected_u8_1,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, -@@ -74,6 +79,8 @@ VECT_VAR_DECL(expected_u8_8,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0xf4, 0xf5, 0xf6, 0xf7 }; - VECT_VAR_DECL(expected_u8_9,uint,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, - 0xf2, 0xff, 0xf3, 0xff }; -+VECT_VAR_DECL(expected_u8_10,uint,8,8) [] = { 0x00, 0xcc, 0x80, 0xcb, -+ 0x00, 0xcb, 0x80, 0xca }; - - /* Expected results for vreinterpret_u16_xx. */ - VECT_VAR_DECL(expected_u16_1,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; -@@ -85,6 +92,7 @@ VECT_VAR_DECL(expected_u16_6,uint,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; - VECT_VAR_DECL(expected_u16_7,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; - VECT_VAR_DECL(expected_u16_8,uint,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; - VECT_VAR_DECL(expected_u16_9,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; -+VECT_VAR_DECL(expected_u16_10,uint,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; - - /* Expected results for vreinterpret_u32_xx. */ - VECT_VAR_DECL(expected_u32_1,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; -@@ -96,6 +104,7 @@ VECT_VAR_DECL(expected_u32_6,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; - VECT_VAR_DECL(expected_u32_7,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; - VECT_VAR_DECL(expected_u32_8,uint,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; - VECT_VAR_DECL(expected_u32_9,uint,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; -+VECT_VAR_DECL(expected_u32_10,uint,32,2) [] = { 0xcb80cc00, 0xca80cb00 }; - - /* Expected results for vreinterpret_u64_xx. */ - VECT_VAR_DECL(expected_u64_1,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; -@@ -107,6 +116,7 @@ VECT_VAR_DECL(expected_u64_6,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; - VECT_VAR_DECL(expected_u64_7,uint,64,1) [] = { 0xfffffff1fffffff0 }; - VECT_VAR_DECL(expected_u64_8,uint,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; - VECT_VAR_DECL(expected_u64_9,uint,64,1) [] = { 0xfff3fff2fff1fff0 }; -+VECT_VAR_DECL(expected_u64_10,uint,64,1) [] = { 0xca80cb00cb80cc00 }; - - /* Expected results for vreinterpret_p8_xx. */ - VECT_VAR_DECL(expected_p8_1,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, -@@ -127,6 +137,8 @@ VECT_VAR_DECL(expected_p8_8,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff }; - VECT_VAR_DECL(expected_p8_9,poly,8,8) [] = { 0xf0, 0xff, 0xf1, 0xff, - 0xf2, 0xff, 0xf3, 0xff }; -+VECT_VAR_DECL(expected_p8_10,poly,8,8) [] = { 0x00, 0xcc, 0x80, 0xcb, -+ 0x00, 0xcb, 0x80, 0xca }; - - /* Expected results for vreinterpret_p16_xx. 
*/ - VECT_VAR_DECL(expected_p16_1,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; -@@ -138,6 +150,7 @@ VECT_VAR_DECL(expected_p16_6,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; - VECT_VAR_DECL(expected_p16_7,poly,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; - VECT_VAR_DECL(expected_p16_8,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; - VECT_VAR_DECL(expected_p16_9,poly,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; -+VECT_VAR_DECL(expected_p16_10,poly,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; - - /* Expected results for vreinterpretq_s8_xx. */ - VECT_VAR_DECL(expected_q_s8_1,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, -@@ -176,6 +189,10 @@ VECT_VAR_DECL(expected_q_s8_9,int,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, - 0xf2, 0xff, 0xf3, 0xff, - 0xf4, 0xff, 0xf5, 0xff, - 0xf6, 0xff, 0xf7, 0xff }; -+VECT_VAR_DECL(expected_q_s8_10,int,8,16) [] = { 0x00, 0xcc, 0x80, 0xcb, -+ 0x00, 0xcb, 0x80, 0xca, -+ 0x00, 0xca, 0x80, 0xc9, -+ 0x00, 0xc9, 0x80, 0xc8 }; - - /* Expected results for vreinterpretq_s16_xx. */ - VECT_VAR_DECL(expected_q_s16_1,int,16,8) [] = { 0xf1f0, 0xf3f2, -@@ -214,6 +231,10 @@ VECT_VAR_DECL(expected_q_s16_9,int,16,8) [] = { 0xfff0, 0xfff1, - 0xfff2, 0xfff3, - 0xfff4, 0xfff5, - 0xfff6, 0xfff7 }; -+VECT_VAR_DECL(expected_q_s16_10,int,16,8) [] = { 0xcc00, 0xcb80, -+ 0xcb00, 0xca80, -+ 0xca00, 0xc980, -+ 0xc900, 0xc880 }; - - /* Expected results for vreinterpretq_s32_xx. */ - VECT_VAR_DECL(expected_q_s32_1,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, -@@ -234,6 +255,8 @@ VECT_VAR_DECL(expected_q_s32_8,int,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, - 0xfbfaf9f8, 0xfffefdfc }; - VECT_VAR_DECL(expected_q_s32_9,int,32,4) [] = { 0xfff1fff0, 0xfff3fff2, - 0xfff5fff4, 0xfff7fff6 }; -+VECT_VAR_DECL(expected_q_s32_10,int,32,4) [] = { 0xcb80cc00, 0xca80cb00, -+ 0xc980ca00, 0xc880c900 }; - - /* Expected results for vreinterpretq_s64_xx. */ - VECT_VAR_DECL(expected_q_s64_1,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, -@@ -254,6 +277,8 @@ VECT_VAR_DECL(expected_q_s64_8,int,64,2) [] = { 0xf7f6f5f4f3f2f1f0, - 0xfffefdfcfbfaf9f8 }; - VECT_VAR_DECL(expected_q_s64_9,int,64,2) [] = { 0xfff3fff2fff1fff0, - 0xfff7fff6fff5fff4 }; -+VECT_VAR_DECL(expected_q_s64_10,int,64,2) [] = { 0xca80cb00cb80cc00, -+ 0xc880c900c980ca00 }; - - /* Expected results for vreinterpretq_u8_xx. */ - VECT_VAR_DECL(expected_q_u8_1,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, -@@ -292,6 +317,10 @@ VECT_VAR_DECL(expected_q_u8_9,uint,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, - 0xf2, 0xff, 0xf3, 0xff, - 0xf4, 0xff, 0xf5, 0xff, - 0xf6, 0xff, 0xf7, 0xff }; -+VECT_VAR_DECL(expected_q_u8_10,uint,8,16) [] = { 0x00, 0xcc, 0x80, 0xcb, -+ 0x00, 0xcb, 0x80, 0xca, -+ 0x00, 0xca, 0x80, 0xc9, -+ 0x00, 0xc9, 0x80, 0xc8 }; - - /* Expected results for vreinterpretq_u16_xx. */ - VECT_VAR_DECL(expected_q_u16_1,uint,16,8) [] = { 0xf1f0, 0xf3f2, -@@ -330,6 +359,10 @@ VECT_VAR_DECL(expected_q_u16_9,uint,16,8) [] = { 0xfff0, 0xfff1, - 0xfff2, 0xfff3, - 0xfff4, 0xfff5, - 0xfff6, 0xfff7 }; -+VECT_VAR_DECL(expected_q_u16_10,uint,16,8) [] = { 0xcc00, 0xcb80, -+ 0xcb00, 0xca80, -+ 0xca00, 0xc980, -+ 0xc900, 0xc880 }; - - /* Expected results for vreinterpretq_u32_xx. 
*/ - VECT_VAR_DECL(expected_q_u32_1,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, -@@ -350,6 +383,8 @@ VECT_VAR_DECL(expected_q_u32_8,uint,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, - 0xfbfaf9f8, 0xfffefdfc }; - VECT_VAR_DECL(expected_q_u32_9,uint,32,4) [] = { 0xfff1fff0, 0xfff3fff2, - 0xfff5fff4, 0xfff7fff6 }; -+VECT_VAR_DECL(expected_q_u32_10,uint,32,4) [] = { 0xcb80cc00, 0xca80cb00, -+ 0xc980ca00, 0xc880c900 }; - - /* Expected results for vreinterpretq_u64_xx. */ - VECT_VAR_DECL(expected_q_u64_1,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, -@@ -370,6 +405,92 @@ VECT_VAR_DECL(expected_q_u64_8,uint,64,2) [] = { 0xf7f6f5f4f3f2f1f0, - 0xfffefdfcfbfaf9f8 }; - VECT_VAR_DECL(expected_q_u64_9,uint,64,2) [] = { 0xfff3fff2fff1fff0, - 0xfff7fff6fff5fff4 }; -+VECT_VAR_DECL(expected_q_u64_10,uint,64,2) [] = { 0xca80cb00cb80cc00, -+ 0xc880c900c980ca00 }; -+ -+/* Expected results for vreinterpretq_p8_xx. */ -+VECT_VAR_DECL(expected_q_p8_1,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, -+ 0xf4, 0xf5, 0xf6, 0xf7, -+ 0xf8, 0xf9, 0xfa, 0xfb, -+ 0xfc, 0xfd, 0xfe, 0xff }; -+VECT_VAR_DECL(expected_q_p8_2,poly,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, -+ 0xf2, 0xff, 0xf3, 0xff, -+ 0xf4, 0xff, 0xf5, 0xff, -+ 0xf6, 0xff, 0xf7, 0xff }; -+VECT_VAR_DECL(expected_q_p8_3,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xf2, 0xff, 0xff, 0xff, -+ 0xf3, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_q_p8_4,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_q_p8_5,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, -+ 0xf4, 0xf5, 0xf6, 0xf7, -+ 0xf8, 0xf9, 0xfa, 0xfb, -+ 0xfc, 0xfd, 0xfe, 0xff }; -+VECT_VAR_DECL(expected_q_p8_6,poly,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, -+ 0xf2, 0xff, 0xf3, 0xff, -+ 0xf4, 0xff, 0xf5, 0xff, -+ 0xf6, 0xff, 0xf7, 0xff }; -+VECT_VAR_DECL(expected_q_p8_7,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xf2, 0xff, 0xff, 0xff, -+ 0xf3, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_q_p8_8,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_q_p8_9,poly,8,16) [] = { 0xf0, 0xff, 0xf1, 0xff, -+ 0xf2, 0xff, 0xf3, 0xff, -+ 0xf4, 0xff, 0xf5, 0xff, -+ 0xf6, 0xff, 0xf7, 0xff }; -+VECT_VAR_DECL(expected_q_p8_10,poly,8,16) [] = { 0x00, 0xcc, 0x80, 0xcb, -+ 0x00, 0xcb, 0x80, 0xca, -+ 0x00, 0xca, 0x80, 0xc9, -+ 0x00, 0xc9, 0x80, 0xc8 }; -+ -+/* Expected results for vreinterpretq_p16_xx. 
*/ -+VECT_VAR_DECL(expected_q_p16_1,poly,16,8) [] = { 0xf1f0, 0xf3f2, -+ 0xf5f4, 0xf7f6, -+ 0xf9f8, 0xfbfa, -+ 0xfdfc, 0xfffe }; -+VECT_VAR_DECL(expected_q_p16_2,poly,16,8) [] = { 0xfff0, 0xfff1, -+ 0xfff2, 0xfff3, -+ 0xfff4, 0xfff5, -+ 0xfff6, 0xfff7 }; -+VECT_VAR_DECL(expected_q_p16_3,poly,16,8) [] = { 0xfff0, 0xffff, -+ 0xfff1, 0xffff, -+ 0xfff2, 0xffff, -+ 0xfff3, 0xffff }; -+VECT_VAR_DECL(expected_q_p16_4,poly,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_q_p16_5,poly,16,8) [] = { 0xf1f0, 0xf3f2, -+ 0xf5f4, 0xf7f6, -+ 0xf9f8, 0xfbfa, -+ 0xfdfc, 0xfffe }; -+VECT_VAR_DECL(expected_q_p16_6,poly,16,8) [] = { 0xfff0, 0xfff1, -+ 0xfff2, 0xfff3, -+ 0xfff4, 0xfff5, -+ 0xfff6, 0xfff7 }; -+VECT_VAR_DECL(expected_q_p16_7,poly,16,8) [] = { 0xfff0, 0xffff, -+ 0xfff1, 0xffff, -+ 0xfff2, 0xffff, -+ 0xfff3, 0xffff }; -+VECT_VAR_DECL(expected_q_p16_8,poly,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_q_p16_9,poly,16,8) [] = { 0xf1f0, 0xf3f2, -+ 0xf5f4, 0xf7f6, -+ 0xf9f8, 0xfbfa, -+ 0xfdfc, 0xfffe }; -+VECT_VAR_DECL(expected_q_p16_10,poly,16,8) [] = { 0xcc00, 0xcb80, -+ 0xcb00, 0xca80, -+ 0xca00, 0xc980, -+ 0xc900, 0xc880 }; - - /* Expected results for vreinterpret_f32_xx. */ - VECT_VAR_DECL(expected_f32_1,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; -@@ -382,6 +503,7 @@ VECT_VAR_DECL(expected_f32_7,hfloat,32,2) [] = { 0xfffffff0, 0xfffffff1 }; - VECT_VAR_DECL(expected_f32_8,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; - VECT_VAR_DECL(expected_f32_9,hfloat,32,2) [] = { 0xf3f2f1f0, 0xf7f6f5f4 }; - VECT_VAR_DECL(expected_f32_10,hfloat,32,2) [] = { 0xfff1fff0, 0xfff3fff2 }; -+VECT_VAR_DECL(expected_f32_11,hfloat,32,2) [] = { 0xcb80cc00, 0xca80cb00 }; - - /* Expected results for vreinterpretq_f32_xx. */ - VECT_VAR_DECL(expected_q_f32_1,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, -@@ -404,8 +526,10 @@ VECT_VAR_DECL(expected_q_f32_9,hfloat,32,4) [] = { 0xf3f2f1f0, 0xf7f6f5f4, - 0xfbfaf9f8, 0xfffefdfc }; - VECT_VAR_DECL(expected_q_f32_10,hfloat,32,4) [] = { 0xfff1fff0, 0xfff3fff2, - 0xfff5fff4, 0xfff7fff6 }; -+VECT_VAR_DECL(expected_q_f32_11,hfloat,32,4) [] = { 0xcb80cc00, 0xca80cb00, -+ 0xc980ca00, 0xc880c900 }; - --/* Expected results for vreinterpretq_xx_f32. */ -+/* Expected results for vreinterpret_xx_f32. */ - VECT_VAR_DECL(expected_xx_f32_1,int,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, - 0x0, 0x0, 0x70, 0xc1 }; - VECT_VAR_DECL(expected_xx_f32_2,int,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; -@@ -419,6 +543,7 @@ VECT_VAR_DECL(expected_xx_f32_8,uint,64,1) [] = { 0xc1700000c1800000 }; - VECT_VAR_DECL(expected_xx_f32_9,poly,8,8) [] = { 0x0, 0x0, 0x80, 0xc1, - 0x0, 0x0, 0x70, 0xc1 }; - VECT_VAR_DECL(expected_xx_f32_10,poly,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; -+VECT_VAR_DECL(expected_xx_f32_11,hfloat,16,4) [] = { 0x0, 0xc180, 0x0, 0xc170 }; - - /* Expected results for vreinterpretq_xx_f32. */ - VECT_VAR_DECL(expected_q_xx_f32_1,int,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, -@@ -447,6 +572,62 @@ VECT_VAR_DECL(expected_q_xx_f32_9,poly,8,16) [] = { 0x0, 0x0, 0x80, 0xc1, - 0x0, 0x0, 0x50, 0xc1 }; - VECT_VAR_DECL(expected_q_xx_f32_10,poly,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, - 0x0, 0xc160, 0x0, 0xc150 }; -+VECT_VAR_DECL(expected_q_xx_f32_11,hfloat,16,8) [] = { 0x0, 0xc180, 0x0, 0xc170, -+ 0x0, 0xc160, 0x0, 0xc150 }; -+ -+/* Expected results for vreinterpret_f16_xx. 
*/ -+VECT_VAR_DECL(expected_f16_1,hfloat,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; -+VECT_VAR_DECL(expected_f16_2,hfloat,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; -+VECT_VAR_DECL(expected_f16_3,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; -+VECT_VAR_DECL(expected_f16_4,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_f16_5,hfloat,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; -+VECT_VAR_DECL(expected_f16_6,hfloat,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; -+VECT_VAR_DECL(expected_f16_7,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xfff1, 0xffff }; -+VECT_VAR_DECL(expected_f16_8,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_f16_9,hfloat,16,4) [] = { 0xf1f0, 0xf3f2, 0xf5f4, 0xf7f6 }; -+VECT_VAR_DECL(expected_f16_10,hfloat,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; -+ -+/* Expected results for vreinterpretq_f16_xx. */ -+VECT_VAR_DECL(expected_q_f16_1,hfloat,16,8) [] = { 0xf1f0, 0xf3f2, -+ 0xf5f4, 0xf7f6, -+ 0xf9f8, 0xfbfa, -+ 0xfdfc, 0xfffe }; -+VECT_VAR_DECL(expected_q_f16_2,hfloat,16,8) [] = { 0xfff0, 0xfff1, -+ 0xfff2, 0xfff3, -+ 0xfff4, 0xfff5, -+ 0xfff6, 0xfff7 }; -+VECT_VAR_DECL(expected_q_f16_3,hfloat,16,8) [] = { 0xfff0, 0xffff, -+ 0xfff1, 0xffff, -+ 0xfff2, 0xffff, -+ 0xfff3, 0xffff }; -+VECT_VAR_DECL(expected_q_f16_4,hfloat,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_q_f16_5,hfloat,16,8) [] = { 0xf1f0, 0xf3f2, -+ 0xf5f4, 0xf7f6, -+ 0xf9f8, 0xfbfa, -+ 0xfdfc, 0xfffe }; -+VECT_VAR_DECL(expected_q_f16_6,hfloat,16,8) [] = { 0xfff0, 0xfff1, -+ 0xfff2, 0xfff3, -+ 0xfff4, 0xfff5, -+ 0xfff6, 0xfff7 }; -+VECT_VAR_DECL(expected_q_f16_7,hfloat,16,8) [] = { 0xfff0, 0xffff, -+ 0xfff1, 0xffff, -+ 0xfff2, 0xffff, -+ 0xfff3, 0xffff }; -+VECT_VAR_DECL(expected_q_f16_8,hfloat,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(expected_q_f16_9,hfloat,16,8) [] = { 0xf1f0, 0xf3f2, -+ 0xf5f4, 0xf7f6, -+ 0xf9f8, 0xfbfa, -+ 0xfdfc, 0xfffe }; -+VECT_VAR_DECL(expected_q_f16_10,hfloat,16,8) [] = { 0xfff0, 0xfff1, -+ 0xfff2, 0xfff3, -+ 0xfff4, 0xfff5, -+ 0xfff6, 0xfff7 }; - - #define TEST_MSG "VREINTERPRET/VREINTERPRETQ" - -@@ -484,6 +665,10 @@ void exec_vreinterpret (void) - - /* Initialize input "vector" from "buffer". */ - TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, float, f, 32, 4); - -@@ -497,6 +682,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, int, s, 8, 8, uint, u, 64, 1, expected_s8_7); - TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 8, 8, expected_s8_8); - TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 16, 4, expected_s8_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(, int, s, 8, 8, float, f, 16, 4, expected_s8_10); -+#endif - - /* vreinterpret_s16_xx. 
*/ - TEST_VREINTERPRET(, int, s, 16, 4, int, s, 8, 8, expected_s16_1); -@@ -508,6 +696,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, int, s, 16, 4, uint, u, 64, 1, expected_s16_7); - TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 8, 8, expected_s16_8); - TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 16, 4, expected_s16_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(, int, s, 16, 4, float, f, 16, 4, expected_s16_10); -+#endif - - /* vreinterpret_s32_xx. */ - TEST_VREINTERPRET(, int, s, 32, 2, int, s, 8, 8, expected_s32_1); -@@ -519,6 +710,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, int, s, 32, 2, uint, u, 64, 1, expected_s32_7); - TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 8, 8, expected_s32_8); - TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 16, 4, expected_s32_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(, int, s, 32, 2, float, f, 16, 4, expected_s32_10); -+#endif - - /* vreinterpret_s64_xx. */ - TEST_VREINTERPRET(, int, s, 64, 1, int, s, 8, 8, expected_s64_1); -@@ -530,6 +724,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, int, s, 64, 1, uint, u, 64, 1, expected_s64_7); - TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 8, 8, expected_s64_8); - TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 16, 4, expected_s64_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(, int, s, 64, 1, float, f, 16, 4, expected_s64_10); -+#endif - - /* vreinterpret_u8_xx. */ - TEST_VREINTERPRET(, uint, u, 8, 8, int, s, 8, 8, expected_u8_1); -@@ -541,6 +738,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, uint, u, 8, 8, uint, u, 64, 1, expected_u8_7); - TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 8, 8, expected_u8_8); - TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 16, 4, expected_u8_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(, uint, u, 8, 8, float, f, 16, 4, expected_u8_10); -+#endif - - /* vreinterpret_u16_xx. */ - TEST_VREINTERPRET(, uint, u, 16, 4, int, s, 8, 8, expected_u16_1); -@@ -552,6 +752,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, uint, u, 16, 4, uint, u, 64, 1, expected_u16_7); - TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 8, 8, expected_u16_8); - TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 16, 4, expected_u16_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(, uint, u, 16, 4, float, f, 16, 4, expected_u16_10); -+#endif - - /* vreinterpret_u32_xx. */ - TEST_VREINTERPRET(, uint, u, 32, 2, int, s, 8, 8, expected_u32_1); -@@ -563,6 +766,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, uint, u, 32, 2, uint, u, 64, 1, expected_u32_7); - TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 8, 8, expected_u32_8); - TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 16, 4, expected_u32_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(, uint, u, 32, 2, float, f, 16, 4, expected_u32_10); -+#endif - - /* vreinterpret_u64_xx. 
*/ - TEST_VREINTERPRET(, uint, u, 64, 1, int, s, 8, 8, expected_u64_1); -@@ -574,6 +780,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, uint, u, 64, 1, uint, u, 32, 2, expected_u64_7); - TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 8, 8, expected_u64_8); - TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 16, 4, expected_u64_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 16, 4, expected_u64_10); -+#endif - - /* vreinterpret_p8_xx. */ - TEST_VREINTERPRET_POLY(, poly, p, 8, 8, int, s, 8, 8, expected_p8_1); -@@ -585,6 +794,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 32, 2, expected_p8_7); - TEST_VREINTERPRET_POLY(, poly, p, 8, 8, uint, u, 64, 1, expected_p8_8); - TEST_VREINTERPRET_POLY(, poly, p, 8, 8, poly, p, 16, 4, expected_p8_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_POLY(, poly, p, 8, 8, float, f, 16, 4, expected_p8_10); -+#endif - - /* vreinterpret_p16_xx. */ - TEST_VREINTERPRET_POLY(, poly, p, 16, 4, int, s, 8, 8, expected_p16_1); -@@ -596,6 +808,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 32, 2, expected_p16_7); - TEST_VREINTERPRET_POLY(, poly, p, 16, 4, uint, u, 64, 1, expected_p16_8); - TEST_VREINTERPRET_POLY(, poly, p, 16, 4, poly, p, 8, 8, expected_p16_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_POLY(, poly, p, 16, 4, float, f, 16, 4, expected_p16_10); -+#endif - - /* vreinterpretq_s8_xx. */ - TEST_VREINTERPRET(q, int, s, 8, 16, int, s, 16, 8, expected_q_s8_1); -@@ -607,6 +822,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, int, s, 8, 16, uint, u, 64, 2, expected_q_s8_7); - TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 8, 16, expected_q_s8_8); - TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 16, 8, expected_q_s8_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 16, 8, expected_q_s8_10); -+#endif - - /* vreinterpretq_s16_xx. */ - TEST_VREINTERPRET(q, int, s, 16, 8, int, s, 8, 16, expected_q_s16_1); -@@ -618,6 +836,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, int, s, 16, 8, uint, u, 64, 2, expected_q_s16_7); - TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 8, 16, expected_q_s16_8); - TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 16, 8, expected_q_s16_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(q, int, s, 16, 8, float, f, 16, 8, expected_q_s16_10); -+#endif - - /* vreinterpretq_s32_xx. */ - TEST_VREINTERPRET(q, int, s, 32, 4, int, s, 8, 16, expected_q_s32_1); -@@ -629,6 +850,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, int, s, 32, 4, uint, u, 64, 2, expected_q_s32_7); - TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 8, 16, expected_q_s32_8); - TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 16, 8, expected_q_s32_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(q, int, s, 32, 4, float, f, 16, 8, expected_q_s32_10); -+#endif - - /* vreinterpretq_s64_xx. 
*/ - TEST_VREINTERPRET(q, int, s, 64, 2, int, s, 8, 16, expected_q_s64_1); -@@ -640,6 +864,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, int, s, 64, 2, uint, u, 64, 2, expected_q_s64_7); - TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 8, 16, expected_q_s64_8); - TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 16, 8, expected_q_s64_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(q, int, s, 64, 2, float, f, 16, 8, expected_q_s64_10); -+#endif - - /* vreinterpretq_u8_xx. */ - TEST_VREINTERPRET(q, uint, u, 8, 16, int, s, 8, 16, expected_q_u8_1); -@@ -651,6 +878,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, uint, u, 8, 16, uint, u, 64, 2, expected_q_u8_7); - TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 8, 16, expected_q_u8_8); - TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 16, 8, expected_q_u8_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(q, uint, u, 8, 16, float, f, 16, 8, expected_q_u8_10); -+#endif - - /* vreinterpretq_u16_xx. */ - TEST_VREINTERPRET(q, uint, u, 16, 8, int, s, 8, 16, expected_q_u16_1); -@@ -662,6 +892,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, uint, u, 16, 8, uint, u, 64, 2, expected_q_u16_7); - TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 8, 16, expected_q_u16_8); - TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 16, 8, expected_q_u16_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(q, uint, u, 16, 8, float, f, 16, 8, expected_q_u16_10); -+#endif - - /* vreinterpretq_u32_xx. */ - TEST_VREINTERPRET(q, uint, u, 32, 4, int, s, 8, 16, expected_q_u32_1); -@@ -673,6 +906,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, uint, u, 32, 4, uint, u, 64, 2, expected_q_u32_7); - TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 8, 16, expected_q_u32_8); - TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 16, 8, expected_q_u32_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(q, uint, u, 32, 4, float, f, 16, 8, expected_q_u32_10); -+#endif - - /* vreinterpretq_u64_xx. */ - TEST_VREINTERPRET(q, uint, u, 64, 2, int, s, 8, 16, expected_q_u64_1); -@@ -684,6 +920,37 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, uint, u, 64, 2, uint, u, 32, 4, expected_q_u64_7); - TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 8, 16, expected_q_u64_8); - TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 16, 8, expected_q_u64_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 16, 8, expected_q_u64_10); -+#endif -+ -+ /* vreinterpretq_p8_xx. 
*/ -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, int, s, 8, 16, expected_q_p8_1); -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, int, s, 16, 8, expected_q_p8_2); -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, int, s, 32, 4, expected_q_p8_3); -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, int, s, 64, 2, expected_q_p8_4); -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, uint, u, 8, 16, expected_q_p8_5); -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, uint, u, 16, 8, expected_q_p8_6); -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, uint, u, 32, 4, expected_q_p8_7); -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, uint, u, 64, 2, expected_q_p8_8); -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, poly, p, 16, 8, expected_q_p8_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 16, 8, expected_q_p8_10); -+#endif -+ -+ /* vreinterpretq_p16_xx. */ -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, int, s, 8, 16, expected_q_p16_1); -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, int, s, 16, 8, expected_q_p16_2); -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, int, s, 32, 4, expected_q_p16_3); -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, int, s, 64, 2, expected_q_p16_4); -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, uint, u, 8, 16, expected_q_p16_5); -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, uint, u, 16, 8, expected_q_p16_6); -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, uint, u, 32, 4, expected_q_p16_7); -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, uint, u, 64, 2, expected_q_p16_8); -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, poly, p, 8, 16, expected_q_p16_9); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 16, 8, expected_q_p16_10); -+#endif - - /* vreinterpret_f32_xx. */ - TEST_VREINTERPRET_FP(, float, f, 32, 2, int, s, 8, 8, expected_f32_1); -@@ -696,6 +963,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET_FP(, float, f, 32, 2, uint, u, 64, 1, expected_f32_8); - TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 8, 8, expected_f32_9); - TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 16, 4, expected_f32_10); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_FP(, float, f, 32, 2, float, f, 16, 4, expected_f32_11); -+#endif - - /* vreinterpretq_f32_xx. */ - TEST_VREINTERPRET_FP(q, float, f, 32, 4, int, s, 8, 16, expected_q_f32_1); -@@ -708,6 +978,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET_FP(q, float, f, 32, 4, uint, u, 64, 2, expected_q_f32_8); - TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 8, 16, expected_q_f32_9); - TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 16, 8, expected_q_f32_10); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_FP(q, float, f, 32, 4, float, f, 16, 8, expected_q_f32_11); -+#endif - - /* vreinterpret_xx_f32. */ - TEST_VREINTERPRET(, int, s, 8, 8, float, f, 32, 2, expected_xx_f32_1); -@@ -720,6 +993,9 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(, uint, u, 64, 1, float, f, 32, 2, expected_xx_f32_8); - TEST_VREINTERPRET_POLY(, poly, p, 8, 8, float, f, 32, 2, expected_xx_f32_9); - TEST_VREINTERPRET_POLY(, poly, p, 16, 4, float, f, 32, 2, expected_xx_f32_10); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, float, f, 32, 2, expected_xx_f32_11); -+#endif - - /* vreinterpretq_xx_f32. 
*/ - TEST_VREINTERPRET(q, int, s, 8, 16, float, f, 32, 4, expected_q_xx_f32_1); -@@ -732,6 +1008,33 @@ void exec_vreinterpret (void) - TEST_VREINTERPRET(q, uint, u, 64, 2, float, f, 32, 4, expected_q_xx_f32_8); - TEST_VREINTERPRET_POLY(q, poly, p, 8, 16, float, f, 32, 4, expected_q_xx_f32_9); - TEST_VREINTERPRET_POLY(q, poly, p, 16, 8, float, f, 32, 4, expected_q_xx_f32_10); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, float, f, 32, 4, expected_q_xx_f32_11); -+ -+ /* vreinterpret_f16_xx. */ -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, int, s, 8, 8, expected_f16_1); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, int, s, 16, 4, expected_f16_2); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, int, s, 32, 2, expected_f16_3); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, int, s, 64, 1, expected_f16_4); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, uint, u, 8, 8, expected_f16_5); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, uint, u, 16, 4, expected_f16_6); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, uint, u, 32, 2, expected_f16_7); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, uint, u, 64, 1, expected_f16_8); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, poly, p, 8, 8, expected_f16_9); -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, poly, p, 16, 4, expected_f16_10); -+ -+ /* vreinterpretq_f16_xx. */ -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, int, s, 8, 16, expected_q_f16_1); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, int, s, 16, 8, expected_q_f16_2); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, int, s, 32, 4, expected_q_f16_3); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, int, s, 64, 2, expected_q_f16_4); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, uint, u, 8, 16, expected_q_f16_5); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, uint, u, 16, 8, expected_q_f16_6); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, uint, u, 32, 4, expected_q_f16_7); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, uint, u, 64, 2, expected_q_f16_8); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, poly, p, 8, 16, expected_q_f16_9); -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, poly, p, 16, 8, expected_q_f16_10); -+#endif - } - - int main (void) ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p128.c -@@ -0,0 +1,165 @@ -+/* This file contains tests for the vreinterpret *p128 intrinsics. */ -+ -+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */ -+/* { dg-add-options arm_crypto } */ -+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected results: vreinterpretq_p128_*. 
*/ -+VECT_VAR_DECL(vreint_expected_q_p128_s8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, -+ 0xfffefdfcfbfaf9f8 }; -+VECT_VAR_DECL(vreint_expected_q_p128_s16,poly,64,2) [] = { 0xfff3fff2fff1fff0, -+ 0xfff7fff6fff5fff4 }; -+VECT_VAR_DECL(vreint_expected_q_p128_s32,poly,64,2) [] = { 0xfffffff1fffffff0, -+ 0xfffffff3fffffff2 }; -+VECT_VAR_DECL(vreint_expected_q_p128_s64,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_p128_u8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, -+ 0xfffefdfcfbfaf9f8 }; -+VECT_VAR_DECL(vreint_expected_q_p128_u16,poly,64,2) [] = { 0xfff3fff2fff1fff0, -+ 0xfff7fff6fff5fff4 }; -+VECT_VAR_DECL(vreint_expected_q_p128_u32,poly,64,2) [] = { 0xfffffff1fffffff0, -+ 0xfffffff3fffffff2 }; -+VECT_VAR_DECL(vreint_expected_q_p128_u64,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_p128_p8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, -+ 0xfffefdfcfbfaf9f8 }; -+VECT_VAR_DECL(vreint_expected_q_p128_p16,poly,64,2) [] = { 0xfff3fff2fff1fff0, -+ 0xfff7fff6fff5fff4 }; -+VECT_VAR_DECL(vreint_expected_q_p128_f32,poly,64,2) [] = { 0xc1700000c1800000, -+ 0xc1500000c1600000 }; -+VECT_VAR_DECL(vreint_expected_q_p128_f16,poly,64,2) [] = { 0xca80cb00cb80cc00, -+ 0xc880c900c980ca00 }; -+ -+/* Expected results: vreinterpretq_*_p128. */ -+VECT_VAR_DECL(vreint_expected_q_s8_p128,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_q_s16_p128,int,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_q_s32_p128,int,32,4) [] = { 0xfffffff0, 0xffffffff, -+ 0xfffffff1, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_q_s64_p128,int,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_u8_p128,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_q_u16_p128,uint,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_q_u32_p128,uint,32,4) [] = { 0xfffffff0, 0xffffffff, -+ 0xfffffff1, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_q_u64_p128,uint,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_p8_p128,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_q_p16_p128,poly,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_q_p64_p128,uint,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_f32_p128,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, -+ 0xfffffff1, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_q_f16_p128,hfloat,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+ -+int main (void) -+{ -+ DECL_VARIABLE_128BITS_VARIANTS(vreint_vector); -+ DECL_VARIABLE_128BITS_VARIANTS(vreint_vector_res); -+ -+ clean_results (); -+ -+ TEST_MACRO_128BITS_VARIANTS_2_5(VLOAD, vreint_vector, buffer); -+ VLOAD(vreint_vector, buffer, q, poly, p, 64, 2); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ VLOAD(vreint_vector, buffer, q, float, f, 16, 8); -+#endif -+ VLOAD(vreint_vector, buffer, q, float, f, 32, 4); -+ -+ /* vreinterpretq_p128_* tests. 
*/ -+#undef TEST_MSG -+#define TEST_MSG "VREINTERPRETQ_P128_*" -+ -+ /* Since there is no way to store a poly128_t value, convert to -+ poly64x2_t before storing. This means that we are not able to -+ test vreinterpretq_p128* alone, and that errors in -+ vreinterpretq_p64_p128 could compensate for errors in -+ vreinterpretq_p128*. */ -+#define TEST_VREINTERPRET128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ -+ VECT_VAR(vreint_vector_res, poly, 64, 2) = vreinterpretq_p64_p128( \ -+ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS))); \ -+ vst1##Q##_##T2##64(VECT_VAR(result, poly, 64, 2), \ -+ VECT_VAR(vreint_vector_res, poly, 64, 2)); \ -+ CHECK_POLY(TEST_MSG, T1, 64, 2, PRIx##64, EXPECTED, ""); -+ -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 8, 16, vreint_expected_q_p128_s8); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 16, 8, vreint_expected_q_p128_s16); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 32, 4, vreint_expected_q_p128_s32); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, int, s, 64, 2, vreint_expected_q_p128_s64); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 8, 16, vreint_expected_q_p128_u8); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 16, 8, vreint_expected_q_p128_u16); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 32, 4, vreint_expected_q_p128_u32); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, uint, u, 64, 2, vreint_expected_q_p128_u64); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, poly, p, 8, 16, vreint_expected_q_p128_p8); -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, poly, p, 16, 8, vreint_expected_q_p128_p16); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 16, 8, vreint_expected_q_p128_f16); -+#endif -+ TEST_VREINTERPRET128(q, poly, p, 128, 1, float, f, 32, 4, vreint_expected_q_p128_f32); -+ -+ /* vreinterpretq_*_p128 tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VREINTERPRETQ_*_P128" -+ -+ /* Since there is no way to load a poly128_t value, load a -+ poly64x2_t and convert it to poly128_t. This means that we are -+ not able to test vreinterpretq_*_p128 alone, and that errors in -+ vreinterpretq_p128_p64 could compensate for errors in -+ vreinterpretq_*_p128*. 
*/ -+#define TEST_VREINTERPRET_FROM_P128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ -+ VECT_VAR(vreint_vector_res, T1, W, N) = \ -+ vreinterpret##Q##_##T2##W##_##TS2##WS( \ -+ vreinterpretq_p128_p64(VECT_VAR(vreint_vector, TS1, 64, 2))); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vreint_vector_res, T1, W, N)); \ -+ CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); -+ -+#define TEST_VREINTERPRET_FP_FROM_P128(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ -+ VECT_VAR(vreint_vector_res, T1, W, N) = \ -+ vreinterpret##Q##_##T2##W##_##TS2##WS( \ -+ vreinterpretq_p128_p64(VECT_VAR(vreint_vector, TS1, 64, 2))); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vreint_vector_res, T1, W, N)); \ -+ CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); -+ -+ TEST_VREINTERPRET_FROM_P128(q, int, s, 8, 16, poly, p, 128, 1, vreint_expected_q_s8_p128); -+ TEST_VREINTERPRET_FROM_P128(q, int, s, 16, 8, poly, p, 128, 1, vreint_expected_q_s16_p128); -+ TEST_VREINTERPRET_FROM_P128(q, int, s, 32, 4, poly, p, 128, 1, vreint_expected_q_s32_p128); -+ TEST_VREINTERPRET_FROM_P128(q, int, s, 64, 2, poly, p, 128, 1, vreint_expected_q_s64_p128); -+ TEST_VREINTERPRET_FROM_P128(q, uint, u, 8, 16, poly, p, 128, 1, vreint_expected_q_u8_p128); -+ TEST_VREINTERPRET_FROM_P128(q, uint, u, 16, 8, poly, p, 128, 1, vreint_expected_q_u16_p128); -+ TEST_VREINTERPRET_FROM_P128(q, uint, u, 32, 4, poly, p, 128, 1, vreint_expected_q_u32_p128); -+ TEST_VREINTERPRET_FROM_P128(q, uint, u, 64, 2, poly, p, 128, 1, vreint_expected_q_u64_p128); -+ TEST_VREINTERPRET_FROM_P128(q, poly, p, 8, 16, poly, p, 128, 1, vreint_expected_q_p8_p128); -+ TEST_VREINTERPRET_FROM_P128(q, poly, p, 16, 8, poly, p, 128, 1, vreint_expected_q_p16_p128); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 16, 8, poly, p, 128, 1, vreint_expected_q_f16_p128); -+#endif -+ TEST_VREINTERPRET_FP_FROM_P128(q, float, f, 32, 4, poly, p, 128, 1, vreint_expected_q_f32_p128); -+ -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vreinterpret_p64.c -@@ -0,0 +1,216 @@ -+/* This file contains tests for the vreinterpret *p64 intrinsics. */ -+ -+/* { dg-require-effective-target arm_crypto_ok { target { arm*-*-* } } } */ -+/* { dg-add-options arm_crypto } */ -+/* { dg-additional-options "-march=armv8-a+crypto" { target { aarch64*-*-* } } }*/ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected results: vreinterpret_p64_*. 
*/ -+VECT_VAR_DECL(vreint_expected_p64_s8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; -+VECT_VAR_DECL(vreint_expected_p64_s16,poly,64,1) [] = { 0xfff3fff2fff1fff0 }; -+VECT_VAR_DECL(vreint_expected_p64_s32,poly,64,1) [] = { 0xfffffff1fffffff0 }; -+VECT_VAR_DECL(vreint_expected_p64_s64,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vreint_expected_p64_u8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; -+VECT_VAR_DECL(vreint_expected_p64_u16,poly,64,1) [] = { 0xfff3fff2fff1fff0 }; -+VECT_VAR_DECL(vreint_expected_p64_u32,poly,64,1) [] = { 0xfffffff1fffffff0 }; -+VECT_VAR_DECL(vreint_expected_p64_u64,poly,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vreint_expected_p64_p8,poly,64,1) [] = { 0xf7f6f5f4f3f2f1f0 }; -+VECT_VAR_DECL(vreint_expected_p64_p16,poly,64,1) [] = { 0xfff3fff2fff1fff0 }; -+VECT_VAR_DECL(vreint_expected_p64_f32,poly,64,1) [] = { 0xc1700000c1800000 }; -+VECT_VAR_DECL(vreint_expected_p64_f16,poly,64,1) [] = { 0xca80cb00cb80cc00 }; -+ -+/* Expected results: vreinterpretq_p64_*. */ -+VECT_VAR_DECL(vreint_expected_q_p64_s8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, -+ 0xfffefdfcfbfaf9f8 }; -+VECT_VAR_DECL(vreint_expected_q_p64_s16,poly,64,2) [] = { 0xfff3fff2fff1fff0, -+ 0xfff7fff6fff5fff4 }; -+VECT_VAR_DECL(vreint_expected_q_p64_s32,poly,64,2) [] = { 0xfffffff1fffffff0, -+ 0xfffffff3fffffff2 }; -+VECT_VAR_DECL(vreint_expected_q_p64_s64,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_p64_u8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, -+ 0xfffefdfcfbfaf9f8 }; -+VECT_VAR_DECL(vreint_expected_q_p64_u16,poly,64,2) [] = { 0xfff3fff2fff1fff0, -+ 0xfff7fff6fff5fff4 }; -+VECT_VAR_DECL(vreint_expected_q_p64_u32,poly,64,2) [] = { 0xfffffff1fffffff0, -+ 0xfffffff3fffffff2 }; -+VECT_VAR_DECL(vreint_expected_q_p64_u64,poly,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_p64_p8,poly,64,2) [] = { 0xf7f6f5f4f3f2f1f0, -+ 0xfffefdfcfbfaf9f8 }; -+VECT_VAR_DECL(vreint_expected_q_p64_p16,poly,64,2) [] = { 0xfff3fff2fff1fff0, -+ 0xfff7fff6fff5fff4 }; -+VECT_VAR_DECL(vreint_expected_q_p64_f32,poly,64,2) [] = { 0xc1700000c1800000, -+ 0xc1500000c1600000 }; -+VECT_VAR_DECL(vreint_expected_q_p64_f16,poly,64,2) [] = { 0xca80cb00cb80cc00, -+ 0xc880c900c980ca00 }; -+ -+/* Expected results: vreinterpret_*_p64. */ -+VECT_VAR_DECL(vreint_expected_s8_p64,int,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_s16_p64,int,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_s32_p64,int,32,2) [] = { 0xfffffff0, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_s64_p64,int,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vreint_expected_u8_p64,uint,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_u16_p64,uint,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_u32_p64,uint,32,2) [] = { 0xfffffff0, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_u64_p64,uint,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(vreint_expected_p8_p64,poly,8,8) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_p16_p64,poly,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_f32_p64,hfloat,32,2) [] = { 0xfffffff0, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_f16_p64,hfloat,16,4) [] = { 0xfff0, 0xffff, 0xffff, 0xffff }; -+ -+/* Expected results: vreinterpretq_*_p64. 
*/ -+VECT_VAR_DECL(vreint_expected_q_s8_p64,int,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_q_s16_p64,int,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_q_s32_p64,int,32,4) [] = { 0xfffffff0, 0xffffffff, -+ 0xfffffff1, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_q_s64_p64,int,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_u8_p64,uint,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_q_u16_p64,uint,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_q_u32_p64,uint,32,4) [] = { 0xfffffff0, 0xffffffff, -+ 0xfffffff1, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_q_u64_p64,uint,64,2) [] = { 0xfffffffffffffff0, -+ 0xfffffffffffffff1 }; -+VECT_VAR_DECL(vreint_expected_q_p8_p64,poly,8,16) [] = { 0xf0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xf1, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(vreint_expected_q_p16_p64,poly,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+VECT_VAR_DECL(vreint_expected_q_f32_p64,hfloat,32,4) [] = { 0xfffffff0, 0xffffffff, -+ 0xfffffff1, 0xffffffff }; -+VECT_VAR_DECL(vreint_expected_q_f16_p64,hfloat,16,8) [] = { 0xfff0, 0xffff, -+ 0xffff, 0xffff, -+ 0xfff1, 0xffff, -+ 0xffff, 0xffff }; -+ -+int main (void) -+{ -+#define TEST_VREINTERPRET(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ -+ VECT_VAR(vreint_vector_res, T1, W, N) = \ -+ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vreint_vector_res, T1, W, N)); \ -+ CHECK(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); -+ -+#define TEST_VREINTERPRET_TO_POLY(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ -+ VECT_VAR(vreint_vector_res, T1, W, N) = \ -+ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vreint_vector_res, T1, W, N)); \ -+ CHECK_POLY(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); -+ -+#define TEST_VREINTERPRET_FP(Q, T1, T2, W, N, TS1, TS2, WS, NS, EXPECTED) \ -+ VECT_VAR(vreint_vector_res, T1, W, N) = \ -+ vreinterpret##Q##_##T2##W##_##TS2##WS(VECT_VAR(vreint_vector, TS1, WS, NS)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \ -+ VECT_VAR(vreint_vector_res, T1, W, N)); \ -+ CHECK_FP(TEST_MSG, T1, W, N, PRIx##W, EXPECTED, ""); -+ -+ DECL_VARIABLE_ALL_VARIANTS(vreint_vector); -+ DECL_VARIABLE_ALL_VARIANTS(vreint_vector_res); -+ -+ clean_results (); -+ -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vreint_vector, buffer); -+ VLOAD(vreint_vector, buffer, , poly, p, 64, 1); -+ VLOAD(vreint_vector, buffer, q, poly, p, 64, 2); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ VLOAD(vreint_vector, buffer, , float, f, 16, 4); -+ VLOAD(vreint_vector, buffer, q, float, f, 16, 8); -+#endif -+ VLOAD(vreint_vector, buffer, , float, f, 32, 2); -+ VLOAD(vreint_vector, buffer, q, float, f, 32, 4); -+ -+ /* vreinterpret_p64_* tests. 
*/ -+#undef TEST_MSG -+#define TEST_MSG "VREINTERPRET_P64_*" -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, int, s, 8, 8, vreint_expected_p64_s8); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, int, s, 16, 4, vreint_expected_p64_s16); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, int, s, 32, 2, vreint_expected_p64_s32); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, int, s, 64, 1, vreint_expected_p64_s64); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, uint, u, 8, 8, vreint_expected_p64_u8); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, uint, u, 16, 4, vreint_expected_p64_u16); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, uint, u, 32, 2, vreint_expected_p64_u32); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, uint, u, 64, 1, vreint_expected_p64_u64); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, poly, p, 8, 8, vreint_expected_p64_p8); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, poly, p, 16, 4, vreint_expected_p64_p16); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, float, f, 16, 4, vreint_expected_p64_f16); -+#endif -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 64, 1, float, f, 32, 2, vreint_expected_p64_f32); -+ -+ /* vreinterpretq_p64_* tests. */ -+#undef TEST_MSG -+#define TEST_MSG "VREINTERPRETQ_P64_*" -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, int, s, 8, 16, vreint_expected_q_p64_s8); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, int, s, 16, 8, vreint_expected_q_p64_s16); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, int, s, 32, 4, vreint_expected_q_p64_s32); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, int, s, 64, 2, vreint_expected_q_p64_s64); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, uint, u, 8, 16, vreint_expected_q_p64_u8); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, uint, u, 16, 8, vreint_expected_q_p64_u16); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, uint, u, 32, 4, vreint_expected_q_p64_u32); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, uint, u, 64, 2, vreint_expected_q_p64_u64); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, poly, p, 8, 16, vreint_expected_q_p64_p8); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, poly, p, 16, 8, vreint_expected_q_p64_p16); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, float, f, 16, 8, vreint_expected_q_p64_f16); -+#endif -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 64, 2, float, f, 32, 4, vreint_expected_q_p64_f32); -+ -+ /* vreinterpret_*_p64 tests. 
*/ -+#undef TEST_MSG -+#define TEST_MSG "VREINTERPRET_*_P64" -+ -+ TEST_VREINTERPRET(, int, s, 8, 8, poly, p, 64, 1, vreint_expected_s8_p64); -+ TEST_VREINTERPRET(, int, s, 16, 4, poly, p, 64, 1, vreint_expected_s16_p64); -+ TEST_VREINTERPRET(, int, s, 32, 2, poly, p, 64, 1, vreint_expected_s32_p64); -+ TEST_VREINTERPRET(, int, s, 64, 1, poly, p, 64, 1, vreint_expected_s64_p64); -+ TEST_VREINTERPRET(, uint, u, 8, 8, poly, p, 64, 1, vreint_expected_u8_p64); -+ TEST_VREINTERPRET(, uint, u, 16, 4, poly, p, 64, 1, vreint_expected_u16_p64); -+ TEST_VREINTERPRET(, uint, u, 32, 2, poly, p, 64, 1, vreint_expected_u32_p64); -+ TEST_VREINTERPRET(, uint, u, 64, 1, poly, p, 64, 1, vreint_expected_u64_p64); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 8, 8, poly, p, 64, 1, vreint_expected_p8_p64); -+ TEST_VREINTERPRET_TO_POLY(, poly, p, 16, 4, poly, p, 64, 1, vreint_expected_p16_p64); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_FP(, float, f, 16, 4, poly, p, 64, 1, vreint_expected_f16_p64); -+#endif -+ TEST_VREINTERPRET_FP(, float, f, 32, 2, poly, p, 64, 1, vreint_expected_f32_p64); -+ TEST_VREINTERPRET(q, int, s, 8, 16, poly, p, 64, 2, vreint_expected_q_s8_p64); -+ TEST_VREINTERPRET(q, int, s, 16, 8, poly, p, 64, 2, vreint_expected_q_s16_p64); -+ TEST_VREINTERPRET(q, int, s, 32, 4, poly, p, 64, 2, vreint_expected_q_s32_p64); -+ TEST_VREINTERPRET(q, int, s, 64, 2, poly, p, 64, 2, vreint_expected_q_s64_p64); -+ TEST_VREINTERPRET(q, uint, u, 8, 16, poly, p, 64, 2, vreint_expected_q_u8_p64); -+ TEST_VREINTERPRET(q, uint, u, 16, 8, poly, p, 64, 2, vreint_expected_q_u16_p64); -+ TEST_VREINTERPRET(q, uint, u, 32, 4, poly, p, 64, 2, vreint_expected_q_u32_p64); -+ TEST_VREINTERPRET(q, uint, u, 64, 2, poly, p, 64, 2, vreint_expected_q_u64_p64); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 8, 16, poly, p, 64, 2, vreint_expected_q_p8_p64); -+ TEST_VREINTERPRET_TO_POLY(q, poly, p, 16, 8, poly, p, 64, 2, vreint_expected_q_p16_p64); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ TEST_VREINTERPRET_FP(q, float, f, 16, 8, poly, p, 64, 2, vreint_expected_q_f16_p64); -+#endif -+ TEST_VREINTERPRET_FP(q, float, f, 32, 4, poly, p, 64, 2, vreint_expected_q_f32_p64); -+ -+ return 0; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrev.c -@@ -63,6 +63,10 @@ VECT_VAR_DECL(expected_vrev64,uint,32,2) [] = { 0xfffffff1, 0xfffffff0 }; - VECT_VAR_DECL(expected_vrev64,poly,8,8) [] = { 0xf7, 0xf6, 0xf5, 0xf4, - 0xf3, 0xf2, 0xf1, 0xf0 }; - VECT_VAR_DECL(expected_vrev64,poly,16,4) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected_vrev64, hfloat, 16, 4) [] = { 0xca80, 0xcb00, -+ 0xcb80, 0xcc00 }; -+#endif - VECT_VAR_DECL(expected_vrev64,hfloat,32,2) [] = { 0xc1700000, 0xc1800000 }; - VECT_VAR_DECL(expected_vrev64,int,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, - 0xf3, 0xf2, 0xf1, 0xf0, -@@ -86,6 +90,12 @@ VECT_VAR_DECL(expected_vrev64,poly,8,16) [] = { 0xf7, 0xf6, 0xf5, 0xf4, - 0xfb, 0xfa, 0xf9, 0xf8 }; - VECT_VAR_DECL(expected_vrev64,poly,16,8) [] = { 0xfff3, 0xfff2, 0xfff1, 0xfff0, - 0xfff7, 0xfff6, 0xfff5, 0xfff4 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected_vrev64, hfloat, 16, 8) [] = { 0xca80, 0xcb00, -+ 0xcb80, 0xcc00, -+ 0xc880, 0xc900, -+ 0xc980, 0xca00 }; -+#endif - VECT_VAR_DECL(expected_vrev64,hfloat,32,4) [] = { 0xc1700000, 0xc1800000, - 0xc1500000, 0xc1600000 }; - -@@ -104,6 +114,10 @@ void 
exec_vrev (void) - - /* Initialize input "vector" from "buffer". */ - TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (FP16_SUPPORTED) -+ VLOAD (vector, buffer, , float, f, 16, 4); -+ VLOAD (vector, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector, buffer, , float, f, 32, 2); - VLOAD(vector, buffer, q, float, f, 32, 4); - -@@ -118,10 +132,10 @@ void exec_vrev (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vrev16, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev16, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev16, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev16, ""); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev16, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev16, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev16, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev16, ""); - - #undef TEST_MSG - #define TEST_MSG "VREV32" -@@ -142,14 +156,14 @@ void exec_vrev (void) - CHECK(TEST_MSG, int, 16, 4, PRIx16, expected_vrev32, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev32, ""); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_vrev32, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev32, ""); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev32, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev32, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev32, ""); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev32, ""); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_vrev32, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev32, ""); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_vrev32, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev32, ""); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev32, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev32, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev32, ""); - - #undef TEST_MSG - #define TEST_MSG "VREV64" -@@ -176,17 +190,23 @@ void exec_vrev (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vrev64, ""); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_vrev64, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_vrev64, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev64, ""); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev64, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vrev64, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_vrev64, ""); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_vrev64, ""); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_vrev64, ""); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_vrev64, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_vrev64, ""); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_vrev64, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_vrev64, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, ""); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_vrev64, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_vrev64, ""); - -+#if defined (FP16_SUPPORTED) -+ TEST_VREV (, float, f, 16, 4, 64); -+ TEST_VREV (q, float, f, 16, 8, 64); -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx32, expected_vrev64, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx32, expected_vrev64, ""); -+#endif - TEST_VREV(, float, f, 32, 2, 64); - TEST_VREV(q, float, f, 32, 4, 64); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_vrev64, ""); ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnd.c -@@ -0,0 +1,24 @@ -+/* { 
dg-require-effective-target arm_v8_neon_hw } */
-+/* { dg-add-options arm_v8_neon } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+#include "compute-ref-data.h"
-+
-+/* Expected results. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80 };
-+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80,
-+                                               0xca00, 0xc980,
-+                                               0xc900, 0xc880 };
-+#endif
-+VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
-+VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
-+                                               0xc1600000, 0xc1500000 };
-+
-+#define INSN vrnd
-+#define TEST_MSG "VRND"
-+
-+#include "vrndX.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndX.inc
-@@ -0,0 +1,63 @@
-+#define FNNAME1(NAME) exec_ ## NAME
-+#define FNNAME(NAME) FNNAME1 (NAME)
-+
-+void FNNAME (INSN) (void)
-+{
-+  /* vector_res = vrndX (vector), then store the result. */
-+#define TEST_VRND2(INSN, Q, T1, T2, W, N)	\
-+  VECT_VAR (vector_res, T1, W, N) =		\
-+    INSN##Q##_##T2##W (VECT_VAR (vector, T1, W, N));	\
-+  vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N),	\
-+		     VECT_VAR (vector_res, T1, W, N))
-+
-+  /* Two auxiliary macros are necessary to expand INSN. */
-+#define TEST_VRND1(INSN, Q, T1, T2, W, N)	\
-+  TEST_VRND2 (INSN, Q, T1, T2, W, N)
-+
-+#define TEST_VRND(Q, T1, T2, W, N)		\
-+  TEST_VRND1 (INSN, Q, T1, T2, W, N)
-+
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+  DECL_VARIABLE(vector, float, 16, 4);
-+  DECL_VARIABLE(vector, float, 16, 8);
-+#endif
-+  DECL_VARIABLE (vector, float, 32, 2);
-+  DECL_VARIABLE (vector, float, 32, 4);
-+
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+  DECL_VARIABLE(vector_res, float, 16, 4);
-+  DECL_VARIABLE(vector_res, float, 16, 8);
-+#endif
-+  DECL_VARIABLE (vector_res, float, 32, 2);
-+  DECL_VARIABLE (vector_res, float, 32, 4);
-+
-+  clean_results ();
-+
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+  VLOAD (vector, buffer, , float, f, 16, 4);
-+  VLOAD (vector, buffer, q, float, f, 16, 8);
-+#endif
-+  VLOAD (vector, buffer, , float, f, 32, 2);
-+  VLOAD (vector, buffer, q, float, f, 32, 4);
-+
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+  TEST_VRND ( , float, f, 16, 4);
-+  TEST_VRND (q, float, f, 16, 8);
-+#endif
-+  TEST_VRND ( , float, f, 32, 2);
-+  TEST_VRND (q, float, f, 32, 4);
-+
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected, "");
-+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected, "");
-+#endif
-+  CHECK_FP (TEST_MSG, float, 32, 2, PRIx32, expected, "");
-+  CHECK_FP (TEST_MSG, float, 32, 4, PRIx32, expected, "");
-+}
-+
-+int
-+main (void)
-+{
-+  FNNAME (INSN) ();
-+  return 0;
-+}
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrnda.c
-@@ -0,0 +1,24 @@
-+/* { dg-require-effective-target arm_v8_neon_hw } */
-+/* { dg-add-options arm_v8_neon } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+#include "compute-ref-data.h"
-+
-+/* Expected results. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80 };
-+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80,
-+                                               0xca00, 0xc980,
-+                                               0xc900, 0xc880 };
-+#endif
-+VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
-+VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
-+                                               0xc1600000, 0xc1500000 };
-+
-+#define INSN vrnda
-+#define TEST_MSG "VRNDA"
-+
-+#include "vrndX.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndah_f16_1.c
-@@ -0,0 +1,40 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+
-+#include <arm_fp16.h>
-+
-+/* Expected results (16-bit hexadecimal representation). */
-+uint16_t expected[] =
-+{
-+  0x0000 /* 0.000000 */,
-+  0x8000 /* -0.000000 */,
-+  0x4000 /* 2.000000 */,
-+  0x4200 /* 3.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0xc000 /* -2.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0xc800 /* -8.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0x4a80 /* 13.000000 */,
-+  0xc600 /* -6.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x7c00 /* inf */,
-+  0xfc00 /* -inf */
-+};
-+
-+#define TEST_MSG "VRNDAH_F16"
-+#define INSN_NAME vrndah_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for unary scalar operations. */
-+#include "unary_scalar_op.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndh_f16_1.c
-@@ -0,0 +1,40 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+
-+#include <arm_fp16.h>
-+
-+/* Expected results (16-bit hexadecimal representation). */
-+uint16_t expected[] =
-+{
-+  0x0000 /* 0.000000 */,
-+  0x8000 /* -0.000000 */,
-+  0x4000 /* 2.000000 */,
-+  0x4200 /* 3.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0xc000 /* -2.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0xc700 /* -7.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0x4a80 /* 13.000000 */,
-+  0xc600 /* -6.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x7c00 /* inf */,
-+  0xfc00 /* -inf */
-+};
-+
-+#define TEST_MSG "VRNDH_F16"
-+#define INSN_NAME vrndh_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for unary scalar operations. */
-+#include "unary_scalar_op.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndi_f16_1.c
-@@ -0,0 +1,71 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_neon } */
-+/* { dg-skip-if "" { arm*-*-* } } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+#include "compute-ref-data.h"
-+
-+#define FP16_C(a) ((__fp16) a)
-+#define A FP16_C (123.4)
-+#define RNDI_A 0x57B0 /* FP16_C (123). */
-+#define B FP16_C (-567.5)
-+#define RNDI_B 0xE070 /* FP16_C (-568). */
-+#define C FP16_C (-34.8)
-+#define RNDI_C 0xD060 /* FP16_C (-35). */
-+#define D FP16_C (1024)
-+#define RNDI_D 0x6400 /* FP16_C (1024). */
-+#define E FP16_C (663.1)
-+#define RNDI_E 0x612E /* FP16_C (663). */
-+#define F FP16_C (169.1)
-+#define RNDI_F 0x5948 /* FP16_C (169). */
-+#define G FP16_C (-4.8)
-+#define RNDI_G 0xC500 /* FP16_C (-5). */
-+#define H FP16_C (77.5)
-+#define RNDI_H 0x54E0 /* FP16_C (78). */
-+
-+/* Expected results for vrndi. */
-+VECT_VAR_DECL (expected_static, hfloat, 16, 4) []
-+  = { RNDI_A, RNDI_B, RNDI_C, RNDI_D };
-+
-+VECT_VAR_DECL (expected_static, hfloat, 16, 8) []
-+  = { RNDI_A, RNDI_B, RNDI_C, RNDI_D, RNDI_E, RNDI_F, RNDI_G, RNDI_H };
-+
-+void exec_vrndi_f16 (void)
-+{
-+#undef TEST_MSG
-+#define TEST_MSG "VRNDI (FP16)"
-+  clean_results ();
-+
-+  DECL_VARIABLE(vsrc, float, 16, 4);
-+  VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A, B, C, D};
-+  VLOAD (vsrc, buf_src, , float, f, 16, 4);
-+  DECL_VARIABLE (vector_res, float, 16, 4)
-+    = vrndi_f16 (VECT_VAR (vsrc, float, 16, 4));
-+  vst1_f16 (VECT_VAR (result, float, 16, 4),
-+	    VECT_VAR (vector_res, float, 16, 4));
-+
-+  CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_static, "");
-+
-+#undef TEST_MSG
-+#define TEST_MSG "VRNDIQ (FP16)"
-+  clean_results ();
-+
-+  DECL_VARIABLE(vsrc, float, 16, 8);
-+  VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A, B, C, D, E, F, G, H};
-+  VLOAD (vsrc, buf_src, q, float, f, 16, 8);
-+  DECL_VARIABLE (vector_res, float, 16, 8)
-+    = vrndiq_f16 (VECT_VAR (vsrc, float, 16, 8));
-+  vst1q_f16 (VECT_VAR (result, float, 16, 8),
-+	     VECT_VAR (vector_res, float, 16, 8));
-+
-+  CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_static, "");
-+}
-+
-+int
-+main (void)
-+{
-+  exec_vrndi_f16 ();
-+  return 0;
-+}
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndih_f16_1.c
-@@ -0,0 +1,40 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+
-+#include <arm_fp16.h>
-+
-+/* Expected results (16-bit hexadecimal representation). */
-+uint16_t expected[] =
-+{
-+  0x0000 /* 0.000000 */,
-+  0x8000 /* -0.000000 */,
-+  0x4000 /* 2.000000 */,
-+  0x4200 /* 3.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0xc000 /* -2.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0xc800 /* -8.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0x4a80 /* 13.000000 */,
-+  0xc600 /* -6.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x7c00 /* inf */,
-+  0xfc00 /* -inf */
-+};
-+
-+#define TEST_MSG "VRNDIH_F16"
-+#define INSN_NAME vrndih_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for unary scalar operations. */
-+#include "unary_scalar_op.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndm.c
-@@ -0,0 +1,24 @@
-+/* { dg-require-effective-target arm_v8_neon_hw } */
-+/* { dg-add-options arm_v8_neon } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+#include "compute-ref-data.h"
-+
-+/* Expected results. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80 };
-+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80,
-+                                               0xca00, 0xc980,
-+                                               0xc900, 0xc880 };
-+#endif
-+VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
-+VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
-+                                               0xc1600000, 0xc1500000 };
-+
-+#define INSN vrndm
-+#define TEST_MSG "VRNDM"
-+
-+#include "vrndX.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndmh_f16_1.c
-@@ -0,0 +1,40 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+
-+#include <arm_fp16.h>
-+
-+/* Expected results (16-bit hexadecimal representation). */
-+uint16_t expected[] =
-+{
-+  0x0000 /* 0.000000 */,
-+  0x8000 /* -0.000000 */,
-+  0x4000 /* 2.000000 */,
-+  0x4200 /* 3.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0xc200 /* -3.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0xc800 /* -8.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0x4a80 /* 13.000000 */,
-+  0xc700 /* -7.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x7c00 /* inf */,
-+  0xfc00 /* -inf */
-+};
-+
-+#define TEST_MSG "VRNDMH_F16"
-+#define INSN_NAME vrndmh_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for unary scalar operations. */
-+#include "unary_scalar_op.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndn.c
-@@ -0,0 +1,24 @@
-+/* { dg-require-effective-target arm_v8_neon_hw } */
-+/* { dg-add-options arm_v8_neon } */
-+
-+#include <arm_neon.h>
-+#include "arm-neon-ref.h"
-+#include "compute-ref-data.h"
-+
-+/* Expected results. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80 };
-+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80,
-+                                               0xca00, 0xc980,
-+                                               0xc900, 0xc880 };
-+#endif
-+VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
-+VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
-+                                               0xc1600000, 0xc1500000 };
-+
-+#define INSN vrndn
-+#define TEST_MSG "VRNDN"
-+
-+#include "vrndX.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndnh_f16_1.c
-@@ -0,0 +1,40 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+
-+#include <arm_fp16.h>
-+
-+/* Expected results (16-bit hexadecimal representation).
*/ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x4000 /* 2.000000 */, -+ 0x4200 /* 3.000000 */, -+ 0x4d00 /* 20.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0xc000 /* -2.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0xc800 /* -8.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x0000 /* 0.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x4a80 /* 13.000000 */, -+ 0xc600 /* -6.000000 */, -+ 0x4d00 /* 20.000000 */, -+ 0x7c00 /* inf */, -+ 0xfc00 /* -inf */ -+}; -+ -+#define TEST_MSG "VRNDNH_F16" -+#define INSN_NAME vrndnh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndp.c -@@ -0,0 +1,24 @@ -+/* { dg-require-effective-target arm_v8_neon_hw } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, -+ 0xcb00, 0xca80 }; -+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, -+ 0xcb00, 0xca80, -+ 0xca00, 0xc980, -+ 0xc900, 0xc880 }; -+#endif -+VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 }; -+VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000, -+ 0xc1600000, 0xc1500000 }; -+ -+#define INSN vrndp -+#define TEST_MSG "VRNDP" -+ -+#include "vrndX.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndph_f16_1.c -@@ -0,0 +1,40 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x4000 /* 2.000000 */, -+ 0x4400 /* 4.000000 */, -+ 0x4d00 /* 20.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0xc000 /* -2.000000 */, -+ 0x4000 /* 2.000000 */, -+ 0xc700 /* -7.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x3c00 /* 1.000000 */, -+ 0x4b00 /* 14.000000 */, -+ 0xc600 /* -6.000000 */, -+ 0x4d00 /* 20.000000 */, -+ 0x7c00 /* inf */, -+ 0xfc00 /* -inf */ -+}; -+ -+#define TEST_MSG "VRNDPH_F16" -+#define INSN_NAME vrndph_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndx.c -@@ -0,0 +1,24 @@ -+/* { dg-require-effective-target arm_v8_neon_hw } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected results. 
*/
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80 };
-+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb80,
-+                                               0xcb00, 0xca80,
-+                                               0xca00, 0xc980,
-+                                               0xc900, 0xc880 };
-+#endif
-+VECT_VAR_DECL (expected, hfloat, 32, 2) [] = { 0xc1800000, 0xc1700000 };
-+VECT_VAR_DECL (expected, hfloat, 32, 4) [] = { 0xc1800000, 0xc1700000,
-+                                               0xc1600000, 0xc1500000 };
-+
-+#define INSN vrndx
-+#define TEST_MSG "VRNDX"
-+
-+#include "vrndX.inc"
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrndxh_f16_1.c
-@@ -0,0 +1,40 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+
-+#include <arm_fp16.h>
-+
-+/* Expected results (16-bit hexadecimal representation). */
-+uint16_t expected[] =
-+{
-+  0x0000 /* 0.000000 */,
-+  0x8000 /* -0.000000 */,
-+  0x4000 /* 2.000000 */,
-+  0x4200 /* 3.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0xc000 /* -2.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0xc800 /* -8.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x0000 /* 0.000000 */,
-+  0x3c00 /* 1.000000 */,
-+  0x4a80 /* 13.000000 */,
-+  0xc600 /* -6.000000 */,
-+  0x4d00 /* 20.000000 */,
-+  0x7c00 /* inf */,
-+  0xfc00 /* -inf */
-+};
-+
-+#define TEST_MSG "VRNDXH_F16"
-+#define INSN_NAME vrndxh_f16
-+
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for unary scalar operations. */
-+#include "unary_scalar_op.inc"
---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrte.c
-@@ -7,6 +7,11 @@
- VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffff, 0xffffffff };
- VECT_VAR_DECL(expected,uint,32,4) [] = { 0x9c800000, 0x9c800000,
- 					 0x9c800000, 0x9c800000 };
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0x324c, 0x324c, 0x324c, 0x324c };
-+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0x3380, 0x3380, 0x3380, 0x3380,
-+                                              0x3380, 0x3380, 0x3380, 0x3380 };
-+#endif
- VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x3e498000, 0x3e498000 };
- VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x3e700000, 0x3e700000,
- 					   0x3e700000, 0x3e700000 };
-@@ -22,17 +27,39 @@ VECT_VAR_DECL(expected_2,uint,32,4) [] = { 0xed000000, 0xed000000,
- 					   0xed000000, 0xed000000 };
- 
- /* Expected results with FP special inputs values (NaNs, ...). */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0x7e00, 0x7e00,
-+                                                  0x7e00, 0x7e00 };
-+VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x7c00, 0x7c00,
-+                                                  0x7c00, 0x7c00,
-+                                                  0x7c00, 0x7c00,
-+                                                  0x7c00, 0x7c00 };
-+#endif
- VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 };
- VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x7f800000, 0x7f800000,
- 					       0x7f800000, 0x7f800000 };
- 
- /* Expected results with FP special inputs values
-    (negative, infinity).
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0 }; -+#endif - VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; - VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Expected results with FP special inputs values - (-0, -infinity). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp3, hfloat, 16, 4) [] = { 0xfc00, 0xfc00, -+ 0xfc00, 0xfc00 }; -+VECT_VAR_DECL(expected_fp3, hfloat, 16, 8) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+#endif - VECT_VAR_DECL(expected_fp3,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; - VECT_VAR_DECL(expected_fp3,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, - 0x7fc00000, 0x7fc00000 }; -@@ -50,32 +77,60 @@ void exec_vrsqrte(void) - VECT_VAR(vector_res, T1, W, N)) - - DECL_VARIABLE(vector, uint, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+#endif - DECL_VARIABLE(vector, float, 32, 2); - DECL_VARIABLE(vector, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 8); -+#endif - DECL_VARIABLE(vector, float, 32, 4); - - DECL_VARIABLE(vector_res, uint, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, float, 16, 4); -+#endif - DECL_VARIABLE(vector_res, float, 32, 2); - DECL_VARIABLE(vector_res, uint, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif - DECL_VARIABLE(vector_res, float, 32, 4); - - clean_results (); - - /* Choose init value arbitrarily. */ - VDUP(vector, , uint, u, 32, 2, 0x12345678); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, 25.799999f); -+#endif - VDUP(vector, , float, f, 32, 2, 25.799999f); - VDUP(vector, q, uint, u, 32, 4, 0xABCDEF10); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, q, float, f, 16, 8, 18.2f); -+#endif - VDUP(vector, q, float, f, 32, 4, 18.2f); - - /* Apply the operator. */ - TEST_VRSQRTE(, uint, u, 32, 2); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRSQRTE(, float, f, 16, 4); -+#endif - TEST_VRSQRTE(, float, f, 32, 2); - TEST_VRSQRTE(q, uint, u, 32, 4); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRSQRTE(q, float, f, 16, 8); -+#endif - TEST_VRSQRTE(q, float, f, 32, 4); - - #define CMT "" - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, CMT); -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); - -@@ -110,42 +165,78 @@ void exec_vrsqrte(void) - - - /* Test FP variants with special input values (NaNs, ...). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, NAN); -+ VDUP(vector, q, float, f, 16, 8, 0.0f); -+#endif - VDUP(vector, , float, f, 32, 2, NAN); - VDUP(vector, q, float, f, 32, 4, 0.0f); - - /* Apply the operator. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRSQRTE(, float, f, 16, 4); -+ TEST_VRSQRTE(q, float, f, 16, 8); -+#endif - TEST_VRSQRTE(, float, f, 32, 2); - TEST_VRSQRTE(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (NaN, 0)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); - - - /* Test FP variants with special input values (negative, infinity). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, -1.0f); -+ VDUP(vector, q, float, f, 16, 8, HUGE_VALF); -+#endif - VDUP(vector, , float, f, 32, 2, -1.0f); - VDUP(vector, q, float, f, 32, 4, HUGE_VALF); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRSQRTE(, float, f, 16, 4); -+ TEST_VRSQRTE(q, float, f, 16, 8); -+#endif - TEST_VRSQRTE(, float, f, 32, 2); - TEST_VRSQRTE(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (negative, infinity)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT); - - /* Test FP variants with special input values (-0, -infinity). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, -0.0f); -+ VDUP(vector, q, float, f, 16, 8, -HUGE_VALF); -+#endif - VDUP(vector, , float, f, 32, 2, -0.0f); - VDUP(vector, q, float, f, 32, 4, -HUGE_VALF); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRSQRTE(, float, f, 16, 4); -+ TEST_VRSQRTE(q, float, f, 16, 8); -+#endif - TEST_VRSQRTE(, float, f, 32, 2); - TEST_VRSQRTE(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (-0, -infinity)" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp3, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp3, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp3, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp3, CMT); - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrteh_f16_1.c -@@ -0,0 +1,30 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_fp16.h> -+ -+/* Input values. */ -+float16_t input[] = { 123.4, 67.8, 34.8, 24.0, 66.1, 144.0, 4.8, 77.0 }; -+uint16_t expected[] = { 0x2DC4 /* FP16_C (1/__builtin_sqrtf (123.4)). */, -+ 0x2FC8 /* FP16_C (1/__builtin_sqrtf (67.8)). */, -+ 0x316C /* FP16_C (1/__builtin_sqrtf (34.8)). */, -+ 0x3288 /* FP16_C (1/__builtin_sqrtf (24.0)). */, -+ 0x2FDC /* FP16_C (1/__builtin_sqrtf (66.1)). */, -+ 0x2D54 /* FP16_C (1/__builtin_sqrtf (144.0)). */, -+ 0x3750 /* FP16_C (1/__builtin_sqrtf (4.8)). */, -+ 0x2F48 /* FP16_C (1/__builtin_sqrtf (77.0)). 
*/ }; -+ -+#define TEST_MSG "VRSQRTEH_F16" -+#define INSN_NAME vrsqrteh_f16 -+ -+#define INPUT input -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. */ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrts.c -@@ -4,22 +4,51 @@ - #include <math.h> - - /* Expected results. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected, hfloat, 16, 4) [] = { 0xd3cb, 0xd3cb, 0xd3cb, 0xd3cb }; -+VECT_VAR_DECL(expected, hfloat, 16, 8) [] = { 0xc726, 0xc726, 0xc726, 0xc726, -+ 0xc726, 0xc726, 0xc726, 0xc726 }; -+#endif - VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc2796b84, 0xc2796b84 }; - VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc0e4a3d8, 0xc0e4a3d8, - 0xc0e4a3d8, 0xc0e4a3d8 }; - - /* Expected results with input=NaN. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_nan, hfloat, 16, 4) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+VECT_VAR_DECL(expected_nan, hfloat, 16, 8) [] = { 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00, -+ 0x7e00, 0x7e00 }; -+#endif - VECT_VAR_DECL(expected_nan,hfloat,32,2) [] = { 0x7fc00000, 0x7fc00000 }; - VECT_VAR_DECL(expected_nan,hfloat,32,4) [] = { 0x7fc00000, 0x7fc00000, - 0x7fc00000, 0x7fc00000 }; - - /* Expected results with FP special inputs values (infinity, 0). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp1, hfloat, 16, 4) [] = { 0xfc00, 0xfc00, -+ 0xfc00, 0xfc00 }; -+VECT_VAR_DECL(expected_fp1, hfloat, 16, 8) [] = { 0x3e00, 0x3e00, -+ 0x3e00, 0x3e00, -+ 0x3e00, 0x3e00, -+ 0x3e00, 0x3e00 }; -+#endif - VECT_VAR_DECL(expected_fp1,hfloat,32,2) [] = { 0xff800000, 0xff800000 }; - VECT_VAR_DECL(expected_fp1,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, - 0x3fc00000, 0x3fc00000 }; - - /* Expected results with only FP special inputs values (infinity, - 0). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_fp2, hfloat, 16, 4) [] = { 0x3e00, 0x3e00, -+ 0x3e00, 0x3e00 }; -+VECT_VAR_DECL(expected_fp2, hfloat, 16, 8) [] = { 0x3e00, 0x3e00, -+ 0x3e00, 0x3e00, -+ 0x3e00, 0x3e00, -+ 0x3e00, 0x3e00 }; -+#endif - VECT_VAR_DECL(expected_fp2,hfloat,32,2) [] = { 0x3fc00000, 0x3fc00000 }; - VECT_VAR_DECL(expected_fp2,hfloat,32,4) [] = { 0x3fc00000, 0x3fc00000, - 0x3fc00000, 0x3fc00000 }; -@@ -38,75 +67,143 @@ void exec_vrsqrts(void) - VECT_VAR(vector_res, T1, W, N)) - - /* No need for integer variants. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+#endif - DECL_VARIABLE(vector, float, 32, 2); - DECL_VARIABLE(vector, float, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 8); -+#endif - DECL_VARIABLE(vector2, float, 32, 2); - DECL_VARIABLE(vector2, float, 32, 4); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+#endif - DECL_VARIABLE(vector_res, float, 32, 2); - DECL_VARIABLE(vector_res, float, 32, 4); - - clean_results (); - - /* Choose init value arbitrarily. 
*/ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, 12.9f); -+ VDUP(vector, q, float, f, 16, 8, 9.1f); -+#endif - VDUP(vector, , float, f, 32, 2, 12.9f); - VDUP(vector, q, float, f, 32, 4, 9.1f); - -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector2, , float, f, 16, 4, 9.9f); -+ VDUP(vector2, q, float, f, 16, 8, 1.9f); -+#endif - VDUP(vector2, , float, f, 32, 2, 9.9f); - VDUP(vector2, q, float, f, 32, 4, 1.9f); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRSQRTS(, float, f, 16, 4); -+ TEST_VRSQRTS(q, float, f, 16, 8); -+#endif - TEST_VRSQRTS(, float, f, 32, 2); - TEST_VRSQRTS(q, float, f, 32, 4); - - #define CMT "" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected, CMT); - - - /* Test FP variants with special input values (NaN). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, NAN); -+ VDUP(vector2, q, float, f, 16, 8, NAN); -+#endif - VDUP(vector, , float, f, 32, 2, NAN); - VDUP(vector2, q, float, f, 32, 4, NAN); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRSQRTS(, float, f, 16, 4); -+ TEST_VRSQRTS(q, float, f, 16, 8); -+#endif - TEST_VRSQRTS(, float, f, 32, 2); - TEST_VRSQRTS(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (NAN) and normal values" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_nan, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_nan, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_nan, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_nan, CMT); - - - /* Test FP variants with special input values (infinity, 0). */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ VDUP(vector, , float, f, 16, 4, HUGE_VALF); -+ VDUP(vector, q, float, f, 16, 8, 0.0f); -+ /* Restore a normal value in vector2. */ -+ VDUP(vector2, q, float, f, 16, 8, 3.2f); -+#endif - VDUP(vector, , float, f, 32, 2, HUGE_VALF); - VDUP(vector, q, float, f, 32, 4, 0.0f); - /* Restore a normal value in vector2. */ - VDUP(vector2, q, float, f, 32, 4, 3.2f); - - /* Apply the operator. */ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ TEST_VRSQRTS(, float, f, 16, 4); -+ TEST_VRSQRTS(q, float, f, 16, 8); -+#endif - TEST_VRSQRTS(, float, f, 32, 2); - TEST_VRSQRTS(q, float, f, 32, 4); - - #undef CMT - #define CMT " FP special (infinity, 0) and normal values" -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp1, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp1, CMT); -+#endif - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp1, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp1, CMT); - - - /* Test FP variants with only special input values (infinity, 0). 
*/
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+  VDUP(vector, , float, f, 16, 4, HUGE_VALF);
-+  VDUP(vector, q, float, f, 16, 8, 0.0f);
-+  VDUP(vector2, , float, f, 16, 4, -0.0f);
-+  VDUP(vector2, q, float, f, 16, 8, HUGE_VALF);
-+#endif
-   VDUP(vector, , float, f, 32, 2, HUGE_VALF);
-   VDUP(vector, q, float, f, 32, 4, 0.0f);
-   VDUP(vector2, , float, f, 32, 2, -0.0f);
-   VDUP(vector2, q, float, f, 32, 4, HUGE_VALF);
- 
-   /* Apply the operator. */
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+  TEST_VRSQRTS(, float, f, 16, 4);
-+  TEST_VRSQRTS(q, float, f, 16, 8);
-+#endif
-   TEST_VRSQRTS(, float, f, 32, 2);
-   TEST_VRSQRTS(q, float, f, 32, 4);
- 
- #undef CMT
- #define CMT " only FP special (infinity, 0)"
-+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
-+  CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_fp2, CMT);
-+  CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_fp2, CMT);
-+#endif
-   CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_fp2, CMT);
-   CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_fp2, CMT);
- }
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vrsqrtsh_f16_1.c
-@@ -0,0 +1,50 @@
-+/* { dg-do run } */
-+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */
-+/* { dg-add-options arm_v8_2a_fp16_scalar } */
-+/* { dg-skip-if "" { arm*-*-* } } */
-+
-+#include <arm_fp16.h>
-+
-+/* Input values. */
-+#define A 12.4
-+#define B -5.8
-+#define C -3.8
-+#define D 10
-+#define E 66.1
-+#define F 16.1
-+#define G -4.8
-+#define H -77
-+
-+#define I 0.7
-+#define J -78
-+#define K 10.23
-+#define L 98
-+#define M 87
-+#define N -87.81
-+#define O -1.1
-+#define P 47.8
-+
-+float16_t input_1[] = { A, B, C, D, I, J, K, L };
-+float16_t input_2[] = { E, F, G, H, M, N, O, P };
-+uint16_t expected[] = { 0xDE62 /* (3.0f + (-A) * E) / 2.0f. */,
-+			0x5206 /* (3.0f + (-B) * F) / 2.0f. */,
-+			0xC7A0 /* (3.0f + (-C) * G) / 2.0f. */,
-+			0x5E0A /* (3.0f + (-D) * H) / 2.0f. */,
-+			0xCF3D /* (3.0f + (-I) * M) / 2.0f. */,
-+			0xEAB0 /* (3.0f + (-J) * N) / 2.0f. */,
-+			0x471F /* (3.0f + (-K) * O) / 2.0f. */,
-+			0xE893 /* (3.0f + (-L) * P) / 2.0f. */ };
-+
-+#define TEST_MSG "VRSQRTSH_F16"
-+#define INSN_NAME vrsqrtsh_f16
-+
-+#define INPUT_1 input_1
-+#define INPUT_2 input_2
-+#define EXPECTED expected
-+
-+#define INPUT_TYPE float16_t
-+#define OUTPUT_TYPE float16_t
-+#define OUTPUT_TYPE_SIZE 16
-+
-+/* Include the template for binary scalar operations. 
*/ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsXi_n.inc -@@ -76,16 +76,16 @@ void FNNAME (INSN_NAME) (void) - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected, ""); - CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected, ""); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected, ""); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected, ""); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected, ""); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected, ""); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected, ""); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected, ""); - - #ifdef EXTRA_TESTS - EXTRA_TESTS(); ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshl.c -@@ -101,10 +101,8 @@ VECT_VAR_DECL(expected_negative_shift,uint,64,2) [] = { 0x7ffffffffffffff, - 0x7ffffffffffffff }; - - --#ifndef INSN_NAME - #define INSN_NAME vshl - #define TEST_MSG "VSHL/VSHLQ" --#endif - - #define FNNAME1(NAME) exec_ ## NAME - #define FNNAME(NAME) FNNAME1(NAME) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vshuffle.inc -@@ -53,9 +53,17 @@ void FNNAME (INSN_NAME) (void) - DECL_VSHUFFLE(float, 32, 4) - - DECL_ALL_VSHUFFLE(); -+#if defined (FP16_SUPPORTED) -+ DECL_VSHUFFLE (float, 16, 4); -+ DECL_VSHUFFLE (float, 16, 8); -+#endif - - /* Initialize input "vector" from "buffer". */ - TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector1, buffer); -+#if defined (FP16_SUPPORTED) -+ VLOAD (vector1, buffer, , float, f, 16, 4); -+ VLOAD (vector1, buffer, q, float, f, 16, 8); -+#endif - VLOAD(vector1, buffer, , float, f, 32, 2); - VLOAD(vector1, buffer, q, float, f, 32, 4); - -@@ -68,6 +76,9 @@ void FNNAME (INSN_NAME) (void) - VDUP(vector2, , uint, u, 32, 2, 0x77); - VDUP(vector2, , poly, p, 8, 8, 0x55); - VDUP(vector2, , poly, p, 16, 4, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. 
*/ -+#endif - VDUP(vector2, , float, f, 32, 2, 33.6f); - - VDUP(vector2, q, int, s, 8, 16, 0x11); -@@ -78,8 +89,11 @@ void FNNAME (INSN_NAME) (void) - VDUP(vector2, q, uint, u, 32, 4, 0x77); - VDUP(vector2, q, poly, p, 8, 16, 0x55); - VDUP(vector2, q, poly, p, 16, 8, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, q, float, f, 16, 8, 14.6f); -+#endif - VDUP(vector2, q, float, f, 32, 4, 33.8f); -- -+ - #define TEST_ALL_VSHUFFLE(INSN) \ - TEST_VSHUFFLE(INSN, , int, s, 8, 8); \ - TEST_VSHUFFLE(INSN, , int, s, 16, 4); \ -@@ -100,6 +114,10 @@ void FNNAME (INSN_NAME) (void) - TEST_VSHUFFLE(INSN, q, poly, p, 16, 8); \ - TEST_VSHUFFLE(INSN, q, float, f, 32, 4) - -+#define TEST_VSHUFFLE_FP16(INSN) \ -+ TEST_VSHUFFLE(INSN, , float, f, 16, 4); \ -+ TEST_VSHUFFLE(INSN, q, float, f, 16, 8); -+ - #define TEST_ALL_EXTRA_CHUNKS() \ - TEST_EXTRA_CHUNK(int, 8, 8, 1); \ - TEST_EXTRA_CHUNK(int, 16, 4, 1); \ -@@ -130,8 +148,8 @@ void FNNAME (INSN_NAME) (void) - CHECK(test_name, uint, 8, 8, PRIx8, EXPECTED, comment); \ - CHECK(test_name, uint, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 2, PRIx32, EXPECTED, comment); \ -- CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 8, 8, PRIx8, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 4, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment); \ - \ - CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment); \ -@@ -140,20 +158,40 @@ void FNNAME (INSN_NAME) (void) - CHECK(test_name, uint, 8, 16, PRIx8, EXPECTED, comment); \ - CHECK(test_name, uint, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK(test_name, uint, 32, 4, PRIx32, EXPECTED, comment); \ -- CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ -- CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 8, 16, PRIx8, EXPECTED, comment); \ -+ CHECK_POLY(test_name, poly, 16, 8, PRIx16, EXPECTED, comment); \ - CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment); \ -- } \ -+ } -+ -+#define CHECK_RESULTS_VSHUFFLE_FP16(test_name,EXPECTED,comment) \ -+ { \ -+ CHECK_FP (test_name, float, 16, 4, PRIx16, EXPECTED, comment); \ -+ CHECK_FP (test_name, float, 16, 8, PRIx16, EXPECTED, comment); \ -+ } - - clean_results (); - - /* Execute the tests. 
*/ - TEST_ALL_VSHUFFLE(INSN_NAME); -+#if defined (FP16_SUPPORTED) -+ TEST_VSHUFFLE_FP16 (INSN_NAME); -+#endif - - CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected0, "(chunk 0)"); -+#if defined (FP16_SUPPORTED) -+ CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected0, "(chunk 0)"); -+#endif - - TEST_ALL_EXTRA_CHUNKS(); -+#if defined (FP16_SUPPORTED) -+ TEST_EXTRA_CHUNK (float, 16, 4, 1); -+ TEST_EXTRA_CHUNK (float, 16, 8, 1); -+#endif -+ - CHECK_RESULTS_VSHUFFLE (TEST_MSG, expected1, "(chunk 1)"); -+#if defined (FP16_SUPPORTED) -+ CHECK_RESULTS_VSHUFFLE_FP16 (TEST_MSG, expected1, "(chunk 1)"); -+#endif - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsli_n.c -@@ -161,14 +161,16 @@ void vsli_extra(void) - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_shift, COMMENT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_max_shift, COMMENT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_max_shift, COMMENT); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shift, COMMENT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shift, COMMENT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shift, COMMENT); -+ CHECK(TEST_MSG, int, 64, 2, PRIx64, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shift, COMMENT); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_max_shift, COMMENT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_max_shift, COMMENT); -+ CHECK(TEST_MSG, uint, 64, 2, PRIx64, expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_max_shift, COMMENT); - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrt_f16_1.c -@@ -0,0 +1,72 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+#define FP16_C(a) ((__fp16) a) -+#define A FP16_C (123.4) -+#define B FP16_C (567.8) -+#define C FP16_C (34.8) -+#define D FP16_C (1024) -+#define E FP16_C (663.1) -+#define F FP16_C (144.0) -+#define G FP16_C (4.8) -+#define H FP16_C (77) -+ -+#define SQRT_A 0x498E /* FP16_C (__builtin_sqrtf (123.4)). */ -+#define SQRT_B 0x4DF5 /* FP16_C (__builtin_sqrtf (567.8)). */ -+#define SQRT_C 0x45E6 /* FP16_C (__builtin_sqrtf (34.8)). */ -+#define SQRT_D 0x5000 /* FP16_C (__builtin_sqrtf (1024)). */ -+#define SQRT_E 0x4E70 /* FP16_C (__builtin_sqrtf (663.1)). */ -+#define SQRT_F 0x4A00 /* FP16_C (__builtin_sqrtf (144.0)). */ -+#define SQRT_G 0x4062 /* FP16_C (__builtin_sqrtf (4.8)). */ -+#define SQRT_H 0x4863 /* FP16_C (__builtin_sqrtf (77)). */ -+ -+/* Expected results for vsqrt. 
*/ -+VECT_VAR_DECL (expected_static, hfloat, 16, 4) [] -+ = { SQRT_A, SQRT_B, SQRT_C, SQRT_D }; -+ -+VECT_VAR_DECL (expected_static, hfloat, 16, 8) [] -+ = { SQRT_A, SQRT_B, SQRT_C, SQRT_D, SQRT_E, SQRT_F, SQRT_G, SQRT_H }; -+ -+void exec_vsqrt_f16 (void) -+{ -+#undef TEST_MSG -+#define TEST_MSG "VSQRT (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 4); -+ VECT_VAR_DECL (buf_src, float, 16, 4) [] = {A, B, C, D}; -+ VLOAD (vsrc, buf_src, , float, f, 16, 4); -+ DECL_VARIABLE (vector_res, float, 16, 4) -+ = vsqrt_f16 (VECT_VAR (vsrc, float, 16, 4)); -+ vst1_f16 (VECT_VAR (result, float, 16, 4), -+ VECT_VAR (vector_res, float, 16, 4)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 4, PRIx16, expected_static, ""); -+ -+#undef TEST_MSG -+#define TEST_MSG "VSQRTQ (FP16)" -+ clean_results (); -+ -+ DECL_VARIABLE(vsrc, float, 16, 8); -+ VECT_VAR_DECL (buf_src, float, 16, 8) [] = {A, B, C, D, E, F, G, H}; -+ VLOAD (vsrc, buf_src, q, float, f, 16, 8); -+ DECL_VARIABLE (vector_res, float, 16, 8) -+ = vsqrtq_f16 (VECT_VAR (vsrc, float, 16, 8)); -+ vst1q_f16 (VECT_VAR (result, float, 16, 8), -+ VECT_VAR (vector_res, float, 16, 8)); -+ -+ CHECK_FP (TEST_MSG, float, 16, 8, PRIx16, expected_static, ""); -+} -+ -+int -+main (void) -+{ -+ exec_vsqrt_f16 (); -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsqrth_f16_1.c -@@ -0,0 +1,40 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0x0000 /* 0.000000 */, -+ 0x8000 /* -0.000000 */, -+ 0x3da8 /* 1.414062 */, -+ 0x3f0b /* 1.760742 */, -+ 0x4479 /* 4.472656 */, -+ 0x390f /* 0.632324 */, -+ 0x7e00 /* nan */, -+ 0x3c9d /* 1.153320 */, -+ 0x7e00 /* nan */, -+ 0x3874 /* 0.556641 */, -+ 0x38a2 /* 0.579102 */, -+ 0x39a8 /* 0.707031 */, -+ 0x3c00 /* 1.000000 */, -+ 0x433f /* 3.623047 */, -+ 0x7e00 /* nan */, -+ 0x4479 /* 4.472656 */, -+ 0x7c00 /* inf */, -+ 0x7e00 /* nan */ -+}; -+ -+#define TEST_MSG "VSQRTH_F16" -+#define INSN_NAME vsqrth_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for unary scalar operations. 
*/ -+#include "unary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsri_n.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsri_n.c -@@ -163,14 +163,14 @@ void vsri_extra(void) - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 64, 1, PRIx64, expected_max_shift, COMMENT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_max_shift, COMMENT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_max_shift, COMMENT); - CHECK(TEST_MSG, int, 8, 16, PRIx8, expected_max_shift, COMMENT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_max_shift, COMMENT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_max_shift, COMMENT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_max_shift, COMMENT); -- CHECK(TEST_MSG, poly, 8, 16, PRIx8, expected_max_shift, COMMENT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 8, 16, PRIx8, expected_max_shift, COMMENT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_max_shift, COMMENT); - } ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - void - f_vst2_lane_f16 (float16_t * p, float16x4x2_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst2q_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - void - f_vst2q_lane_f16 (float16_t * p, float16x8x2_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - void - f_vst3_lane_f16 (float16_t * p, float16x4x3_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst3q_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - void - f_vst3q_lane_f16 (float16_t * p, float16x8x3_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - void - f_vst4_lane_f16 (float16_t * p, 
float16x4x4_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vst4q_lane_f16_indices_1.c -@@ -2,6 +2,7 @@ - - /* { dg-do compile } */ - /* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */ -+/* { dg-require-effective-target arm_neon_fp16_ok { target { arm*-*-* } } } */ - - void - f_vst4q_lane_f16 (float16_t * p, float16x8x4_t v) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vstX_lane.c -@@ -14,6 +14,7 @@ VECT_VAR_DECL(expected_st2_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; - VECT_VAR_DECL(expected_st2_0,poly,8,8) [] = { 0xf0, 0xf1, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st2_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; - VECT_VAR_DECL(expected_st2_0,int,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -@@ -24,6 +25,8 @@ VECT_VAR_DECL(expected_st2_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, - 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st2_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, - 0x0, 0x0 }; - -@@ -39,6 +42,7 @@ VECT_VAR_DECL(expected_st2_1,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st2_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_1,hfloat,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -@@ -48,6 +52,8 @@ VECT_VAR_DECL(expected_st2_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - VECT_VAR_DECL(expected_st2_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st2_1,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st2_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Expected results for vst3, chunk 0. 
*/ -@@ -62,6 +68,7 @@ VECT_VAR_DECL(expected_st3_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; - VECT_VAR_DECL(expected_st3_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0x0, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0 }; -+VECT_VAR_DECL(expected_st3_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0 }; - VECT_VAR_DECL(expected_st3_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; - VECT_VAR_DECL(expected_st3_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -@@ -73,6 +80,8 @@ VECT_VAR_DECL(expected_st3_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, - 0xfffffff2, 0x0 }; - VECT_VAR_DECL(expected_st3_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st3_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, - 0xc1600000, 0x0 }; - -@@ -88,6 +97,7 @@ VECT_VAR_DECL(expected_st3_1,uint,32,2) [] = { 0xfffffff2, 0x0 }; - VECT_VAR_DECL(expected_st3_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st3_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_1,hfloat,32,2) [] = { 0xc1600000, 0x0 }; - VECT_VAR_DECL(expected_st3_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -@@ -97,6 +107,8 @@ VECT_VAR_DECL(expected_st3_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - VECT_VAR_DECL(expected_st3_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st3_1,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Expected results for vst3, chunk 2. */ -@@ -111,6 +123,7 @@ VECT_VAR_DECL(expected_st3_2,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st3_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_2,hfloat,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -@@ -120,6 +133,8 @@ VECT_VAR_DECL(expected_st3_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - VECT_VAR_DECL(expected_st3_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_2,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st3_2,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st3_2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Expected results for vst4, chunk 0. 
*/ -@@ -134,6 +149,7 @@ VECT_VAR_DECL(expected_st4_0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; - VECT_VAR_DECL(expected_st4_0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_0,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 }; -+VECT_VAR_DECL(expected_st4_0,hfloat,16,4) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80 }; - VECT_VAR_DECL(expected_st4_0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; - VECT_VAR_DECL(expected_st4_0,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, - 0x0, 0x0, 0x0, 0x0 }; -@@ -145,6 +161,8 @@ VECT_VAR_DECL(expected_st4_0,uint,32,4) [] = { 0xfffffff0, 0xfffffff1, - 0xfffffff2, 0xfffffff3 }; - VECT_VAR_DECL(expected_st4_0,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st4_0,hfloat,16,8) [] = { 0xcc00, 0xcb80, 0xcb00, 0xca80, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, - 0xc1600000, 0xc1500000 }; - -@@ -160,6 +178,7 @@ VECT_VAR_DECL(expected_st4_1,uint,32,2) [] = { 0xfffffff2, 0xfffffff3 }; - VECT_VAR_DECL(expected_st4_1,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_1,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st4_1,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_1,hfloat,32,2) [] = { 0xc1600000, 0xc1500000 }; - VECT_VAR_DECL(expected_st4_1,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -@@ -169,6 +188,8 @@ VECT_VAR_DECL(expected_st4_1,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - VECT_VAR_DECL(expected_st4_1,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_1,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st4_1,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_1,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Expected results for vst4, chunk 2. */ -@@ -183,6 +204,7 @@ VECT_VAR_DECL(expected_st4_2,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_2,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_2,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st4_2,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_2,hfloat,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_2,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -@@ -192,6 +214,8 @@ VECT_VAR_DECL(expected_st4_2,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - VECT_VAR_DECL(expected_st4_2,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_2,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st4_2,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_2,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Expected results for vst4, chunk 3. 
*/ -@@ -206,6 +230,7 @@ VECT_VAR_DECL(expected_st4_3,uint,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_3,poly,8,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_3,poly,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st4_3,hfloat,16,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_3,hfloat,32,2) [] = { 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_3,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -@@ -215,6 +240,8 @@ VECT_VAR_DECL(expected_st4_3,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - VECT_VAR_DECL(expected_st4_3,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_3,poly,16,8) [] = { 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0 }; -+VECT_VAR_DECL(expected_st4_3,hfloat,16,8) [] = { 0x0, 0x0, 0x0, 0x0, -+ 0x0, 0x0, 0x0, 0x0 }; - VECT_VAR_DECL(expected_st4_3,hfloat,32,4) [] = { 0x0, 0x0, 0x0, 0x0 }; - - /* Declare additional input buffers as needed. */ -@@ -229,6 +256,9 @@ VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 32, 2); - VECT_VAR_DECL_INIT(buffer_vld2_lane, uint, 64, 2); - VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 8, 2); - VECT_VAR_DECL_INIT(buffer_vld2_lane, poly, 16, 2); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 16, 2); -+#endif - VECT_VAR_DECL_INIT(buffer_vld2_lane, float, 32, 2); - - /* Input buffers for vld3_lane. */ -@@ -242,6 +272,9 @@ VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 32, 3); - VECT_VAR_DECL_INIT(buffer_vld3_lane, uint, 64, 3); - VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 8, 3); - VECT_VAR_DECL_INIT(buffer_vld3_lane, poly, 16, 3); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 16, 3); -+#endif - VECT_VAR_DECL_INIT(buffer_vld3_lane, float, 32, 3); - - /* Input buffers for vld4_lane. */ -@@ -255,6 +288,9 @@ VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 32, 4); - VECT_VAR_DECL_INIT(buffer_vld4_lane, uint, 64, 4); - VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 8, 4); - VECT_VAR_DECL_INIT(buffer_vld4_lane, poly, 16, 4); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 16, 4); -+#endif - VECT_VAR_DECL_INIT(buffer_vld4_lane, float, 32, 4); - - void exec_vstX_lane (void) -@@ -302,7 +338,7 @@ void exec_vstX_lane (void) - - /* We need all variants in 64 bits, but there is no 64x2 variant, - nor 128 bits vectors of int8/uint8/poly8. */ --#define DECL_ALL_VSTX_LANE(X) \ -+#define DECL_ALL_VSTX_LANE_NO_FP16(X) \ - DECL_VSTX_LANE(int, 8, 8, X); \ - DECL_VSTX_LANE(int, 16, 4, X); \ - DECL_VSTX_LANE(int, 32, 2, X); \ -@@ -319,11 +355,20 @@ void exec_vstX_lane (void) - DECL_VSTX_LANE(poly, 16, 8, X); \ - DECL_VSTX_LANE(float, 32, 4, X) - -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+#define DECL_ALL_VSTX_LANE(X) \ -+ DECL_ALL_VSTX_LANE_NO_FP16(X); \ -+ DECL_VSTX_LANE(float, 16, 4, X); \ -+ DECL_VSTX_LANE(float, 16, 8, X) -+#else -+#define DECL_ALL_VSTX_LANE(X) DECL_ALL_VSTX_LANE_NO_FP16(X) -+#endif -+ - #define DUMMY_ARRAY(V, T, W, N, L) VECT_VAR_DECL(V,T,W,N)[N*L] - - /* Use the same lanes regardless of the size of the array (X), for - simplicity. 
*/ --#define TEST_ALL_VSTX_LANE(X) \ -+#define TEST_ALL_VSTX_LANE_NO_FP16(X) \ - TEST_VSTX_LANE(, int, s, 8, 8, X, 7); \ - TEST_VSTX_LANE(, int, s, 16, 4, X, 2); \ - TEST_VSTX_LANE(, int, s, 32, 2, X, 0); \ -@@ -340,7 +385,16 @@ void exec_vstX_lane (void) - TEST_VSTX_LANE(q, poly, p, 16, 8, X, 5); \ - TEST_VSTX_LANE(q, float, f, 32, 4, X, 2) - --#define TEST_ALL_EXTRA_CHUNKS(X, Y) \ -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+#define TEST_ALL_VSTX_LANE(X) \ -+ TEST_ALL_VSTX_LANE_NO_FP16(X); \ -+ TEST_VSTX_LANE(, float, f, 16, 4, X, 2); \ -+ TEST_VSTX_LANE(q, float, f, 16, 8, X, 6) -+#else -+#define TEST_ALL_VSTX_LANE(X) TEST_ALL_VSTX_LANE_NO_FP16(X) -+#endif -+ -+#define TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) \ - TEST_EXTRA_CHUNK(int, 8, 8, X, Y); \ - TEST_EXTRA_CHUNK(int, 16, 4, X, Y); \ - TEST_EXTRA_CHUNK(int, 32, 2, X, Y); \ -@@ -357,6 +411,15 @@ void exec_vstX_lane (void) - TEST_EXTRA_CHUNK(poly, 16, 8, X, Y); \ - TEST_EXTRA_CHUNK(float, 32, 4, X, Y) - -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+#define TEST_ALL_EXTRA_CHUNKS(X,Y) \ -+ TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y); \ -+ TEST_EXTRA_CHUNK(float, 16, 4, X, Y); \ -+ TEST_EXTRA_CHUNK(float, 16, 8, X, Y) -+#else -+#define TEST_ALL_EXTRA_CHUNKS(X,Y) TEST_ALL_EXTRA_CHUNKS_NO_FP16(X, Y) -+#endif -+ - /* Declare the temporary buffers / variables. */ - DECL_ALL_VSTX_LANE(2); - DECL_ALL_VSTX_LANE(3); -@@ -371,12 +434,18 @@ void exec_vstX_lane (void) - DUMMY_ARRAY(buffer_src, uint, 32, 2, 4); - DUMMY_ARRAY(buffer_src, poly, 8, 8, 4); - DUMMY_ARRAY(buffer_src, poly, 16, 4, 4); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ DUMMY_ARRAY(buffer_src, float, 16, 4, 4); -+#endif - DUMMY_ARRAY(buffer_src, float, 32, 2, 4); - DUMMY_ARRAY(buffer_src, int, 16, 8, 4); - DUMMY_ARRAY(buffer_src, int, 32, 4, 4); - DUMMY_ARRAY(buffer_src, uint, 16, 8, 4); - DUMMY_ARRAY(buffer_src, uint, 32, 4, 4); - DUMMY_ARRAY(buffer_src, poly, 16, 8, 4); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ DUMMY_ARRAY(buffer_src, float, 16, 8, 4); -+#endif - DUMMY_ARRAY(buffer_src, float, 32, 4, 4); - - /* Check vst2_lane/vst2q_lane. 
*/ -@@ -391,15 +460,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_0, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_0, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_0, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_0, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_0, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_0, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_0, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_0, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_0, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_0, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_0, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st2_0, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st2_0, CMT); -+#endif - - TEST_ALL_EXTRA_CHUNKS(2, 1); - #undef CMT -@@ -410,15 +483,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st2_1, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st2_1, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st2_1, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st2_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st2_1, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st2_1, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st2_1, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st2_1, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st2_1, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st2_1, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st2_1, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st2_1, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st2_1, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st2_1, CMT); -+#endif - - - /* Check vst3_lane/vst3q_lane. 
*/ -@@ -435,15 +512,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_0, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_0, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_0, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_0, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_0, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_0, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_0, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_0, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_0, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_0, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_0, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_0, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st3_0, CMT); -+#endif - - TEST_ALL_EXTRA_CHUNKS(3, 1); - -@@ -455,15 +536,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_1, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_1, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_1, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_1, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_1, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_1, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_1, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_1, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_1, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_1, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_1, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_1, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st3_1, CMT); -+#endif - - TEST_ALL_EXTRA_CHUNKS(3, 2); - -@@ -475,15 +560,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st3_2, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st3_2, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st3_2, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st3_2, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st3_2, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st3_2, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st3_2, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st3_2, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st3_2, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st3_2, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_2, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st3_2, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st3_2, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined 
(__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st3_2, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st3_2, CMT); -+#endif - - - /* Check vst4_lane/vst4q_lane. */ -@@ -500,15 +589,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_0, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_0, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_0, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_0, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_0, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_0, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_0, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_0, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_0, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_0, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_0, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_0, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_0, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_0, CMT); -+#endif - - TEST_ALL_EXTRA_CHUNKS(4, 1); - -@@ -520,15 +613,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_1, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_1, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_1, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_1, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_1, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_1, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_1, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_1, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_1, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_1, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_1, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_1, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_1, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_1, CMT); -+#endif - - TEST_ALL_EXTRA_CHUNKS(4, 2); - -@@ -540,15 +637,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_2, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_2, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_2, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_2, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_2, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_2, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_2, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_2, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_2, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_2, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_2, CMT); 
-+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_2, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_2, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_2, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_2, CMT); -+#endif - - TEST_ALL_EXTRA_CHUNKS(4, 3); - -@@ -560,15 +661,19 @@ void exec_vstX_lane (void) - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_st4_3, CMT); - CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_st4_3, CMT); - CHECK(TEST_MSG, uint, 32, 2, PRIx32, expected_st4_3, CMT); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT); -- CHECK(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_st4_3, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 4, PRIx16, expected_st4_3, CMT); - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_st4_3, CMT); - CHECK(TEST_MSG, int, 16, 8, PRIx16, expected_st4_3, CMT); - CHECK(TEST_MSG, int, 32, 4, PRIx32, expected_st4_3, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_st4_3, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_st4_3, CMT); -- CHECK(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_3, CMT); -+ CHECK_POLY(TEST_MSG, poly, 16, 8, PRIx16, expected_st4_3, CMT); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_st4_3, CMT); -+#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_st4_3, CMT); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_st4_3, CMT); -+#endif - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsub.c -@@ -44,6 +44,14 @@ VECT_VAR_DECL(expected,uint,64,2) [] = { 0xffffffffffffffed, - VECT_VAR_DECL(expected_float32,hfloat,32,2) [] = { 0xc00ccccd, 0xc00ccccd }; - VECT_VAR_DECL(expected_float32,hfloat,32,4) [] = { 0xc00ccccc, 0xc00ccccc, - 0xc00ccccc, 0xc00ccccc }; -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+VECT_VAR_DECL(expected_float16, hfloat, 16, 4) [] = { 0xc066, 0xc066, -+ 0xc066, 0xc066 }; -+VECT_VAR_DECL(expected_float16, hfloat, 16, 8) [] = { 0xc067, 0xc067, -+ 0xc067, 0xc067, -+ 0xc067, 0xc067, -+ 0xc067, 0xc067 }; -+#endif - - void exec_vsub_f32(void) - { -@@ -67,4 +75,27 @@ void exec_vsub_f32(void) - - CHECK_FP(TEST_MSG, float, 32, 2, PRIx32, expected_float32, ""); - CHECK_FP(TEST_MSG, float, 32, 4, PRIx32, expected_float32, ""); -+ -+#if defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ DECL_VARIABLE(vector, float, 16, 4); -+ DECL_VARIABLE(vector, float, 16, 8); -+ -+ DECL_VARIABLE(vector2, float, 16, 4); -+ DECL_VARIABLE(vector2, float, 16, 8); -+ -+ DECL_VARIABLE(vector_res, float, 16, 4); -+ DECL_VARIABLE(vector_res, float, 16, 8); -+ -+ VDUP(vector, , float, f, 16, 4, 2.3f); -+ VDUP(vector, q, float, f, 16, 8, 3.4f); -+ -+ VDUP(vector2, , float, f, 16, 4, 4.5f); -+ VDUP(vector2, q, float, f, 16, 8, 5.6f); -+ -+ TEST_BINARY_OP(INSN_NAME, , float, f, 16, 4); -+ TEST_BINARY_OP(INSN_NAME, q, float, f, 16, 8); -+ -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected_float16, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected_float16, ""); -+#endif - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vsubh_f16_1.c -@@ -0,0 +1,42 @@ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_hw } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+#include <arm_fp16.h> -+ -+#define 
INFF __builtin_inf () -+ -+/* Expected results (16-bit hexadecimal representation). */ -+uint16_t expected[] = -+{ -+ 0xbc00 /* -1.000000 */, -+ 0xbc00 /* -1.000000 */, -+ 0x4654 /* 6.328125 */, -+ 0xd60e /* -96.875000 */, -+ 0xc900 /* -10.000000 */, -+ 0x36b8 /* 0.419922 */, -+ 0xc19a /* -2.800781 */, -+ 0x4848 /* 8.562500 */, -+ 0xbd34 /* -1.300781 */, -+ 0xccec /* -19.687500 */, -+ 0x4791 /* 7.566406 */, -+ 0xbf34 /* -1.800781 */, -+ 0x484d /* 8.601562 */, -+ 0x4804 /* 8.031250 */, -+ 0xc69c /* -6.609375 */, -+ 0x4ceb /* 19.671875 */, -+ 0x7c00 /* inf */, -+ 0xfc00 /* -inf */ -+}; -+ -+#define TEST_MSG "VSUB_F16" -+#define INSN_NAME vsubh_f16 -+ -+#define EXPECTED expected -+ -+#define INPUT_TYPE float16_t -+#define OUTPUT_TYPE float16_t -+#define OUTPUT_TYPE_SIZE 16 -+ -+/* Include the template for binary scalar operations. */ -+#include "binary_scalar_op.inc" ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtbX.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtbX.c -@@ -167,7 +167,7 @@ void exec_vtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl1, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl1, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl1, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl1, ""); - - /* Check vtbl2. */ - clean_results (); -@@ -177,7 +177,7 @@ void exec_vtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl2, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl2, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl2, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl2, ""); - - /* Check vtbl3. */ - clean_results (); -@@ -187,7 +187,7 @@ void exec_vtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl3, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl3, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl3, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl3, ""); - - /* Check vtbl4. */ - clean_results (); -@@ -197,7 +197,7 @@ void exec_vtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbl4, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbl4, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl4, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbl4, ""); - - - /* Now test VTBX. */ -@@ -249,7 +249,7 @@ void exec_vtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx1, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx1, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx1, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx1, ""); - - /* Check vtbx2. */ - clean_results (); -@@ -259,7 +259,7 @@ void exec_vtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx2, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx2, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx2, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx2, ""); - - /* Check vtbx3. */ - clean_results (); -@@ -269,7 +269,7 @@ void exec_vtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx3, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx3, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx3, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx3, ""); - - /* Check vtbx4. 
*/ - clean_results (); -@@ -279,7 +279,7 @@ void exec_vtbX (void) - - CHECK(TEST_MSG, int, 8, 8, PRIx8, expected_vtbx4, ""); - CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_vtbx4, ""); -- CHECK(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx4, ""); -+ CHECK_POLY(TEST_MSG, poly, 8, 8, PRIx8, expected_vtbx4, ""); - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn.c -@@ -15,6 +15,10 @@ VECT_VAR_DECL(expected0,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 }; - VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0x55, 0x55, - 0xf2, 0xf3, 0x55, 0x55 }; - VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; - VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0x11, 0x11, - 0xf2, 0xf3, 0x11, 0x11, -@@ -36,6 +40,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf1, 0x55, 0x55, - 0xf6, 0xf7, 0x55, 0x55 }; - VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, 0x66, 0x66, - 0xfff2, 0xfff3, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, -+ 0x4b4d, 0x4b4d, -+ 0xcb00, 0xca80, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, - 0x42073333, 0x42073333 }; - -@@ -51,6 +61,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; - VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf4, 0xf5, 0x55, 0x55, - 0xf6, 0xf7, 0x55, 0x55 }; - VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff2, 0xfff3, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb00, 0xca80, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; - VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf8, 0xf9, 0x11, 0x11, - 0xfa, 0xfb, 0x11, 0x11, -@@ -72,6 +86,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf8, 0xf9, 0x55, 0x55, - 0xfe, 0xff, 0x55, 0x55 }; - VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff4, 0xfff5, 0x66, 0x66, - 0xfff6, 0xfff7, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xca00, 0xc980, -+ 0x4b4d, 0x4b4d, -+ 0xc900, 0xc880, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1600000, 0xc1500000, - 0x42073333, 0x42073333 }; - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtrn_half.c -@@ -0,0 +1,263 @@ -+/* { dg-do run } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x11, 0xf2, 0x11, -+ 0xf4, 0x11, 0xf6, 0x11 }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0x22, 0xfff2, 0x22 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0x55, 0xf2, 0x55, -+ 0xf4, 0x55, 0xf6, 0x55 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0x66, 0xfff2, 0x66 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0x55, 0xf2, 0x55, -+ 0xf4, 0x55, 0xf6, 0x55 }; -+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0x66, 0xfff2, 0x66 }; -+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0x4b4d, -+ 0xcb00, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0x11, 0xf2, 0x11, -+ 0xf4, 0x11, 0xf6, 0x11, -+ 0xf8, 0x11, 0xfa, 0x11, -+ 0xfc, 0x11, 0xfe, 0x11 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0x22, 0xfff2, 0x22, -+ 0xfff4, 0x22, 0xfff6, 0x22 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0x33, -+ 0xfffffff2, 0x33 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, -+ 0x44 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0x55, 0xf2, 0x55, -+ 0xf4, 0x55, 0xf6, 0x55, -+ 0xf8, 0x55, 0xfa, 0x55, -+ 0xfc, 0x55, 0xfe, 0x55 }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0x66, 0xfff2, 0x66, -+ 0xfff4, 0x66, 0xfff6, 0x66 }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0x77, -+ 0xfffffff2, 0x77 }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, -+ 0x88 }; -+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0x55, 0xf2, 0x55, -+ 0xf4, 0x55, 0xf6, 0x55, -+ 0xf8, 0x55, 0xfa, 0x55, -+ 0xfc, 0x55, 0xfe, 0x55 }; -+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0x66, 0xfff2, 0x66, -+ 0xfff4, 0x66, 0xfff6, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0x4b4d, -+ 0xcb00, 0x4b4d, -+ 0xca00, 0x4b4d, -+ 0xc900, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0x42073333, -+ 0xc1600000, 0x42073333 }; -+ -+#define TEST_MSG "VTRN1" -+void exec_vtrn_half (void) -+{ -+#define TEST_VTRN(PART, Q, T1, T2, W, N) \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ vtrn##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector2, T1, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) -+ -+#define TEST_VTRN1(Q, T1, T2, W, N) TEST_VTRN(1, Q, T1, T2, W, N) -+ -+ /* Input vector can only have 64 bits. */ -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector2); -+ DECL_VARIABLE(vector, float, 64, 2); -+ DECL_VARIABLE(vector2, float, 64, 2); -+ -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); -+ DECL_VARIABLE(vector_res, float, 64, 2); -+ -+ clean_results (); -+ /* We don't have vtrn1_T64x1, so set expected to the clean value. */ -+ CLEAN(expected, int, 64, 1); -+ CLEAN(expected, uint, 64, 1); -+ -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (FP16_SUPPORTED) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif -+ VLOAD(vector, buffer, , float, f, 32, 2); -+ VLOAD(vector, buffer, q, float, f, 32, 4); -+ VLOAD(vector, buffer, q, float, f, 64, 2); -+ -+ /* Choose arbitrary initialization values. 
*/ -+ VDUP(vector2, , int, s, 8, 8, 0x11); -+ VDUP(vector2, , int, s, 16, 4, 0x22); -+ VDUP(vector2, , int, s, 32, 2, 0x33); -+ VDUP(vector2, , uint, u, 8, 8, 0x55); -+ VDUP(vector2, , uint, u, 16, 4, 0x66); -+ VDUP(vector2, , uint, u, 32, 2, 0x77); -+ VDUP(vector2, , poly, p, 8, 8, 0x55); -+ VDUP(vector2, , poly, p, 16, 4, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ -+#endif -+ VDUP(vector2, , float, f, 32, 2, 33.6f); -+ -+ VDUP(vector2, q, int, s, 8, 16, 0x11); -+ VDUP(vector2, q, int, s, 16, 8, 0x22); -+ VDUP(vector2, q, int, s, 32, 4, 0x33); -+ VDUP(vector2, q, int, s, 64, 2, 0x44); -+ VDUP(vector2, q, uint, u, 8, 16, 0x55); -+ VDUP(vector2, q, uint, u, 16, 8, 0x66); -+ VDUP(vector2, q, uint, u, 32, 4, 0x77); -+ VDUP(vector2, q, uint, u, 64, 2, 0x88); -+ VDUP(vector2, q, poly, p, 8, 16, 0x55); -+ VDUP(vector2, q, poly, p, 16, 8, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, q, float, f, 16, 8, 14.6f); -+#endif -+ VDUP(vector2, q, float, f, 32, 4, 33.8f); -+ VDUP(vector2, q, float, f, 64, 2, 33.8f); -+ -+ TEST_VTRN1(, int, s, 8, 8); -+ TEST_VTRN1(, int, s, 16, 4); -+ TEST_VTRN1(, int, s, 32, 2); -+ TEST_VTRN1(, uint, u, 8, 8); -+ TEST_VTRN1(, uint, u, 16, 4); -+ TEST_VTRN1(, uint, u, 32, 2); -+ TEST_VTRN1(, poly, p, 8, 8); -+ TEST_VTRN1(, poly, p, 16, 4); -+#if defined (FP16_SUPPORTED) -+ TEST_VTRN1(, float, f, 16, 4); -+#endif -+ TEST_VTRN1(, float, f, 32, 2); -+ -+ TEST_VTRN1(q, int, s, 8, 16); -+ TEST_VTRN1(q, int, s, 16, 8); -+ TEST_VTRN1(q, int, s, 32, 4); -+ TEST_VTRN1(q, int, s, 64, 2); -+ TEST_VTRN1(q, uint, u, 8, 16); -+ TEST_VTRN1(q, uint, u, 16, 8); -+ TEST_VTRN1(q, uint, u, 32, 4); -+ TEST_VTRN1(q, uint, u, 64, 2); -+ TEST_VTRN1(q, poly, p, 8, 16); -+ TEST_VTRN1(q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VTRN1(q, float, f, 16, 8); -+#endif -+ TEST_VTRN1(q, float, f, 32, 4); -+ TEST_VTRN1(q, float, f, 64, 2); -+ -+#if defined (FP16_SUPPORTED) -+ CHECK_RESULTS (TEST_MSG, ""); -+#else -+ CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); -+#endif -+ -+#undef TEST_MSG -+#define TEST_MSG "VTRN2" -+ -+#define TEST_VTRN2(Q, T1, T2, W, N) TEST_VTRN(2, Q, T1, T2, W, N) -+ -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf1, 0x11, 0xf3, 0x11, -+ 0xf5, 0x11, 0xf7, 0x11 }; -+VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff1, 0x22, 0xfff3, 0x22 }; -+VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; -+VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf1, 0x55, 0xf3, 0x55, -+ 0xf5, 0x55, 0xf7, 0x55 }; -+VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff1, 0x66, 0xfff3, 0x66 }; -+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; -+VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf1, 0x55, 0xf3, 0x55, -+ 0xf5, 0x55, 0xf7, 0x55 }; -+VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff1, 0x66, 0xfff3, 0x66 }; -+VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb80, 0x4b4d, -+ 0xca80, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf1, 0x11, 0xf3, 0x11, -+ 0xf5, 0x11, 0xf7, 0x11, -+ 0xf9, 0x11, 0xfb, 0x11, -+ 0xfd, 0x11, 0xff, 0x11 }; -+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff1, 0x22, 0xfff3, 0x22, -+ 0xfff5, 0x22, 0xfff7, 0x22 }; -+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0x33, -+ 0xfffffff3, 0x33 }; -+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, -+ 0x44 }; -+VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf1, 0x55, 0xf3, 0x55, -+ 0xf5, 0x55, 0xf7, 0x55, -+ 0xf9, 0x55, 0xfb, 0x55, -+ 0xfd, 0x55, 0xff, 0x55 }; -+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0x66, 0xfff3, 0x66, -+ 0xfff5, 0x66, 0xfff7, 0x66 }; -+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff1, 0x77, -+ 0xfffffff3, 0x77 }; -+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, -+ 0x88 }; -+VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf1, 0x55, 0xf3, 0x55, -+ 0xf5, 0x55, 0xf7, 0x55, -+ 0xf9, 0x55, 0xfb, 0x55, -+ 0xfd, 0x55, 0xff, 0x55 }; -+VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0x66, 0xfff3, 0x66, -+ 0xfff5, 0x66, 0xfff7, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb80, 0x4b4d, -+ 0xca80, 0x4b4d, -+ 0xc980, 0x4b4d, -+ 0xc880, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0x42073333, -+ 0xc1500000, 0x42073333 }; -+ clean_results (); -+ CLEAN(expected2, int, 64, 1); -+ CLEAN(expected2, uint, 64, 1); -+ -+ TEST_VTRN2(, int, s, 8, 8); -+ TEST_VTRN2(, int, s, 16, 4); -+ TEST_VTRN2(, int, s, 32, 2); -+ TEST_VTRN2(, uint, u, 8, 8); -+ TEST_VTRN2(, uint, u, 16, 4); -+ TEST_VTRN2(, uint, u, 32, 2); -+ TEST_VTRN2(, poly, p, 8, 8); -+ TEST_VTRN2(, poly, p, 16, 4); -+#if defined (FP16_SUPPORTED) -+ TEST_VTRN2(, float, f, 16, 4); -+#endif -+ TEST_VTRN2(, float, f, 32, 2); -+ -+ TEST_VTRN2(q, int, s, 8, 16); -+ TEST_VTRN2(q, int, s, 16, 8); -+ TEST_VTRN2(q, int, s, 32, 4); -+ TEST_VTRN2(q, int, s, 64, 2); -+ TEST_VTRN2(q, uint, u, 8, 16); -+ TEST_VTRN2(q, uint, u, 16, 8); -+ TEST_VTRN2(q, uint, u, 32, 4); -+ TEST_VTRN2(q, uint, u, 64, 2); -+ TEST_VTRN2(q, poly, p, 8, 16); -+ TEST_VTRN2(q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VTRN2(q, float, f, 16, 8); -+#endif -+ TEST_VTRN2(q, float, f, 32, 4); -+ TEST_VTRN2(q, float, f, 64, 2); -+ -+ CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); -+#if defined (FP16_SUPPORTED) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); -+#endif -+} -+ -+int main (void) -+{ -+ exec_vtrn_half (); -+ return 0; -+} ---- 
a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtst.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vtst.c -@@ -32,10 +32,21 @@ VECT_VAR_DECL(expected_unsigned,uint,16,8) [] = { 0x0, 0xffff, - VECT_VAR_DECL(expected_unsigned,uint,32,4) [] = { 0x0, 0xffffffff, - 0x0, 0xffffffff }; - --#ifndef INSN_NAME -+/* Expected results with poly input. */ -+VECT_VAR_DECL(expected_poly,uint,8,8) [] = { 0x0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_poly,uint,8,16) [] = { 0x0, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff, -+ 0xff, 0xff, 0xff, 0xff }; -+VECT_VAR_DECL(expected_poly,uint,16,4) [] = { 0x0, 0xffff, 0x0, 0xffff }; -+VECT_VAR_DECL(expected_poly,uint,16,8) [] = { 0x0, 0xffff, -+ 0x0, 0xffff, -+ 0xffff, 0xffff, -+ 0xffff, 0xffff }; -+ - #define INSN_NAME vtst - #define TEST_MSG "VTST/VTSTQ" --#endif - - /* We can't use the standard ref_v_binary_op.c template because vtst - has no 64 bits variant, and outputs are always of uint type. */ -@@ -73,12 +84,16 @@ FNNAME (INSN_NAME) - VDUP(vector2, , uint, u, 8, 8, 15); - VDUP(vector2, , uint, u, 16, 4, 5); - VDUP(vector2, , uint, u, 32, 2, 1); -+ VDUP(vector2, , poly, p, 8, 8, 15); -+ VDUP(vector2, , poly, p, 16, 4, 5); - VDUP(vector2, q, int, s, 8, 16, 15); - VDUP(vector2, q, int, s, 16, 8, 5); - VDUP(vector2, q, int, s, 32, 4, 1); - VDUP(vector2, q, uint, u, 8, 16, 15); - VDUP(vector2, q, uint, u, 16, 8, 5); - VDUP(vector2, q, uint, u, 32, 4, 1); -+ VDUP(vector2, q, poly, p, 8, 16, 15); -+ VDUP(vector2, q, poly, p, 16, 8, 5); - - #define TEST_MACRO_NO64BIT_VARIANT_1_5(MACRO, VAR, T1, T2) \ - MACRO(VAR, , T1, T2, 8, 8); \ -@@ -111,6 +126,18 @@ FNNAME (INSN_NAME) - CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_unsigned, CMT); - CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_unsigned, CMT); - CHECK(TEST_MSG, uint, 32, 4, PRIx32, expected_unsigned, CMT); -+ -+ /* Now, test the variants with poly8 and poly16 as input. 
*/ -+#undef CMT -+#define CMT " (poly input)" -+ TEST_BINARY_OP(INSN_NAME, , poly, p, 8, 8); -+ TEST_BINARY_OP(INSN_NAME, , poly, p, 16, 4); -+ TEST_BINARY_OP(INSN_NAME, q, poly, p, 8, 16); -+ TEST_BINARY_OP(INSN_NAME, q, poly, p, 16, 8); -+ CHECK(TEST_MSG, uint, 8, 8, PRIx8, expected_poly, CMT); -+ CHECK(TEST_MSG, uint, 16, 4, PRIx16, expected_poly, CMT); -+ CHECK(TEST_MSG, uint, 8, 16, PRIx8, expected_poly, CMT); -+ CHECK(TEST_MSG, uint, 16, 8, PRIx16, expected_poly, CMT); - } - - int main (void) ---- a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp.c -@@ -19,6 +19,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0xf4, 0xf5, 0xf6, 0xf7 }; - VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff1, - 0xfff2, 0xfff3 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb80, -+ 0xcb00, 0xca80 }; -+#endif - VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; - VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3, - 0xf4, 0xf5, 0xf6, 0xf7, -@@ -48,6 +52,12 @@ VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff1, - 0xfff2, 0xfff3, - 0xfff4, 0xfff5, - 0xfff6, 0xfff7 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xcb80, -+ 0xcb00, 0xca80, -+ 0xca00, 0xc980, -+ 0xc900, 0xc880 }; -+#endif - VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1700000, - 0xc1600000, 0xc1500000 }; - -@@ -63,6 +73,10 @@ VECT_VAR_DECL(expected1,uint,32,2) [] = { 0x77, 0x77 }; - VECT_VAR_DECL(expected1,poly,8,8) [] = { 0x55, 0x55, 0x55, 0x55, - 0x55, 0x55, 0x55, 0x55 }; - VECT_VAR_DECL(expected1,poly,16,4) [] = { 0x66, 0x66, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0x4b4d, 0x4b4d, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; - VECT_VAR_DECL(expected1,int,8,16) [] = { 0x11, 0x11, 0x11, 0x11, - 0x11, 0x11, 0x11, 0x11, -@@ -84,6 +98,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0x55, 0x55, 0x55, 0x55, - 0x55, 0x55, 0x55, 0x55 }; - VECT_VAR_DECL(expected1,poly,16,8) [] = { 0x66, 0x66, 0x66, 0x66, - 0x66, 0x66, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0x4b4d, 0x4b4d, -+ 0x4b4d, 0x4b4d, -+ 0x4b4d, 0x4b4d, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0x42073333, 0x42073333, - 0x42073333, 0x42073333 }; - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vuzp_half.c -@@ -0,0 +1,259 @@ -+/* { dg-do run } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, -+ 0x11, 0x11, 0x11, 0x11 }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff2, 0x22, 0x22 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, -+ 0x55, 0x55, 0x55, 0x55 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf2, 0xf4, 0xf6, -+ 0x55, 0x55, 0x55, 0x55 }; -+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff2, 0x66, 0x66 }; -+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, -+ 0x4b4d, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, -+ 0xf8, 0xfa, 0xfc, 0xfe, -+ 0x11, 0x11, 0x11, 0x11, -+ 0x11, 0x11, 0x11, 0x11 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, -+ 0x22, 0x22, 0x22, 0x22 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff2, -+ 0x33, 0x33 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, -+ 0x44 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, -+ 0xf8, 0xfa, 0xfc, 0xfe, -+ 0x55, 0x55, 0x55, 0x55, -+ 0x55, 0x55, 0x55, 0x55 }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, -+ 0x66, 0x66, 0x66, 0x66 }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff2, 0x77, 0x77 }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, -+ 0x88 }; -+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf2, 0xf4, 0xf6, -+ 0xf8, 0xfa, 0xfc, 0xfe, -+ 0x55, 0x55, 0x55, 0x55, -+ 0x55, 0x55, 0x55, 0x55 }; -+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff2, 0xfff4, 0xfff6, -+ 0x66, 0x66, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0xcb00, 0xca00, 0xc900, -+ 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, -+ 0x42073333, 0x42073333 }; -+ -+#define TEST_MSG "VUZP1" -+void exec_vuzp_half (void) -+{ -+#define TEST_VUZP(PART, Q, T1, T2, W, N) \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ vuzp##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector2, T1, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) -+ -+#define TEST_VUZP1(Q, T1, T2, W, N) TEST_VUZP(1, Q, T1, T2, W, N) -+ -+ /* Input vector can only have 64 bits. */ -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector2); -+ DECL_VARIABLE(vector, float, 64, 2); -+ DECL_VARIABLE(vector2, float, 64, 2); -+ -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); -+ DECL_VARIABLE(vector_res, float, 64, 2); -+ -+ clean_results (); -+ /* We don't have vuzp1_T64x1, so set expected to the clean value. */ -+ CLEAN(expected, int, 64, 1); -+ CLEAN(expected, uint, 64, 1); -+ -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (FP16_SUPPORTED) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif -+ VLOAD(vector, buffer, , float, f, 32, 2); -+ VLOAD(vector, buffer, q, float, f, 32, 4); -+ VLOAD(vector, buffer, q, float, f, 64, 2); -+ -+ /* Choose arbitrary initialization values. 
*/ -+ VDUP(vector2, , int, s, 8, 8, 0x11); -+ VDUP(vector2, , int, s, 16, 4, 0x22); -+ VDUP(vector2, , int, s, 32, 2, 0x33); -+ VDUP(vector2, , uint, u, 8, 8, 0x55); -+ VDUP(vector2, , uint, u, 16, 4, 0x66); -+ VDUP(vector2, , uint, u, 32, 2, 0x77); -+ VDUP(vector2, , poly, p, 8, 8, 0x55); -+ VDUP(vector2, , poly, p, 16, 4, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ -+#endif -+ VDUP(vector2, , float, f, 32, 2, 33.6f); -+ -+ VDUP(vector2, q, int, s, 8, 16, 0x11); -+ VDUP(vector2, q, int, s, 16, 8, 0x22); -+ VDUP(vector2, q, int, s, 32, 4, 0x33); -+ VDUP(vector2, q, int, s, 64, 2, 0x44); -+ VDUP(vector2, q, uint, u, 8, 16, 0x55); -+ VDUP(vector2, q, uint, u, 16, 8, 0x66); -+ VDUP(vector2, q, uint, u, 32, 4, 0x77); -+ VDUP(vector2, q, uint, u, 64, 2, 0x88); -+ VDUP(vector2, q, poly, p, 8, 16, 0x55); -+ VDUP(vector2, q, poly, p, 16, 8, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, q, float, f, 16, 8, 14.6f); -+#endif -+ VDUP(vector2, q, float, f, 32, 4, 33.8f); -+ VDUP(vector2, q, float, f, 64, 2, 33.8f); -+ -+ TEST_VUZP1(, int, s, 8, 8); -+ TEST_VUZP1(, int, s, 16, 4); -+ TEST_VUZP1(, int, s, 32, 2); -+ TEST_VUZP1(, uint, u, 8, 8); -+ TEST_VUZP1(, uint, u, 16, 4); -+ TEST_VUZP1(, uint, u, 32, 2); -+ TEST_VUZP1(, poly, p, 8, 8); -+ TEST_VUZP1(, poly, p, 16, 4); -+#if defined (FP16_SUPPORTED) -+ TEST_VUZP1(, float, f, 16, 4); -+#endif -+ TEST_VUZP1(, float, f, 32, 2); -+ -+ TEST_VUZP1(q, int, s, 8, 16); -+ TEST_VUZP1(q, int, s, 16, 8); -+ TEST_VUZP1(q, int, s, 32, 4); -+ TEST_VUZP1(q, int, s, 64, 2); -+ TEST_VUZP1(q, uint, u, 8, 16); -+ TEST_VUZP1(q, uint, u, 16, 8); -+ TEST_VUZP1(q, uint, u, 32, 4); -+ TEST_VUZP1(q, uint, u, 64, 2); -+ TEST_VUZP1(q, poly, p, 8, 16); -+ TEST_VUZP1(q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VUZP1(q, float, f, 16, 8); -+#endif -+ TEST_VUZP1(q, float, f, 32, 4); -+ TEST_VUZP1(q, float, f, 64, 2); -+ -+#if defined (FP16_SUPPORTED) -+ CHECK_RESULTS (TEST_MSG, ""); -+#else -+ CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); -+#endif -+ -+#undef TEST_MSG -+#define TEST_MSG "VUZP2" -+ -+#define TEST_VUZP2(Q, T1, T2, W, N) TEST_VUZP(2, Q, T1, T2, W, N) -+ -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, -+ 0x11, 0x11, 0x11, 0x11 }; -+VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff1, 0xfff3, 0x22, 0x22 }; -+VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; -+VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, -+ 0x55, 0x55, 0x55, 0x55 }; -+VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; -+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; -+VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf1, 0xf3, 0xf5, 0xf7, -+ 0x55, 0x55, 0x55, 0x55 }; -+VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff1, 0xfff3, 0x66, 0x66 }; -+VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb80, 0xca80, -+ 0x4b4d, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, -+ 0xf9, 0xfb, 0xfd, 0xff, -+ 0x11, 0x11, 0x11, 0x11, -+ 0x11, 0x11, 0x11, 0x11 }; -+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, -+ 0x22, 0x22, 0x22, 0x22 }; -+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff1, 0xfffffff3, -+ 0x33, 0x33 }; -+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, -+ 0x44 }; -+VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, -+ 0xf9, 0xfb, 0xfd, 0xff, -+ 0x55, 0x55, 0x55, 0x55, -+ 0x55, 0x55, 0x55, 0x55 }; -+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, -+ 0x66, 0x66, 0x66, 0x66 }; -+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff1, 0xfffffff3, 0x77, 0x77 }; -+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, -+ 0x88 }; -+VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf1, 0xf3, 0xf5, 0xf7, -+ 0xf9, 0xfb, 0xfd, 0xff, -+ 0x55, 0x55, 0x55, 0x55, -+ 0x55, 0x55, 0x55, 0x55 }; -+VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff1, 0xfff3, 0xfff5, 0xfff7, -+ 0x66, 0x66, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xcb80, 0xca80, 0xc980, 0xc880, -+ 0x4b4d, 0x4b4d, 0x4b4d, 0x4b4d -+ }; -+#endif -+VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, -+ 0x42073333, 0x42073333 }; -+ -+ clean_results (); -+ CLEAN(expected2, int, 64, 1); -+ CLEAN(expected2, uint, 64, 1); -+ -+ TEST_VUZP2(, int, s, 8, 8); -+ TEST_VUZP2(, int, s, 16, 4); -+ TEST_VUZP2(, int, s, 32, 2); -+ TEST_VUZP2(, uint, u, 8, 8); -+ TEST_VUZP2(, uint, u, 16, 4); -+ TEST_VUZP2(, uint, u, 32, 2); -+ TEST_VUZP2(, poly, p, 8, 8); -+ TEST_VUZP2(, poly, p, 16, 4); -+#if defined (FP16_SUPPORTED) -+ TEST_VUZP2(, float, f, 16, 4); -+#endif -+ TEST_VUZP2(, float, f, 32, 2); -+ -+ TEST_VUZP2(q, int, s, 8, 16); -+ TEST_VUZP2(q, int, s, 16, 8); -+ TEST_VUZP2(q, int, s, 32, 4); -+ TEST_VUZP2(q, int, s, 64, 2); -+ TEST_VUZP2(q, uint, u, 8, 16); -+ TEST_VUZP2(q, uint, u, 16, 8); -+ TEST_VUZP2(q, uint, u, 32, 4); -+ TEST_VUZP2(q, uint, u, 64, 2); -+ TEST_VUZP2(q, poly, p, 8, 16); -+ TEST_VUZP2(q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VUZP2(q, float, f, 16, 8); -+#endif -+ TEST_VUZP2(q, float, f, 32, 4); -+ TEST_VUZP2(q, float, f, 64, 2); -+ -+ CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); -+#if defined (FP16_SUPPORTED) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); -+#endif -+} -+ -+int main (void) -+{ -+ exec_vuzp_half (); -+ return 0; -+} ---- 
a/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip.c -@@ -18,6 +18,10 @@ VECT_VAR_DECL(expected0,poly,8,8) [] = { 0xf0, 0xf4, 0x55, 0x55, - 0xf1, 0xf5, 0x55, 0x55 }; - VECT_VAR_DECL(expected0,poly,16,4) [] = { 0xfff0, 0xfff2, - 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected0, hfloat, 16, 4) [] = { 0xcc00, 0xcb00, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected0,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 }; - VECT_VAR_DECL(expected0,int,8,16) [] = { 0xf0, 0xf8, 0x11, 0x11, - 0xf1, 0xf9, 0x11, 0x11, -@@ -41,6 +45,12 @@ VECT_VAR_DECL(expected0,poly,8,16) [] = { 0xf0, 0xf8, 0x55, 0x55, - 0xf3, 0xfb, 0x55, 0x55 }; - VECT_VAR_DECL(expected0,poly,16,8) [] = { 0xfff0, 0xfff4, 0x66, 0x66, - 0xfff1, 0xfff5, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected0, hfloat, 16, 8) [] = { 0xcc00, 0xca00, -+ 0x4b4d, 0x4b4d, -+ 0xcb80, 0xc980, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected0,hfloat,32,4) [] = { 0xc1800000, 0xc1600000, - 0x42073333, 0x42073333 }; - -@@ -59,6 +69,10 @@ VECT_VAR_DECL(expected1,poly,8,8) [] = { 0xf2, 0xf6, 0x55, 0x55, - 0xf3, 0xf7, 0x55, 0x55 }; - VECT_VAR_DECL(expected1,poly,16,4) [] = { 0xfff1, 0xfff3, - 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected1, hfloat, 16, 4) [] = { 0xcb80, 0xca80, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected1,hfloat,32,2) [] = { 0x42066666, 0x42066666 }; - VECT_VAR_DECL(expected1,int,8,16) [] = { 0xf4, 0xfc, 0x11, 0x11, - 0xf5, 0xfd, 0x11, 0x11, -@@ -82,6 +96,12 @@ VECT_VAR_DECL(expected1,poly,8,16) [] = { 0xf4, 0xfc, 0x55, 0x55, - 0xf7, 0xff, 0x55, 0x55 }; - VECT_VAR_DECL(expected1,poly,16,8) [] = { 0xfff2, 0xfff6, 0x66, 0x66, - 0xfff3, 0xfff7, 0x66, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected1, hfloat, 16, 8) [] = { 0xcb00, 0xc900, -+ 0x4b4d, 0x4b4d, -+ 0xca80, 0xc880, -+ 0x4b4d, 0x4b4d }; -+#endif - VECT_VAR_DECL(expected1,hfloat,32,4) [] = { 0xc1700000, 0xc1500000, - 0x42073333, 0x42073333 }; - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vzip_half.c -@@ -0,0 +1,263 @@ -+/* { dg-do run } */ -+/* { dg-skip-if "" { arm*-*-* } } */ -+ -+#include <arm_neon.h> -+#include "arm-neon-ref.h" -+#include "compute-ref-data.h" -+ -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0x11, 0xf1, 0x11, -+ 0xf2, 0x11, 0xf3, 0x11 }; -+VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0x22, 0xfff1, 0x22 }; -+VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0x33 }; -+VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0x55, 0xf1, 0x55, -+ 0xf2, 0x55, 0xf3, 0x55 }; -+VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0x66, 0xfff1, 0x66 }; -+VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0x77 }; -+VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 }; -+VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0x55, 0xf1, 0x55, -+ 0xf2, 0x55, 0xf3, 0x55 }; -+VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0x66, 0xfff1, 0x66 }; -+VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0x42066666 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 4) [] = { 0xcc00, 0x4b4d, -+ 0xcb80, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0x11, 0xf1, 0x11, -+ 0xf2, 0x11, 0xf3, 0x11, -+ 0xf4, 0x11, 0xf5, 0x11, -+ 0xf6, 0x11, 0xf7, 0x11 }; -+VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0x22, 0xfff1, 0x22, -+ 0xfff2, 0x22, 0xfff3, 0x22 }; -+VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0x33, -+ 0xfffffff1, 0x33 }; -+VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0, -+ 0x44 }; -+VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0x55, 0xf1, 0x55, -+ 0xf2, 0x55, 0xf3, 0x55, -+ 0xf4, 0x55, 0xf5, 0x55, -+ 0xf6, 0x55, 0xf7, 0x55 }; -+VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0x66, 0xfff1, 0x66, -+ 0xfff2, 0x66, 0xfff3, 0x66 }; -+VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0x77, -+ 0xfffffff1, 0x77 }; -+VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0, -+ 0x88 }; -+VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0x55, 0xf1, 0x55, -+ 0xf2, 0x55, 0xf3, 0x55, -+ 0xf4, 0x55, 0xf5, 0x55, -+ 0xf6, 0x55, 0xf7, 0x55 }; -+VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0x66, 0xfff1, 0x66, -+ 0xfff2, 0x66, 0xfff3, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected, hfloat, 16, 8) [] = { 0xcc00, 0x4b4d, -+ 0xcb80, 0x4b4d, -+ 0xcb00, 0x4b4d, -+ 0xca80, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0x42073333, -+ 0xc1700000, 0x42073333 }; -+ -+#define TEST_MSG "VZIP1" -+void exec_vzip_half (void) -+{ -+#define TEST_VZIP(PART, Q, T1, T2, W, N) \ -+ VECT_VAR(vector_res, T1, W, N) = \ -+ vzip##PART##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \ -+ VECT_VAR(vector2, T1, W, N)); \ -+ vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N)) -+ -+#define TEST_VZIP1(Q, T1, T2, W, N) TEST_VZIP(1, Q, T1, T2, W, N) -+ -+ /* Input vector can only have 64 bits. */ -+ DECL_VARIABLE_ALL_VARIANTS(vector); -+ DECL_VARIABLE_ALL_VARIANTS(vector2); -+ DECL_VARIABLE(vector, float, 64, 2); -+ DECL_VARIABLE(vector2, float, 64, 2); -+ -+ DECL_VARIABLE_ALL_VARIANTS(vector_res); -+ DECL_VARIABLE(vector_res, float, 64, 2); -+ -+ clean_results (); -+ /* We don't have vzip1_T64x1, so set expected to the clean value. */ -+ CLEAN(expected, int, 64, 1); -+ CLEAN(expected, uint, 64, 1); -+ -+ TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer); -+#if defined (FP16_SUPPORTED) -+ VLOAD(vector, buffer, , float, f, 16, 4); -+ VLOAD(vector, buffer, q, float, f, 16, 8); -+#endif -+ VLOAD(vector, buffer, , float, f, 32, 2); -+ VLOAD(vector, buffer, q, float, f, 32, 4); -+ VLOAD(vector, buffer, q, float, f, 64, 2); -+ -+ /* Choose arbitrary initialization values. 
*/ -+ VDUP(vector2, , int, s, 8, 8, 0x11); -+ VDUP(vector2, , int, s, 16, 4, 0x22); -+ VDUP(vector2, , int, s, 32, 2, 0x33); -+ VDUP(vector2, , uint, u, 8, 8, 0x55); -+ VDUP(vector2, , uint, u, 16, 4, 0x66); -+ VDUP(vector2, , uint, u, 32, 2, 0x77); -+ VDUP(vector2, , poly, p, 8, 8, 0x55); -+ VDUP(vector2, , poly, p, 16, 4, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, , float, f, 16, 4, 14.6f); /* 14.6f is 0x4b4d. */ -+#endif -+ VDUP(vector2, , float, f, 32, 2, 33.6f); -+ -+ VDUP(vector2, q, int, s, 8, 16, 0x11); -+ VDUP(vector2, q, int, s, 16, 8, 0x22); -+ VDUP(vector2, q, int, s, 32, 4, 0x33); -+ VDUP(vector2, q, int, s, 64, 2, 0x44); -+ VDUP(vector2, q, uint, u, 8, 16, 0x55); -+ VDUP(vector2, q, uint, u, 16, 8, 0x66); -+ VDUP(vector2, q, uint, u, 32, 4, 0x77); -+ VDUP(vector2, q, uint, u, 64, 2, 0x88); -+ VDUP(vector2, q, poly, p, 8, 16, 0x55); -+ VDUP(vector2, q, poly, p, 16, 8, 0x66); -+#if defined (FP16_SUPPORTED) -+ VDUP (vector2, q, float, f, 16, 8, 14.6f); -+#endif -+ VDUP(vector2, q, float, f, 32, 4, 33.8f); -+ VDUP(vector2, q, float, f, 64, 2, 33.8f); -+ -+ TEST_VZIP1(, int, s, 8, 8); -+ TEST_VZIP1(, int, s, 16, 4); -+ TEST_VZIP1(, int, s, 32, 2); -+ TEST_VZIP1(, uint, u, 8, 8); -+ TEST_VZIP1(, uint, u, 16, 4); -+ TEST_VZIP1(, uint, u, 32, 2); -+ TEST_VZIP1(, poly, p, 8, 8); -+ TEST_VZIP1(, poly, p, 16, 4); -+#if defined (FP16_SUPPORTED) -+ TEST_VZIP1(, float, f, 16, 4); -+#endif -+ TEST_VZIP1(, float, f, 32, 2); -+ -+ TEST_VZIP1(q, int, s, 8, 16); -+ TEST_VZIP1(q, int, s, 16, 8); -+ TEST_VZIP1(q, int, s, 32, 4); -+ TEST_VZIP1(q, int, s, 64, 2); -+ TEST_VZIP1(q, uint, u, 8, 16); -+ TEST_VZIP1(q, uint, u, 16, 8); -+ TEST_VZIP1(q, uint, u, 32, 4); -+ TEST_VZIP1(q, uint, u, 64, 2); -+ TEST_VZIP1(q, poly, p, 8, 16); -+ TEST_VZIP1(q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VZIP1(q, float, f, 16, 8); -+#endif -+ TEST_VZIP1(q, float, f, 32, 4); -+ TEST_VZIP1(q, float, f, 64, 2); -+ -+#if defined (FP16_SUPPORTED) -+ CHECK_RESULTS (TEST_MSG, ""); -+#else -+ CHECK_RESULTS_NO_FP16 (TEST_MSG, ""); -+#endif -+ -+#undef TEST_MSG -+#define TEST_MSG "VZIP2" -+ -+#define TEST_VZIP2(Q, T1, T2, W, N) TEST_VZIP(2, Q, T1, T2, W, N) -+ -+/* Expected results. 
*/ -+VECT_VAR_DECL(expected2,int,8,8) [] = { 0xf4, 0x11, 0xf5, 0x11, -+ 0xf6, 0x11, 0xf7, 0x11 }; -+VECT_VAR_DECL(expected2,int,16,4) [] = { 0xfff2, 0x22, 0xfff3, 0x22 }; -+VECT_VAR_DECL(expected2,int,32,2) [] = { 0xfffffff1, 0x33 }; -+VECT_VAR_DECL(expected2,int,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected2,uint,8,8) [] = { 0xf4, 0x55, 0xf5, 0x55, -+ 0xf6, 0x55, 0xf7, 0x55 }; -+VECT_VAR_DECL(expected2,uint,16,4) [] = { 0xfff2, 0x66, 0xfff3, 0x66 }; -+VECT_VAR_DECL(expected2,uint,32,2) [] = { 0xfffffff1, 0x77 }; -+VECT_VAR_DECL(expected2,uint,64,1) [] = { 0xfffffffffffffff1 }; -+VECT_VAR_DECL(expected2,poly,8,8) [] = { 0xf4, 0x55, 0xf5, 0x55, -+ 0xf6, 0x55, 0xf7, 0x55 }; -+VECT_VAR_DECL(expected2,poly,16,4) [] = { 0xfff2, 0x66, 0xfff3, 0x66 }; -+VECT_VAR_DECL(expected2,hfloat,32,2) [] = { 0xc1700000, 0x42066666 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 4) [] = { 0xcb00, 0x4b4d, -+ 0xca80, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected2,int,8,16) [] = { 0xf8, 0x11, 0xf9, 0x11, -+ 0xfa, 0x11, 0xfb, 0x11, -+ 0xfc, 0x11, 0xfd, 0x11, -+ 0xfe, 0x11, 0xff, 0x11 }; -+VECT_VAR_DECL(expected2,int,16,8) [] = { 0xfff4, 0x22, 0xfff5, 0x22, -+ 0xfff6, 0x22, 0xfff7, 0x22 }; -+VECT_VAR_DECL(expected2,int,32,4) [] = { 0xfffffff2, 0x33, -+ 0xfffffff3, 0x33 }; -+VECT_VAR_DECL(expected2,int,64,2) [] = { 0xfffffffffffffff1, -+ 0x44 }; -+VECT_VAR_DECL(expected2,uint,8,16) [] = { 0xf8, 0x55, 0xf9, 0x55, -+ 0xfa, 0x55, 0xfb, 0x55, -+ 0xfc, 0x55, 0xfd, 0x55, -+ 0xfe, 0x55, 0xff, 0x55 }; -+VECT_VAR_DECL(expected2,uint,16,8) [] = { 0xfff4, 0x66, 0xfff5, 0x66, -+ 0xfff6, 0x66, 0xfff7, 0x66 }; -+VECT_VAR_DECL(expected2,uint,32,4) [] = { 0xfffffff2, 0x77, -+ 0xfffffff3, 0x77 }; -+VECT_VAR_DECL(expected2,uint,64,2) [] = { 0xfffffffffffffff1, -+ 0x88 }; -+VECT_VAR_DECL(expected2,poly,8,16) [] = { 0xf8, 0x55, 0xf9, 0x55, -+ 0xfa, 0x55, 0xfb, 0x55, -+ 0xfc, 0x55, 0xfd, 0x55, -+ 0xfe, 0x55, 0xff, 0x55 }; -+VECT_VAR_DECL(expected2,poly,16,8) [] = { 0xfff4, 0x66, 0xfff5, 0x66, -+ 0xfff6, 0x66, 0xfff7, 0x66 }; -+#if defined (FP16_SUPPORTED) -+VECT_VAR_DECL (expected2, hfloat, 16, 8) [] = { 0xca00, 0x4b4d, -+ 0xc980, 0x4b4d, -+ 0xc900, 0x4b4d, -+ 0xc880, 0x4b4d }; -+#endif -+VECT_VAR_DECL(expected2,hfloat,32,4) [] = { 0xc1600000, 0x42073333, -+ 0xc1500000, 0x42073333 }; -+ clean_results (); -+ CLEAN(expected2, int, 64, 1); -+ CLEAN(expected2, uint, 64, 1); -+ -+ TEST_VZIP2(, int, s, 8, 8); -+ TEST_VZIP2(, int, s, 16, 4); -+ TEST_VZIP2(, int, s, 32, 2); -+ TEST_VZIP2(, uint, u, 8, 8); -+ TEST_VZIP2(, uint, u, 16, 4); -+ TEST_VZIP2(, uint, u, 32, 2); -+ TEST_VZIP2(, poly, p, 8, 8); -+ TEST_VZIP2(, poly, p, 16, 4); -+#if defined (FP16_SUPPORTED) -+ TEST_VZIP2(, float, f, 16, 4); -+#endif -+ TEST_VZIP2(, float, f, 32, 2); -+ -+ TEST_VZIP2(q, int, s, 8, 16); -+ TEST_VZIP2(q, int, s, 16, 8); -+ TEST_VZIP2(q, int, s, 32, 4); -+ TEST_VZIP2(q, int, s, 64, 2); -+ TEST_VZIP2(q, uint, u, 8, 16); -+ TEST_VZIP2(q, uint, u, 16, 8); -+ TEST_VZIP2(q, uint, u, 32, 4); -+ TEST_VZIP2(q, uint, u, 64, 2); -+ TEST_VZIP2(q, poly, p, 8, 16); -+ TEST_VZIP2(q, poly, p, 16, 8); -+#if defined (FP16_SUPPORTED) -+ TEST_VZIP2(q, float, f, 16, 8); -+#endif -+ TEST_VZIP2(q, float, f, 32, 4); -+ TEST_VZIP2(q, float, f, 64, 2); -+ -+ CHECK_RESULTS_NAMED (TEST_MSG, expected2, ""); -+#if defined (FP16_SUPPORTED) -+ CHECK_FP(TEST_MSG, float, 16, 4, PRIx16, expected2, ""); -+ CHECK_FP(TEST_MSG, float, 16, 8, PRIx16, expected2, ""); -+#endif -+} -+ -+int main (void) -+{ -+ exec_vzip_half (); -+ return 0; -+} ---- /dev/null 
-+++ b/src/gcc/testsuite/gcc.target/aarch64/ands_3.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+int -+f9 (unsigned char x, int y) -+{ -+ if (y > 1 && x == 0) -+ return 10; -+ return x; -+} -+ -+/* { dg-final { scan-assembler "ands\t(x|w)\[0-9\]+,\[ \t\]*(x|w)\[0-9\]+,\[ \t\]*255" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/cpu-diagnostics-1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/cpu-diagnostics-1.c -@@ -1,4 +1,5 @@ - /* { dg-error "unknown" "" {target "aarch64*-*-*" } } */ -+/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" } { "" } } */ - /* { dg-options "-O2 -mcpu=dummy" } */ - - void f () ---- a/src/gcc/testsuite/gcc.target/aarch64/cpu-diagnostics-2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/cpu-diagnostics-2.c -@@ -1,4 +1,5 @@ - /* { dg-error "missing" "" {target "aarch64*-*-*" } } */ -+/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" } { "" } } */ - /* { dg-options "-O2 -mcpu=cortex-a53+no" } */ - - void f () ---- a/src/gcc/testsuite/gcc.target/aarch64/cpu-diagnostics-3.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/cpu-diagnostics-3.c -@@ -1,4 +1,5 @@ - /* { dg-error "invalid feature" "" {target "aarch64*-*-*" } } */ -+/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" } { "" } } */ - /* { dg-options "-O2 -mcpu=cortex-a53+dummy" } */ - - void f () ---- a/src/gcc/testsuite/gcc.target/aarch64/cpu-diagnostics-4.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/cpu-diagnostics-4.c -@@ -1,4 +1,5 @@ - /* { dg-error "missing" "" {target "aarch64*-*-*" } } */ -+/* { dg-skip-if "do not override -mcpu" { *-*-* } { "-mcpu=*" } { "" } } */ - /* { dg-options "-O2 -mcpu=+dummy" } */ - - void f () ---- a/src/gcc/testsuite/gcc.target/aarch64/fmaxmin.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fmaxmin.c -@@ -1,5 +1,5 @@ - /* { dg-do run } */ --/* { dg-options "-O2 -ftree-vectorize -fno-inline -save-temps" } */ -+/* { dg-options "-O2 -ftree-vectorize -fno-inline -fno-vect-cost-model -save-temps" } */ - - - extern void abort (void); ---- a/src/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fmla_intrinsic_1.c -@@ -110,6 +110,6 @@ main (int argc, char **argv) - /* vfmaq_lane_f64. - vfma_laneq_f64. - vfmaq_laneq_f64. */ --/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */ -+/* { dg-final { scan-assembler-times "fmla\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2?d\\\[\[0-9\]+\\\]" 3 } } */ - - ---- a/src/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fmls_intrinsic_1.c -@@ -111,6 +111,6 @@ main (int argc, char **argv) - /* vfmsq_lane_f64. - vfms_laneq_f64. - vfmsq_laneq_f64. 
*/ --/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d\\\[\[0-9\]+\\\]" 3 } } */ -+/* { dg-final { scan-assembler-times "fmls\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2?d\\\[\[0-9\]+\\\]" 3 } } */ - - ---- a/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero-reg.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fmovd-zero-reg.c -@@ -8,4 +8,4 @@ foo (void) - bar (0.0); - } - --/* { dg-final { scan-assembler "fmov\\td0, xzr" } } */ -+/* { dg-final { scan-assembler "movi\\td0, #0" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero-reg.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fmovf-zero-reg.c -@@ -8,4 +8,4 @@ foo (void) - bar (0.0); - } - --/* { dg-final { scan-assembler "fmov\\ts0, wzr" } } */ -+/* { dg-final { scan-assembler "movi\\tv0\.2s, #0" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/fmul_fcvt_2.c -@@ -1,5 +1,5 @@ - /* { dg-do run } */ --/* { dg-options "-save-temps -O2 -ftree-vectorize -fno-inline" } */ -+/* { dg-options "-save-temps -O2 -ftree-vectorize -fno-inline -fno-vect-cost-model" } */ - - #define N 1024 - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/ifcvt_multiple_sets_subreg_1.c -@@ -0,0 +1,30 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -fdump-rtl-ce1" } */ -+ -+/* Check that the inner if is transformed into CSELs. */ -+ -+int -+foo (int *x, int *z, int a) -+{ -+ int b = 0; -+ int c = 0; -+ int d = 0; -+ int i; -+ -+ for (i = 0; i < a; i++) -+ { -+ if (x[i] < c) -+ { -+ b = z[i]; -+ if (c < b) -+ { -+ c = b; -+ d = i; -+ } -+ } -+ } -+ -+ return c + d; -+} -+ -+/* { dg-final { scan-rtl-dump "if-conversion succeeded through noce_convert_multiple_sets" "ce1" } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/ldp_stp_1.c -@@ -1,4 +1,4 @@ --/* { dg-options "-O2" } */ -+/* { dg-options "-O2 -mcpu=generic" } */ - - int arr[4][4]; - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_1.c -@@ -0,0 +1,20 @@ -+/* { dg-options "-O2" } */ -+ -+/* Check that we can use a REG + IMM addressing mode when moving an unaligned -+ TImode value to and from memory. */ -+ -+struct foo -+{ -+ long long b; -+ __int128 a; -+} __attribute__ ((packed)); -+ -+void -+bar (struct foo *p, struct foo *q) -+{ -+ p->a = q->a; -+} -+ -+/* { dg-final { scan-assembler-not "add\tx\[0-9\]+, x\[0-9\]+" } } */ -+/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\], .*8" 1 } } */ -+/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\], .*8" 1 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/popcnt.c -@@ -0,0 +1,23 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+int -+foo (int x) -+{ -+ return __builtin_popcount (x); -+} -+ -+long -+foo1 (long x) -+{ -+ return __builtin_popcountl (x); -+} -+ -+long long -+foo2 (long long x) -+{ -+ return __builtin_popcountll (x); -+} -+ -+/* { dg-final { scan-assembler-not "popcount" } } */ -+/* { dg-final { scan-assembler-times "cnt\t" 3 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/pr37780_1.c -@@ -0,0 +1,46 @@ -+/* Test that we can remove the conditional move due to CLZ -+ and CTZ being defined at zero. */ -+ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+int -+fooctz (int i) -+{ -+ return (i == 0) ? 32 : __builtin_ctz (i); -+} -+ -+int -+fooctz2 (int i) -+{ -+ return (i != 0) ? __builtin_ctz (i) : 32; -+} -+ -+unsigned int -+fooctz3 (unsigned int i) -+{ -+ return (i > 0) ? 
__builtin_ctz (i) : 32; -+} -+ -+/* { dg-final { scan-assembler-times "rbit\t*" 3 } } */ -+ -+int -+fooclz (int i) -+{ -+ return (i == 0) ? 32 : __builtin_clz (i); -+} -+ -+int -+fooclz2 (int i) -+{ -+ return (i != 0) ? __builtin_clz (i) : 32; -+} -+ -+unsigned int -+fooclz3 (unsigned int i) -+{ -+ return (i > 0) ? __builtin_clz (i) : 32; -+} -+ -+/* { dg-final { scan-assembler-times "clz\t" 6 } } */ -+/* { dg-final { scan-assembler-not "cmp\t.*0" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/pr63874.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-skip-if "Not applicable for mcmodel=large" { aarch64*-*-* } { "-mcmodel=large" } { "" } } */ -+ -+extern void __attribute__((weak)) foo_weakref (void); -+void __attribute__((weak, noinline)) bar (void) -+{ -+ return; -+} -+void (*f) (void); -+void (*g) (void); -+ -+int -+main (void) -+{ -+ f = &foo_weakref; -+ g = &bar; -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-not "adr*foo_weakref" } } */ -+/* { dg-final { scan-assembler-not "\\.(word|xword)\tbar" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/pr71727.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mstrict-align -O3" } */ -+ -+struct test_struct_s -+{ -+ long a; -+ long b; -+ long c; -+ long d; -+ unsigned long e; -+}; -+ -+ -+char _a; -+struct test_struct_s xarray[128]; -+ -+void -+_start (void) -+{ -+ struct test_struct_s *new_entry; -+ -+ new_entry = &xarray[0]; -+ new_entry->a = 1; -+ new_entry->b = 2; -+ new_entry->c = 3; -+ new_entry->d = 4; -+ new_entry->e = 5; -+ -+ return; -+} -+ -+/* { dg-final { scan-assembler-times "mov\tx" 5 {target lp64} } } */ -+/* { dg-final { scan-assembler-not "add\tx0, x0, :" {target lp64} } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/pr78382.c -@@ -0,0 +1,10 @@ -+/* { dg-require-effective-target fpic } */ -+/* { dg-options "-mtls-dialect=trad -fpic" } */ -+ -+__thread int abc; -+void -+foo () -+{ -+ int *p; -+ p = &abc; -+} ---- a/src/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c -@@ -1,4 +1,4 @@ --/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic. */ -+/* Test the `v[min|max]{nm}{q}_f*' AArch64 SIMD intrinsic. */ - - /* { dg-do run } */ - /* { dg-options "-O2" } */ -@@ -18,6 +18,7 @@ extern void abort (); - int - main (int argc, char **argv) - { -+ /* v{min|max}nm_f32 normal. */ - float32x2_t f32x2_input1 = vdup_n_f32 (-1.0); - float32x2_t f32x2_input2 = vdup_n_f32 (0.0); - float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0); -@@ -28,6 +29,7 @@ main (int argc, char **argv) - CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm); - CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm); - -+ /* v{min|max}nm_f32 NaN. */ - f32x2_input1 = vdup_n_f32 (__builtin_nanf ("")); - f32x2_input2 = vdup_n_f32 (1.0); - f32x2_exp_minnm = vdup_n_f32 (1.0); -@@ -38,6 +40,7 @@ main (int argc, char **argv) - CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm); - CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm); - -+ /* v{min|max}nmq_f32 normal. */ - float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0); - float32x4_t f32x4_input2 = vdupq_n_f32 (77.0); - float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0); -@@ -48,6 +51,7 @@ main (int argc, char **argv) - CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm); - CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm); - -+ /* v{min|max}nmq_f32 NaN. 
*/ - f32x4_input1 = vdupq_n_f32 (-__builtin_nanf ("")); - f32x4_input2 = vdupq_n_f32 (-1.0); - f32x4_exp_minnm = vdupq_n_f32 (-1.0); -@@ -58,16 +62,57 @@ main (int argc, char **argv) - CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm); - CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm); - -+ /* v{min|max}nm_f64 normal. */ -+ float64x1_t f64x1_input1 = vdup_n_f64 (1.23); -+ float64x1_t f64x1_input2 = vdup_n_f64 (4.56); -+ float64x1_t f64x1_exp_minnm = vdup_n_f64 (1.23); -+ float64x1_t f64x1_exp_maxnm = vdup_n_f64 (4.56); -+ float64x1_t f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2); -+ float64x1_t f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2); -+ CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm); -+ CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm); -+ -+ /* v{min|max}_f64 normal. */ -+ float64x1_t f64x1_exp_min = vdup_n_f64 (1.23); -+ float64x1_t f64x1_exp_max = vdup_n_f64 (4.56); -+ float64x1_t f64x1_ret_min = vmin_f64 (f64x1_input1, f64x1_input2); -+ float64x1_t f64x1_ret_max = vmax_f64 (f64x1_input1, f64x1_input2); -+ CHECK (uint64_t, 1, f64x1_ret_min, f64x1_exp_min); -+ CHECK (uint64_t, 1, f64x1_ret_max, f64x1_exp_max); -+ -+ /* v{min|max}nmq_f64 normal. */ - float64x2_t f64x2_input1 = vdupq_n_f64 (1.23); - float64x2_t f64x2_input2 = vdupq_n_f64 (4.56); - float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23); - float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56); - float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2); - float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2); -- - CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm); - CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm); - -+ /* v{min|max}nm_f64 NaN. */ -+ f64x1_input1 = vdup_n_f64 (-__builtin_nanf ("")); -+ f64x1_input2 = vdup_n_f64 (1.0); -+ f64x1_exp_minnm = vdup_n_f64 (1.0); -+ f64x1_exp_maxnm = vdup_n_f64 (1.0); -+ f64x1_ret_minnm = vminnm_f64 (f64x1_input1, f64x1_input2); -+ f64x1_ret_maxnm = vmaxnm_f64 (f64x1_input1, f64x1_input2); -+ -+ CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm); -+ CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm); -+ -+ /* v{min|max}_f64 NaN. */ -+ f64x1_input1 = vdup_n_f64 (-__builtin_nanf ("")); -+ f64x1_input2 = vdup_n_f64 (1.0); -+ f64x1_exp_minnm = vdup_n_f64 (-__builtin_nanf ("")); -+ f64x1_exp_maxnm = vdup_n_f64 (-__builtin_nanf ("")); -+ f64x1_ret_minnm = vmin_f64 (f64x1_input1, f64x1_input2); -+ f64x1_ret_maxnm = vmax_f64 (f64x1_input1, f64x1_input2); -+ -+ CHECK (uint64_t, 1, f64x1_ret_minnm, f64x1_exp_minnm); -+ CHECK (uint64_t, 1, f64x1_ret_maxnm, f64x1_exp_maxnm); -+ -+ /* v{min|max}nmq_f64 NaN. */ - f64x2_input1 = vdupq_n_f64 (-__builtin_nan ("")); - f64x2_input2 = vdupq_n_f64 (1.0); - f64x2_exp_minnm = vdupq_n_f64 (1.0); ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vmul_elem_1.c -@@ -0,0 +1,541 @@ -+/* Test the vmul_n_f64 AArch64 SIMD intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (void); -+ -+#define A (132.4f) -+#define B (-0.0f) -+#define C (-34.8f) -+#define D (289.34f) -+float32_t expected2_1[2] = {A * A, B * A}; -+float32_t expected2_2[2] = {A * B, B * B}; -+float32_t expected4_1[4] = {A * A, B * A, C * A, D * A}; -+float32_t expected4_2[4] = {A * B, B * B, C * B, D * B}; -+float32_t expected4_3[4] = {A * C, B * C, C * C, D * C}; -+float32_t expected4_4[4] = {A * D, B * D, C * D, D * D}; -+float32_t _elemA = A; -+float32_t _elemB = B; -+float32_t _elemC = C; -+float32_t _elemD = D; -+ -+#define AD (1234.5) -+#define BD (-0.0) -+#define CD (71.3) -+#define DD (-1024.4) -+float64_t expectedd2_1[2] = {AD * CD, BD * CD}; -+float64_t expectedd2_2[2] = {AD * DD, BD * DD}; -+float64_t _elemdC = CD; -+float64_t _elemdD = DD; -+ -+ -+#define AS (1024) -+#define BS (-31) -+#define CS (0) -+#define DS (655) -+int32_t expecteds2_1[2] = {AS * AS, BS * AS}; -+int32_t expecteds2_2[2] = {AS * BS, BS * BS}; -+int32_t expecteds4_1[4] = {AS * AS, BS * AS, CS * AS, DS * AS}; -+int32_t expecteds4_2[4] = {AS * BS, BS * BS, CS * BS, DS * BS}; -+int32_t expecteds4_3[4] = {AS * CS, BS * CS, CS * CS, DS * CS}; -+int32_t expecteds4_4[4] = {AS * DS, BS * DS, CS * DS, DS * DS}; -+int32_t _elemsA = AS; -+int32_t _elemsB = BS; -+int32_t _elemsC = CS; -+int32_t _elemsD = DS; -+ -+#define AH ((int16_t) 0) -+#define BH ((int16_t) -32) -+#define CH ((int16_t) 102) -+#define DH ((int16_t) -51) -+#define EH ((int16_t) 71) -+#define FH ((int16_t) -91) -+#define GH ((int16_t) 48) -+#define HH ((int16_t) 255) -+int16_t expectedh4_1[4] = {AH * AH, BH * AH, CH * AH, DH * AH}; -+int16_t expectedh4_2[4] = {AH * BH, BH * BH, CH * BH, DH * BH}; -+int16_t expectedh4_3[4] = {AH * CH, BH * CH, CH * CH, DH * CH}; -+int16_t expectedh4_4[4] = {AH * DH, BH * DH, CH * DH, DH * DH}; -+int16_t expectedh8_1[8] = {AH * AH, BH * AH, CH * AH, DH * AH, -+ EH * AH, FH * AH, GH * AH, HH * AH}; -+int16_t expectedh8_2[8] = {AH * BH, BH * BH, CH * BH, DH * BH, -+ EH * BH, FH * BH, GH * BH, HH * BH}; -+int16_t expectedh8_3[8] = {AH * CH, BH * CH, CH * CH, DH * CH, -+ EH * CH, FH * CH, GH * CH, HH * CH}; -+int16_t expectedh8_4[8] = {AH * DH, BH * DH, CH * DH, DH * DH, -+ EH * DH, FH * DH, GH * DH, HH * DH}; -+int16_t expectedh8_5[8] = {AH * EH, BH * EH, CH * EH, DH * EH, -+ EH * EH, FH * EH, GH * EH, HH * EH}; -+int16_t expectedh8_6[8] = {AH * FH, BH * FH, CH * FH, DH * FH, -+ EH * FH, FH * FH, GH * FH, HH * FH}; -+int16_t expectedh8_7[8] = {AH * GH, BH * GH, CH * GH, DH * GH, -+ EH * GH, FH * GH, GH * GH, HH * GH}; -+int16_t expectedh8_8[8] = {AH * HH, BH * HH, CH * HH, DH * HH, -+ EH * HH, FH * HH, GH * HH, HH * HH}; -+int16_t _elemhA = AH; -+int16_t _elemhB = BH; -+int16_t _elemhC = CH; -+int16_t _elemhD = DH; -+int16_t _elemhE = EH; -+int16_t _elemhF = FH; -+int16_t _elemhG = GH; -+int16_t _elemhH = HH; -+ -+#define AUS (1024) -+#define BUS (31) -+#define CUS (0) -+#define DUS (655) -+uint32_t expectedus2_1[2] = {AUS * AUS, BUS * AUS}; -+uint32_t expectedus2_2[2] = {AUS * BUS, BUS * BUS}; -+uint32_t expectedus4_1[4] = {AUS * AUS, BUS * AUS, CUS * AUS, DUS * AUS}; -+uint32_t expectedus4_2[4] = {AUS * BUS, BUS * BUS, CUS * BUS, DUS * BUS}; -+uint32_t expectedus4_3[4] = {AUS * CUS, BUS * CUS, CUS * CUS, DUS * CUS}; -+uint32_t expectedus4_4[4] = {AUS * DUS, BUS * DUS, CUS * DUS, DUS * DUS}; -+uint32_t _elemusA = AUS; -+uint32_t _elemusB = BUS; -+uint32_t _elemusC = CUS; -+uint32_t _elemusD = DUS; -+ -+#define 
AUH ((uint16_t) 0) -+#define BUH ((uint16_t) 32) -+#define CUH ((uint16_t) 102) -+#define DUH ((uint16_t) 51) -+#define EUH ((uint16_t) 71) -+#define FUH ((uint16_t) 91) -+#define GUH ((uint16_t) 48) -+#define HUH ((uint16_t) 255) -+uint16_t expecteduh4_1[4] = {AUH * AUH, BUH * AUH, CUH * AUH, DUH * AUH}; -+uint16_t expecteduh4_2[4] = {AUH * BUH, BUH * BUH, CUH * BUH, DUH * BUH}; -+uint16_t expecteduh4_3[4] = {AUH * CUH, BUH * CUH, CUH * CUH, DUH * CUH}; -+uint16_t expecteduh4_4[4] = {AUH * DUH, BUH * DUH, CUH * DUH, DUH * DUH}; -+uint16_t expecteduh8_1[8] = {AUH * AUH, BUH * AUH, CUH * AUH, DUH * AUH, -+ EUH * AUH, FUH * AUH, GUH * AUH, HUH * AUH}; -+uint16_t expecteduh8_2[8] = {AUH * BUH, BUH * BUH, CUH * BUH, DUH * BUH, -+ EUH * BUH, FUH * BUH, GUH * BUH, HUH * BUH}; -+uint16_t expecteduh8_3[8] = {AUH * CUH, BUH * CUH, CUH * CUH, DUH * CUH, -+ EUH * CUH, FUH * CUH, GUH * CUH, HUH * CUH}; -+uint16_t expecteduh8_4[8] = {AUH * DUH, BUH * DUH, CUH * DUH, DUH * DUH, -+ EUH * DUH, FUH * DUH, GUH * DUH, HUH * DUH}; -+uint16_t expecteduh8_5[8] = {AUH * EUH, BUH * EUH, CUH * EUH, DUH * EUH, -+ EUH * EUH, FUH * EUH, GUH * EUH, HUH * EUH}; -+uint16_t expecteduh8_6[8] = {AUH * FUH, BUH * FUH, CUH * FUH, DUH * FUH, -+ EUH * FUH, FUH * FUH, GUH * FUH, HUH * FUH}; -+uint16_t expecteduh8_7[8] = {AUH * GUH, BUH * GUH, CUH * GUH, DUH * GUH, -+ EUH * GUH, FUH * GUH, GUH * GUH, HUH * GUH}; -+uint16_t expecteduh8_8[8] = {AUH * HUH, BUH * HUH, CUH * HUH, DUH * HUH, -+ EUH * HUH, FUH * HUH, GUH * HUH, HUH * HUH}; -+uint16_t _elemuhA = AUH; -+uint16_t _elemuhB = BUH; -+uint16_t _elemuhC = CUH; -+uint16_t _elemuhD = DUH; -+uint16_t _elemuhE = EUH; -+uint16_t _elemuhF = FUH; -+uint16_t _elemuhG = GUH; -+uint16_t _elemuhH = HUH; -+ -+void -+check_v2sf (float32_t elemA, float32_t elemB) -+{ -+ int32_t indx; -+ const float32_t vec32x2_buf[2] = {A, B}; -+ float32x2_t vec32x2_src = vld1_f32 (vec32x2_buf); -+ float32_t vec32x2_res[2]; -+ -+ vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemA)); -+ -+ for (indx = 0; indx < 2; indx++) -+ if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_1[indx]) -+ abort (); -+ -+ vst1_f32 (vec32x2_res, vmul_n_f32 (vec32x2_src, elemB)); -+ -+ for (indx = 0; indx < 2; indx++) -+ if (* (uint32_t *) &vec32x2_res[indx] != * (uint32_t *) &expected2_2[indx]) -+ abort (); -+ -+/* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ -+} -+ -+void -+check_v4sf (float32_t elemA, float32_t elemB, float32_t elemC, float32_t elemD) -+{ -+ int32_t indx; -+ const float32_t vec32x4_buf[4] = {A, B, C, D}; -+ float32x4_t vec32x4_src = vld1q_f32 (vec32x4_buf); -+ float32_t vec32x4_res[4]; -+ -+ vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemA)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_1[indx]) -+ abort (); -+ -+ vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemB)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_2[indx]) -+ abort (); -+ -+ vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemC)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_3[indx]) -+ abort (); -+ -+ vst1q_f32 (vec32x4_res, vmulq_n_f32 (vec32x4_src, elemD)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (* (uint32_t *) &vec32x4_res[indx] != * (uint32_t *) &expected4_4[indx]) -+ abort (); -+ -+/* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, 
v\[0-9\]+\.s\\\[0\\\]" 4 } } */ -+} -+ -+void -+check_v2df (float64_t elemdC, float64_t elemdD) -+{ -+ int32_t indx; -+ const float64_t vec64x2_buf[2] = {AD, BD}; -+ float64x2_t vec64x2_src = vld1q_f64 (vec64x2_buf); -+ float64_t vec64x2_res[2]; -+ -+ vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdC)); -+ -+ for (indx = 0; indx < 2; indx++) -+ if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_1[indx]) -+ abort (); -+ -+ vst1q_f64 (vec64x2_res, vmulq_n_f64 (vec64x2_src, elemdD)); -+ -+ for (indx = 0; indx < 2; indx++) -+ if (* (uint64_t *) &vec64x2_res[indx] != * (uint64_t *) &expectedd2_2[indx]) -+ abort (); -+ -+/* { dg-final { scan-assembler-times "fmul\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.d\\\[0\\\]" 2 } } */ -+} -+ -+void -+check_v2si (int32_t elemsA, int32_t elemsB) -+{ -+ int32_t indx; -+ const int32_t vecs32x2_buf[2] = {AS, BS}; -+ int32x2_t vecs32x2_src = vld1_s32 (vecs32x2_buf); -+ int32_t vecs32x2_res[2]; -+ -+ vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsA)); -+ -+ for (indx = 0; indx < 2; indx++) -+ if (vecs32x2_res[indx] != expecteds2_1[indx]) -+ abort (); -+ -+ vst1_s32 (vecs32x2_res, vmul_n_s32 (vecs32x2_src, elemsB)); -+ -+ for (indx = 0; indx < 2; indx++) -+ if (vecs32x2_res[indx] != expecteds2_2[indx]) -+ abort (); -+} -+ -+void -+check_v2si_unsigned (uint32_t elemusA, uint32_t elemusB) -+{ -+ int indx; -+ const uint32_t vecus32x2_buf[2] = {AUS, BUS}; -+ uint32x2_t vecus32x2_src = vld1_u32 (vecus32x2_buf); -+ uint32_t vecus32x2_res[2]; -+ -+ vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusA)); -+ -+ for (indx = 0; indx < 2; indx++) -+ if (vecus32x2_res[indx] != expectedus2_1[indx]) -+ abort (); -+ -+ vst1_u32 (vecus32x2_res, vmul_n_u32 (vecus32x2_src, elemusB)); -+ -+ for (indx = 0; indx < 2; indx++) -+ if (vecus32x2_res[indx] != expectedus2_2[indx]) -+ abort (); -+ -+/* { dg-final { scan-assembler-times "\tmul\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 4 } } */ -+} -+ -+void -+check_v4si (int32_t elemsA, int32_t elemsB, int32_t elemsC, int32_t elemsD) -+{ -+ int32_t indx; -+ const int32_t vecs32x4_buf[4] = {AS, BS, CS, DS}; -+ int32x4_t vecs32x4_src = vld1q_s32 (vecs32x4_buf); -+ int32_t vecs32x4_res[4]; -+ -+ vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsA)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecs32x4_res[indx] != expecteds4_1[indx]) -+ abort (); -+ -+ vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsB)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecs32x4_res[indx] != expecteds4_2[indx]) -+ abort (); -+ -+ vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsC)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecs32x4_res[indx] != expecteds4_3[indx]) -+ abort (); -+ -+ vst1q_s32 (vecs32x4_res, vmulq_n_s32 (vecs32x4_src, elemsD)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecs32x4_res[indx] != expecteds4_4[indx]) -+ abort (); -+} -+ -+void -+check_v4si_unsigned (uint32_t elemusA, uint32_t elemusB, uint32_t elemusC, -+ uint32_t elemusD) -+{ -+ int indx; -+ const uint32_t vecus32x4_buf[4] = {AUS, BUS, CUS, DUS}; -+ uint32x4_t vecus32x4_src = vld1q_u32 (vecus32x4_buf); -+ uint32_t vecus32x4_res[4]; -+ -+ vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusA)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecus32x4_res[indx] != expectedus4_1[indx]) -+ abort (); -+ -+ vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusB)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecus32x4_res[indx] != expectedus4_2[indx]) -+ abort (); -+ -+ vst1q_u32 (vecus32x4_res, 
vmulq_n_u32 (vecus32x4_src, elemusC)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecus32x4_res[indx] != expectedus4_3[indx]) -+ abort (); -+ -+ vst1q_u32 (vecus32x4_res, vmulq_n_u32 (vecus32x4_src, elemusD)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecus32x4_res[indx] != expectedus4_4[indx]) -+ abort (); -+ -+/* { dg-final { scan-assembler-times "\tmul\tv\[0-9\]+\.4s, v\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 8 } } */ -+} -+ -+ -+void -+check_v4hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD) -+{ -+ int32_t indx; -+ const int16_t vech16x4_buf[4] = {AH, BH, CH, DH}; -+ int16x4_t vech16x4_src = vld1_s16 (vech16x4_buf); -+ int16_t vech16x4_res[4]; -+ -+ vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhA)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vech16x4_res[indx] != expectedh4_1[indx]) -+ abort (); -+ -+ vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhB)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vech16x4_res[indx] != expectedh4_2[indx]) -+ abort (); -+ -+ vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhC)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vech16x4_res[indx] != expectedh4_3[indx]) -+ abort (); -+ -+ vst1_s16 (vech16x4_res, vmul_n_s16 (vech16x4_src, elemhD)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vech16x4_res[indx] != expectedh4_4[indx]) -+ abort (); -+} -+ -+void -+check_v4hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC, -+ uint16_t elemuhD) -+{ -+ int indx; -+ const uint16_t vecuh16x4_buf[4] = {AUH, BUH, CUH, DUH}; -+ uint16x4_t vecuh16x4_src = vld1_u16 (vecuh16x4_buf); -+ uint16_t vecuh16x4_res[4]; -+ -+ vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhA)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecuh16x4_res[indx] != expecteduh4_1[indx]) -+ abort (); -+ -+ vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhB)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecuh16x4_res[indx] != expecteduh4_2[indx]) -+ abort (); -+ -+ vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhC)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecuh16x4_res[indx] != expecteduh4_3[indx]) -+ abort (); -+ -+ vst1_u16 (vecuh16x4_res, vmul_n_u16 (vecuh16x4_src, elemuhD)); -+ -+ for (indx = 0; indx < 4; indx++) -+ if (vecuh16x4_res[indx] != expecteduh4_4[indx]) -+ abort (); -+ -+/* { dg-final { scan-assembler-times "mul\tv\[0-9\]+\.4h, v\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 8 } } */ -+} -+ -+void -+check_v8hi (int16_t elemhA, int16_t elemhB, int16_t elemhC, int16_t elemhD, -+ int16_t elemhE, int16_t elemhF, int16_t elemhG, int16_t elemhH) -+{ -+ int32_t indx; -+ const int16_t vech16x8_buf[8] = {AH, BH, CH, DH, EH, FH, GH, HH}; -+ int16x8_t vech16x8_src = vld1q_s16 (vech16x8_buf); -+ int16_t vech16x8_res[8]; -+ -+ vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhA)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vech16x8_res[indx] != expectedh8_1[indx]) -+ abort (); -+ -+ vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhB)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vech16x8_res[indx] != expectedh8_2[indx]) -+ abort (); -+ -+ vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhC)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vech16x8_res[indx] != expectedh8_3[indx]) -+ abort (); -+ -+ vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhD)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vech16x8_res[indx] != expectedh8_4[indx]) -+ abort (); -+ -+ vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhE)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if 
(vech16x8_res[indx] != expectedh8_5[indx]) -+ abort (); -+ -+ vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhF)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vech16x8_res[indx] != expectedh8_6[indx]) -+ abort (); -+ -+ vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhG)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vech16x8_res[indx] != expectedh8_7[indx]) -+ abort (); -+ -+ vst1q_s16 (vech16x8_res, vmulq_n_s16 (vech16x8_src, elemhH)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vech16x8_res[indx] != expectedh8_8[indx]) -+ abort (); -+} -+ -+void -+check_v8hi_unsigned (uint16_t elemuhA, uint16_t elemuhB, uint16_t elemuhC, -+ uint16_t elemuhD, uint16_t elemuhE, uint16_t elemuhF, -+ uint16_t elemuhG, uint16_t elemuhH) -+{ -+ int indx; -+ const uint16_t vecuh16x8_buf[8] = {AUH, BUH, CUH, DUH, EUH, FUH, GUH, HUH}; -+ uint16x8_t vecuh16x8_src = vld1q_u16 (vecuh16x8_buf); -+ uint16_t vecuh16x8_res[8]; -+ -+ vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhA)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vecuh16x8_res[indx] != expecteduh8_1[indx]) -+ abort (); -+ -+ vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhB)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vecuh16x8_res[indx] != expecteduh8_2[indx]) -+ abort (); -+ -+ vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhC)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vecuh16x8_res[indx] != expecteduh8_3[indx]) -+ abort (); -+ -+ vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhD)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vecuh16x8_res[indx] != expecteduh8_4[indx]) -+ abort (); -+ -+ vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhE)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vecuh16x8_res[indx] != expecteduh8_5[indx]) -+ abort (); -+ -+ vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhF)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vecuh16x8_res[indx] != expecteduh8_6[indx]) -+ abort (); -+ -+ vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhG)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vecuh16x8_res[indx] != expecteduh8_7[indx]) -+ abort (); -+ -+ vst1q_u16 (vecuh16x8_res, vmulq_n_u16 (vecuh16x8_src, elemuhH)); -+ -+ for (indx = 0; indx < 8; indx++) -+ if (vecuh16x8_res[indx] != expecteduh8_8[indx]) -+ abort (); -+ -+/* { dg-final { scan-assembler-times "mul\tv\[0-9\]+\.8h, v\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 16 } } */ -+} -+ -+int -+main (void) -+{ -+ check_v2sf (_elemA, _elemB); -+ check_v4sf (_elemA, _elemB, _elemC, _elemD); -+ check_v2df (_elemdC, _elemdD); -+ check_v2si (_elemsA, _elemsB); -+ check_v4si (_elemsA, _elemsB, _elemsC, _elemsD); -+ check_v4hi (_elemhA, _elemhB, _elemhC, _elemhD); -+ check_v8hi (_elemhA, _elemhB, _elemhC, _elemhD, -+ _elemhE, _elemhF, _elemhG, _elemhH); -+ check_v2si_unsigned (_elemusA, _elemusB); -+ check_v4si_unsigned (_elemusA, _elemusB, _elemusC, _elemusD); -+ check_v4hi_unsigned (_elemuhA, _elemuhB, _elemuhC, _elemuhD); -+ check_v8hi_unsigned (_elemuhA, _elemuhB, _elemuhC, _elemuhD, -+ _elemuhE, _elemuhF, _elemuhG, _elemuhH); -+ -+ return 0; -+} -+ ---- a/src/gcc/testsuite/gcc.target/aarch64/store-pair-1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/store-pair-1.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2" } */ -+/* { dg-options "-O2 -mcpu=generic" } */ - - int f(int *a, int b) - { ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mtune=generic" } */ 
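-+/* A 64-bit constant whose two 32-bit halves are identical, such as
-+   0x0140c0da0140c0da below, only needs its low 32 bits synthesized:
-+   one MOV/MOVK pair into a W register, then a single "stp w, w" to
-+   write both halves of the doubleword.  The scans below check for
-+   exactly one W-reg MOVK and one STP of two W registers.  */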
-+
-+void
-+foo (unsigned long long *a)
-+{
-+  a[0] = 0x0140c0da0140c0daULL;
-+}
-+
-+/* { dg-final { scan-assembler-times "movk\\tw.*" 1 } } */
-+/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/store_repeating_constant_2.c
-@@ -0,0 +1,15 @@
-+/* { dg-do compile } */
-+/* { dg-options "-Os" } */
-+
-+/* Check that for -Os we synthesize only the bottom half and then
-+   store it twice with an STP rather than synthesizing it twice in each
-+   half of an X-reg.  */
-+
-+void
-+foo (unsigned long long *a)
-+{
-+  a[0] = 0xc0da0000c0daULL;
-+}
-+
-+/* { dg-final { scan-assembler-times "mov\\tw.*" 1 } } */
-+/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]+.*" 1 } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/struct_return.c
-@@ -0,0 +1,31 @@
-+/* Test the absence of a spurious move from x8 to x0 for functions
-+   returning structures.  */
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+struct s
-+{
-+  long x;
-+  long y;
-+  long z;
-+};
-+
-+struct s __attribute__((noinline))
-+foo (long a, long d, long c)
-+{
-+  struct s b;
-+  b.x = a;
-+  b.y = d;
-+  b.z = c;
-+  return b;
-+}
-+
-+int
-+main (void)
-+{
-+  struct s x;
-+  x = foo (10, 20, 30);
-+  return x.x + x.y + x.z;
-+}
-+
-+/* { dg-final { scan-assembler-not "mov\tx0, x8" } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
-@@ -4,8 +4,7 @@
-  * total frame size > 512.
-    area except outgoing <= 512
-  * number of callee-saved reg >= 2.
-- * Split stack adjustment into two subtractions.
--   the first subtractions could be optimized into "stp !".  */
-+ * Use a single stack adjustment, no writeback.  */
- 
- /* { dg-do run } */
- /* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-@@ -15,6 +14,6 @@
- t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
- t_frame_run (test10)
- 
---/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
---/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */
--+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
--+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
- 
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_12.c
-@@ -13,6 +13,6 @@ t_frame_run (test12)
- 
- /* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
- 
---/* Check epilogue using write-back.  */
---/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp\\\], \[0-9\]+" 3 } } */
--+/* Check epilogue using no write-back.  */
--+/* { dg-final { scan-assembler-times "ldp\tx29, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
- 
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_13.c
-@@ -2,8 +2,7 @@
-  * without outgoing.
-  * total frame size > 512.
-  * number of callee-save reg >= 2.
-- * split the stack adjustment into two subtractions,
--   the second could be optimized into "stp !".  */
-+ * Use a single stack adjustment, no writeback.  */
- 
- /* { dg-do run } */
- /* { dg-options "-O2 --save-temps" } */
-@@ -14,4 +13,4 @@ t_frame_pattern (test13, 700, )
- t_frame_run (test13)
- 
- /* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
---/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 
2 } } */
-+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp\\\]" 1 } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_15.c
-@@ -3,8 +3,7 @@
-  * total frame size > 512.
-    area except outgoing <= 512
-  * number of callee-save reg >= 2.
-- * split the stack adjustment into two subtractions,
--   the first could be optimized into "stp !".  */
-+ * Use a single stack adjustment, no writeback.  */
- 
- /* { dg-do run } */
- /* { dg-options "-O2 --save-temps" } */
-@@ -15,4 +14,4 @@ t_frame_pattern_outgoing (test15, 480, , 8, a[8])
- t_frame_run (test15)
- 
- /* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 1 } } */
---/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
--+/* { dg-final { scan-assembler-times "stp\tx29, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_16.c
-@@ -0,0 +1,25 @@
-+/* Verify:
-+ * with outgoing.
-+ * single int register push.
-+ * varargs and callee-save size >= 256
-+ * Use 2 stack adjustments.  */
-+
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-+
-+#define REP8(X) X,X,X,X,X,X,X,X
-+#define REP64(X) REP8(REP8(X))
-+
-+void outgoing (__builtin_va_list, ...);
-+
-+double vararg_outgoing (int x1, ...)
-+{
-+  double a1 = x1, a2 = x1 * 2, a3 = x1 * 3, a4 = x1 * 4, a5 = x1 * 5, a6 = x1 * 6;
-+  __builtin_va_list vl;
-+  __builtin_va_start (vl, x1);
-+  outgoing (vl, a1, a2, a3, a4, a5, a6, REP64 (1));
-+  __builtin_va_end (vl);
-+  return a1 + a2 + a3 + a4 + a5 + a6;
-+}
-+
-+/* { dg-final { scan-assembler-times "sub\tsp, sp, #\[0-9\]+" 2 } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_17.c
-@@ -0,0 +1,21 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 --save-temps" } */
-+
-+/* Test reuse of stack adjustment temporaries.  */
-+
-+void foo ();
-+
-+int reuse_mov (int i)
-+{
-+  int arr[1025];
-+  return arr[i];
-+}
-+
-+int no_reuse_mov (int i)
-+{
-+  int arr[1025];
-+  foo ();
-+  return arr[i];
-+}
-+
-+/* { dg-final { scan-assembler-times "mov\tx16, \[0-9\]+" 3 } } */
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_6.c
-@@ -3,8 +3,7 @@
-  * without outgoing.
-  * total frame size > 512.
-  * number of callee-saved reg == 1.
-- * split stack adjustment into two subtractions.
--   the second subtraction should use "str !".  */
-+ * use a single stack adjustment, no writeback.  */
- 
- /* { dg-do run } */
- /* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-@@ -14,6 +13,7 @@
- 
- t_frame_pattern (test6, 700, )
- t_frame_run (test6)
- 
---/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 2 } } */
---/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 2 } } */
--+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp\\\]" 1 } } */
--+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\]" 2 } } */
--+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\]," 1 } } */
- 
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
-@@ -3,8 +3,7 @@
-  * without outgoing.
-  * total frame size > 512.
-  * number of callee-saved reg == 2.
-- * split stack adjustment into two subtractions.
--   the second subtraction should use "stp !".
-+ * use a single stack adjustment, no writeback. 
*/
- 
- /* { dg-do run } */
- /* { dg-options "-O2 -fomit-frame-pointer --save-temps" } */
-@@ -14,6 +13,6 @@
- 
- t_frame_pattern (test7, 700, "x19")
- t_frame_run (test7)
- 
---/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
---/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" 1 } } */
--+/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp]" 1 } } */
--+/* { dg-final { scan-assembler-times "ldp\tx19, x30, \\\[sp\\\]" 1 } } */
- 
---- a/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
-+++ b/src/gcc/testsuite/gcc.target/aarch64/test_frame_8.c
-@@ -12,6 +12,6 @@
- t_frame_pattern_outgoing (test8, 700, , 8, a[8])
- t_frame_run (test8)
- 
---/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, -\[0-9\]+\\\]!" 3 } } */
---/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp\\\], \[0-9\]+" 3 } } */
--+/* { dg-final { scan-assembler-times "str\tx30, \\\[sp, \[0-9\]+\\\]" 1 } } */
--+/* { dg-final { scan-assembler-times "ldr\tx30, \\\[sp, \[0-9\]+\\\]" 1 } } */
- 
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/thunderxloadpair.c
-@@ -0,0 +1,20 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -mcpu=thunderx" } */
-+
-+struct ldp
-+{
-+  long long c;
-+  int a, b;
-+};
-+
-+
-+int f(struct ldp *a)
-+{
-+  return a->a + a->b;
-+}
-+
-+
-+/* We know the alignment of a->a to be 8 byte aligned so it is profitable
-+   to do ldp.  */
-+/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 1 } } */
-+
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/thunderxnoloadpair.c
-@@ -0,0 +1,17 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 -mcpu=thunderx" } */
-+
-+struct noldp
-+{
-+  int a, b;
-+};
-+
-+
-+int f(struct noldp *a)
-+{
-+  return a->a + a->b;
-+}
-+
-+/* We know the alignment of a->a to be 4 byte aligned so it is not profitable
-+   to do ldp.  */
-+/* { dg-final { scan-assembler-not "ldp\tw\[0-9\]+, w\[0-9\]" } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/ubfiz_lsl_1.c
-@@ -0,0 +1,13 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+/* Check that an X-reg UBFIZ can be simplified into a W-reg LSL.  */
-+
-+long long
-+f2 (long long x)
-+{
-+  return (x << 5) & 0xffffffff;
-+}
-+
-+/* { dg-final { scan-assembler "lsl\tw" } } */
-+/* { dg-final { scan-assembler-not "ubfiz\tx" } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/ubfx_lsr_1.c
-@@ -0,0 +1,14 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2" } */
-+
-+/* Check that an X-reg UBFX can be simplified into a W-reg LSR.  */
-+
-+int
-+f (unsigned long long x)
-+{
-+  x = (x >> 24) & 255;
-+  return x + 1;
-+}
-+
-+/* { dg-final { scan-assembler "lsr\tw" } } */
-+/* { dg-final { scan-assembler-not "ubfx\tx" } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/va_arg_1.c
-@@ -0,0 +1,11 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 --save-temps" } */
-+
-+int
-+f (int a, ...)
-+{
-+  /* { dg-final { scan-assembler-not "str" } } */
-+  return a;
-+}
-+
-+/* { dg-final { cleanup-saved-temps } } */
---- /dev/null
-+++ b/src/gcc/testsuite/gcc.target/aarch64/va_arg_2.c
-@@ -0,0 +1,18 @@
-+/* { dg-do compile } */
-+/* { dg-options "-O2 --save-temps" } */
-+
-+int
-+foo (char *fmt, ...)
-+{ -+ int d; -+ __builtin_va_list ap; -+ -+ __builtin_va_start (ap, fmt); -+ d = __builtin_va_arg (ap, int); -+ __builtin_va_end (ap); -+ -+ /* { dg-final { scan-assembler-not "x7" } } */ -+ return d; -+} -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/va_arg_3.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 --save-temps" } */ -+ -+int d2i (double a); -+ -+int -+foo (char *fmt, ...) -+{ -+ int d, e; -+ double f, g; -+ __builtin_va_list ap; -+ -+ __builtin_va_start (ap, fmt); -+ d = __builtin_va_arg (ap, int); -+ f = __builtin_va_arg (ap, double); -+ g = __builtin_va_arg (ap, double); -+ d += d2i (f); -+ d += d2i (g); -+ __builtin_va_end (ap); -+ -+ /* { dg-final { scan-assembler-not "x7" } } */ -+ /* { dg-final { scan-assembler-not "q7" } } */ -+ return d; -+} -+ -+/* { dg-final { cleanup-saved-temps } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-abs-compile.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-abs-compile.c -@@ -1,6 +1,6 @@ - - /* { dg-do compile } */ --/* { dg-options "-O3" } */ -+/* { dg-options "-O3 -fno-vect-cost-model" } */ - - #define N 16 - ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-clz.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-clz.c -@@ -1,5 +1,5 @@ - /* { dg-do run } */ --/* { dg-options "-O3 -save-temps -fno-inline" } */ -+/* { dg-options "-O3 -save-temps -fno-inline -fno-vect-cost-model" } */ - - extern void abort (); - ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c -@@ -1,5 +1,5 @@ - /* { dg-do run } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model" } */ - - #define FTYPE double - #define ITYPE long ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c -@@ -1,5 +1,5 @@ - /* { dg-do run } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model" } */ - - #define FTYPE double - #define ITYPE long ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c -@@ -1,5 +1,5 @@ - /* { dg-do run } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline -fno-vect-cost-model" } */ - - #define FTYPE double - #define ITYPE long ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ - - #define N 32 - ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ - - #define N 32 - ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c -+++ 
b/src/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ - - #define N 32 - ---- a/src/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-vect-cost-model" } */ - - #define N 32 - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect_copy_lane_1.c -@@ -0,0 +1,86 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O3" } */ -+ -+#include "arm_neon.h" -+ -+#define BUILD_TEST(TYPE1, TYPE2, Q1, Q2, SUFFIX, INDEX1, INDEX2) \ -+TYPE1 __attribute__((noinline,noclone)) \ -+test_copy##Q1##_lane##Q2##_##SUFFIX (TYPE1 a, TYPE2 b) \ -+{ \ -+ return vcopy##Q1##_lane##Q2##_##SUFFIX (a, INDEX1, b, INDEX2); \ -+} -+ -+/* vcopy_lane. */ -+BUILD_TEST (poly8x8_t, poly8x8_t, , , p8, 7, 6) -+BUILD_TEST (int8x8_t, int8x8_t, , , s8, 7, 6) -+BUILD_TEST (uint8x8_t, uint8x8_t, , , u8, 7, 6) -+/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[6\\\]" 3 } } */ -+BUILD_TEST (poly16x4_t, poly16x4_t, , , p16, 3, 2) -+BUILD_TEST (int16x4_t, int16x4_t, , , s16, 3, 2) -+BUILD_TEST (uint16x4_t, uint16x4_t, , , u16, 3, 2) -+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[3\\\], v1.h\\\[2\\\]" 3 } } */ -+BUILD_TEST (float32x2_t, float32x2_t, , , f32, 1, 0) -+BUILD_TEST (int32x2_t, int32x2_t, , , s32, 1, 0) -+BUILD_TEST (uint32x2_t, uint32x2_t, , , u32, 1, 0) -+/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[1\\\], v1.s\\\[0\\\]" 3 } } */ -+BUILD_TEST (int64x1_t, int64x1_t, , , s64, 0, 0) -+BUILD_TEST (uint64x1_t, uint64x1_t, , , u64, 0, 0) -+BUILD_TEST (float64x1_t, float64x1_t, , , f64, 0, 0) -+/* { dg-final { scan-assembler-times "fmov\\td0, d1" 3 } } */ -+ -+/* vcopy_laneq. */ -+ -+BUILD_TEST (poly8x8_t, poly8x16_t, , q, p8, 7, 15) -+BUILD_TEST (int8x8_t, int8x16_t, , q, s8, 7, 15) -+BUILD_TEST (uint8x8_t, uint8x16_t, , q, u8, 7, 15) -+/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[15\\\]" 3 } } */ -+BUILD_TEST (poly16x4_t, poly16x8_t, , q, p16, 3, 7) -+BUILD_TEST (int16x4_t, int16x8_t, , q, s16, 3, 7) -+BUILD_TEST (uint16x4_t, uint16x8_t, , q, u16, 3, 7) -+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[3\\\], v1.h\\\[7\\\]" 3 } } */ -+BUILD_TEST (float32x2_t, float32x4_t, , q, f32, 1, 3) -+BUILD_TEST (int32x2_t, int32x4_t, , q, s32, 1, 3) -+BUILD_TEST (uint32x2_t, uint32x4_t, , q, u32, 1, 3) -+/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[1\\\], v1.s\\\[3\\\]" 3 } } */ -+BUILD_TEST (float64x1_t, float64x2_t, , q, f64, 0, 1) -+BUILD_TEST (int64x1_t, int64x2_t, , q, s64, 0, 1) -+BUILD_TEST (uint64x1_t, uint64x2_t, , q, u64, 0, 1) -+/* XFAIL due to PR 71307. */ -+/* { dg-final { scan-assembler-times "dup\\td0, v1.d\\\[1\\\]" 3 { xfail *-*-* } } } */ -+ -+/* vcopyq_lane. 
*/ -+BUILD_TEST (poly8x16_t, poly8x8_t, q, , p8, 15, 7) -+BUILD_TEST (int8x16_t, int8x8_t, q, , s8, 15, 7) -+BUILD_TEST (uint8x16_t, uint8x8_t, q, , u8, 15, 7) -+/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[15\\\], v1.b\\\[7\\\]" 3 } } */ -+BUILD_TEST (poly16x8_t, poly16x4_t, q, , p16, 7, 3) -+BUILD_TEST (int16x8_t, int16x4_t, q, , s16, 7, 3) -+BUILD_TEST (uint16x8_t, uint16x4_t, q, , u16, 7, 3) -+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[7\\\], v1.h\\\[3\\\]" 3 } } */ -+BUILD_TEST (float32x4_t, float32x2_t, q, , f32, 3, 1) -+BUILD_TEST (int32x4_t, int32x2_t, q, , s32, 3, 1) -+BUILD_TEST (uint32x4_t, uint32x2_t, q, , u32, 3, 1) -+/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[3\\\], v1.s\\\[1\\\]" 3 } } */ -+BUILD_TEST (float64x2_t, float64x1_t, q, , f64, 1, 0) -+BUILD_TEST (int64x2_t, int64x1_t, q, , s64, 1, 0) -+BUILD_TEST (uint64x2_t, uint64x1_t, q, , u64, 1, 0) -+/* { dg-final { scan-assembler-times "ins\\tv0.d\\\[1\\\], v1.d\\\[0\\\]" 3 } } */ -+ -+/* vcopyq_laneq. */ -+ -+BUILD_TEST (poly8x16_t, poly8x16_t, q, q, p8, 14, 15) -+BUILD_TEST (int8x16_t, int8x16_t, q, q, s8, 14, 15) -+BUILD_TEST (uint8x16_t, uint8x16_t, q, q, u8, 14, 15) -+/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[14\\\], v1.b\\\[15\\\]" 3 } } */ -+BUILD_TEST (poly16x8_t, poly16x8_t, q, q, p16, 6, 7) -+BUILD_TEST (int16x8_t, int16x8_t, q, q, s16, 6, 7) -+BUILD_TEST (uint16x8_t, uint16x8_t, q, q, u16, 6, 7) -+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[6\\\], v1.h\\\[7\\\]" 3 } } */ -+BUILD_TEST (float32x4_t, float32x4_t, q, q, f32, 2, 3) -+BUILD_TEST (int32x4_t, int32x4_t, q, q, s32, 2, 3) -+BUILD_TEST (uint32x4_t, uint32x4_t, q, q, u32, 2, 3) -+/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[2\\\], v1.s\\\[3\\\]" 3 } } */ -+BUILD_TEST (float64x2_t, float64x2_t, q, q, f64, 1, 1) -+BUILD_TEST (int64x2_t, int64x2_t, q, q, s64, 1, 1) -+BUILD_TEST (uint64x2_t, uint64x2_t, q, q, u64, 1, 1) -+/* { dg-final { scan-assembler-times "ins\\tv0.d\\\[1\\\], v1.d\\\[1\\\]" 3 } } */ ---- a/src/gcc/testsuite/gcc.target/aarch64/vect_ctz_1.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vect_ctz_1.c -@@ -1,5 +1,5 @@ - /* { dg-do run } */ --/* { dg-options "-O3 -save-temps -fno-inline" } */ -+/* { dg-options "-O3 -save-temps -fno-inline -fno-vect-cost-model" } */ - - extern void abort (); - ---- a/src/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c -+++ b/src/gcc/testsuite/gcc.target/aarch64/vector_initialization_nostack.c -@@ -38,14 +38,14 @@ f11 (void) - return sum; - } - --char arr_c[100][100]; -+char arr_c[100]; - char - f12 (void) - { - int i; - char sum = 0; - for (i = 0; i < 100; i++) -- sum += arr_c[i][0] * arr_c[0][i]; -+ sum += arr_c[i] * arr_c[i]; - return sum; - } - ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/vget_set_lane_1.c -@@ -0,0 +1,72 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include "arm_neon.h" -+ -+#define BUILD_TEST(TYPE1, TYPE2, Q1, Q2, SUFFIX, INDEX1, INDEX2) \ -+TYPE1 __attribute__((noinline,noclone)) \ -+test_copy##Q1##_lane##Q2##_##SUFFIX (TYPE1 a, TYPE2 b) \ -+{ \ -+ return vset##Q1##_lane_##SUFFIX (vget##Q2##_lane_##SUFFIX (b, INDEX2),\ -+ a, INDEX1); \ -+} -+ -+BUILD_TEST (poly8x8_t, poly8x8_t, , , p8, 7, 6) -+BUILD_TEST (int8x8_t, int8x8_t, , , s8, 7, 6) -+BUILD_TEST (uint8x8_t, uint8x8_t, , , u8, 7, 6) -+/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[6\\\]" 3 } } */ -+BUILD_TEST (poly16x4_t, poly16x4_t, , , p16, 3, 2) -+BUILD_TEST (int16x4_t, int16x4_t, , , s16, 3, 2) -+BUILD_TEST (uint16x4_t, 
uint16x4_t, , , u16, 3, 2) -+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[3\\\], v1.h\\\[2\\\]" 3 } } */ -+BUILD_TEST (float32x2_t, float32x2_t, , , f32, 1, 0) -+BUILD_TEST (int32x2_t, int32x2_t, , , s32, 1, 0) -+BUILD_TEST (uint32x2_t, uint32x2_t, , , u32, 1, 0) -+/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[1\\\], v1.s\\\[0\\\]" 3 } } */ -+ -+BUILD_TEST (poly8x8_t, poly8x16_t, , q, p8, 7, 15) -+BUILD_TEST (int8x8_t, int8x16_t, , q, s8, 7, 15) -+BUILD_TEST (uint8x8_t, uint8x16_t, , q, u8, 7, 15) -+/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[7\\\], v1.b\\\[15\\\]" 3 } } */ -+BUILD_TEST (poly16x4_t, poly16x8_t, , q, p16, 3, 7) -+BUILD_TEST (int16x4_t, int16x8_t, , q, s16, 3, 7) -+BUILD_TEST (uint16x4_t, uint16x8_t, , q, u16, 3, 7) -+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[3\\\], v1.h\\\[7\\\]" 3 } } */ -+BUILD_TEST (float32x2_t, float32x4_t, , q, f32, 1, 3) -+BUILD_TEST (int32x2_t, int32x4_t, , q, s32, 1, 3) -+BUILD_TEST (uint32x2_t, uint32x4_t, , q, u32, 1, 3) -+/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[1\\\], v1.s\\\[3\\\]" 3 } } */ -+ -+BUILD_TEST (poly8x16_t, poly8x8_t, q, , p8, 15, 7) -+BUILD_TEST (int8x16_t, int8x8_t, q, , s8, 15, 7) -+BUILD_TEST (uint8x16_t, uint8x8_t, q, , u8, 15, 7) -+/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[15\\\], v1.b\\\[7\\\]" 3 } } */ -+BUILD_TEST (poly16x8_t, poly16x4_t, q, , p16, 7, 3) -+BUILD_TEST (int16x8_t, int16x4_t, q, , s16, 7, 3) -+BUILD_TEST (uint16x8_t, uint16x4_t, q, , u16, 7, 3) -+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[7\\\], v1.h\\\[3\\\]" 3 } } */ -+BUILD_TEST (float32x4_t, float32x2_t, q, , f32, 3, 1) -+BUILD_TEST (int32x4_t, int32x2_t, q, , s32, 3, 1) -+BUILD_TEST (uint32x4_t, uint32x2_t, q, , u32, 3, 1) -+/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[3\\\], v1.s\\\[1\\\]" 3 } } */ -+BUILD_TEST (float64x2_t, float64x1_t, q, , f64, 1, 0) -+BUILD_TEST (int64x2_t, int64x1_t, q, , s64, 1, 0) -+BUILD_TEST (uint64x2_t, uint64x1_t, q, , u64, 1, 0) -+/* { dg-final { scan-assembler-times "ins\\tv0.d\\\[1\\\], v1.d\\\[0\\\]" 3 } } */ -+ -+BUILD_TEST (poly8x16_t, poly8x16_t, q, q, p8, 14, 15) -+BUILD_TEST (int8x16_t, int8x16_t, q, q, s8, 14, 15) -+BUILD_TEST (uint8x16_t, uint8x16_t, q, q, u8, 14, 15) -+/* { dg-final { scan-assembler-times "ins\\tv0.b\\\[14\\\], v1.b\\\[15\\\]" 3 } } */ -+BUILD_TEST (poly16x8_t, poly16x8_t, q, q, p16, 6, 7) -+BUILD_TEST (int16x8_t, int16x8_t, q, q, s16, 6, 7) -+BUILD_TEST (uint16x8_t, uint16x8_t, q, q, u16, 6, 7) -+/* { dg-final { scan-assembler-times "ins\\tv0.h\\\[6\\\], v1.h\\\[7\\\]" 3 } } */ -+BUILD_TEST (float32x4_t, float32x4_t, q, q, f32, 2, 3) -+BUILD_TEST (int32x4_t, int32x4_t, q, q, s32, 2, 3) -+BUILD_TEST (uint32x4_t, uint32x4_t, q, q, u32, 2, 3) -+/* { dg-final { scan-assembler-times "ins\\tv0.s\\\[2\\\], v1.s\\\[3\\\]" 3 } } */ -+BUILD_TEST (float64x2_t, float64x2_t, q, q, f64, 1, 1) -+BUILD_TEST (int64x2_t, int64x2_t, q, q, s64, 1, 1) -+BUILD_TEST (uint64x2_t, uint64x2_t, q, q, u64, 1, 1) -+/* { dg-final { scan-assembler-times "ins\\tv0.d\\\[1\\\], v1.d\\\[1\\\]" 3 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/aarch64/vminmaxnm.c -@@ -0,0 +1,37 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+ -+#include "arm_neon.h" -+ -+/* For each of these intrinsics, we map directly to an unspec in RTL. -+ We're just using the argument directly and returning the result, so we -+ can precisely specify the exact instruction pattern and register -+ allocations we expect. 
*/ -+ -+float64x1_t -+test_vmaxnm_f64 (float64x1_t a, float64x1_t b) -+{ -+ /* { dg-final { scan-assembler-times "fmaxnm\td0, d0, d1" 1 } } */ -+ return vmaxnm_f64 (a, b); -+} -+ -+float64x1_t -+test_vminnm_f64 (float64x1_t a, float64x1_t b) -+{ -+ /* { dg-final { scan-assembler-times "fminnm\td0, d0, d1" 1 } } */ -+ return vminnm_f64 (a, b); -+} -+ -+float64x1_t -+test_vmax_f64 (float64x1_t a, float64x1_t b) -+{ -+ /* { dg-final { scan-assembler-times "fmax\td0, d0, d1" 1 } } */ -+ return vmax_f64 (a, b); -+} -+ -+float64x1_t -+test_vmin_f64 (float64x1_t a, float64x1_t b) -+{ -+ /* { dg-final { scan-assembler-times "fmin\td0, d0, d1" 1 } } */ -+ return vmin_f64 (a, b); -+} -\ No newline at end of file ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect10.c -@@ -0,0 +1,32 @@ -+/* Test AAPCS layout (VFP variant for Neon types) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_neon_fp16_hw } */ -+/* { dg-add-options arm_neon_fp16 } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define NEON -+#define TESTFILE "neon-vect10.c" -+#include "neon-constants.h" -+ -+#include "abitest.h" -+#else -+ -+ARG (int32x4_t, i32x4_constvec2, Q0) /* D0, D1. */ -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 3.0f, S4 + 2) /* D2, Q1. */ -+#else -+ARG (__fp16, 3.0f, S4) /* D2, Q1. */ -+#endif -+ARG (int32x4x2_t, i32x4x2_constvec1, Q2) /* Q2, Q3 - D4-D6 , s5-s12. */ -+ARG (double, 12.0, D3) /* Backfill this particular argument. */ -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 5.0f, S5 + 2) /* Backfill in S5. */ -+#else -+ARG (__fp16, 5.0f, S5) /* Backfill in S5. */ -+#endif -+ARG (int32x4x2_t, i32x4x2_constvec2, STACK) -+LAST_ARG (int, 3, R0) -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/neon-vect9.c -@@ -0,0 +1,24 @@ -+/* Test AAPCS layout (VFP variant for Neon types) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_neon_fp16_hw } */ -+/* { dg-add-options arm_neon_fp16 } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define NEON -+#define TESTFILE "neon-vect9.c" -+#include "neon-constants.h" -+ -+#include "abitest.h" -+#else -+ -+ARG (int32x4_t, i32x4_constvec2, Q0) /* D0, D1. */ -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 3.0f, S4 + 2) /* D2, Q1 occupied. */ -+#else -+ARG (__fp16, 3.0f, S4) /* D2, Q1 occupied. 
*/ -+#endif -+LAST_ARG (int, 3, R0) -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp18.c -@@ -0,0 +1,28 @@ -+/* Test AAPCS layout (VFP variant) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_hw } */ -+/* { dg-add-options arm_fp16_ieee } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define TESTFILE "vfp18.c" -+#include "abitest.h" -+ -+#else -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 1.0f, S0 + 2) -+#else -+ARG (__fp16, 1.0f, S0) -+#endif -+ARG (float, 2.0f, S1) -+ARG (double, 4.0, D1) -+ARG (float, 2.0f, S4) -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 1.0f, S5 + 2) -+#else -+ARG (__fp16, 1.0f, S5) -+#endif -+LAST_ARG (int, 3, R0) -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp19.c -@@ -0,0 +1,30 @@ -+/* Test AAPCS layout (VFP variant) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_hw } */ -+/* { dg-add-options arm_fp16_ieee } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define TESTFILE "vfp19.c" -+ -+__complex__ x = 1.0+2.0i; -+ -+#include "abitest.h" -+#else -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 1.0f, S0 + 2) -+#else -+ARG (__fp16, 1.0f, S0) -+#endif -+ARG (float, 2.0f, S1) -+ARG (__complex__ double, x, D1) -+ARG (float, 3.0f, S6) -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 2.0f, S7 + 2) -+#else -+ARG (__fp16, 2.0f, S7) -+#endif -+LAST_ARG (int, 3, R0) -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp20.c -@@ -0,0 +1,22 @@ -+/* Test AAPCS layout (VFP variant) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_hw } */ -+/* { dg-add-options arm_fp16_ieee } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define TESTFILE "vfp20.c" -+ -+#define PCSATTR __attribute__((pcs("aapcs"))) -+ -+#include "abitest.h" -+#else -+ARG (float, 1.0f, R0) -+ARG (double, 2.0, R2) -+ARG (float, 3.0f, STACK) -+ARG (__fp16, 2.0f, STACK+4) -+LAST_ARG (double, 4.0, STACK+8) -+#endif -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp21.c -@@ -0,0 +1,26 @@ -+/* Test AAPCS layout (VFP variant) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_hw } */ -+/* { dg-add-options arm_fp16_ieee } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define TESTFILE "vfp21.c" -+ -+#define PCSATTR __attribute__((pcs("aapcs"))) -+ -+#include "abitest.h" -+#else -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 1.0f, R0 + 2) -+#else -+ARG (__fp16, 1.0f, R0) -+#endif -+ARG (double, 2.0, R2) -+ARG (__fp16, 3.0f, STACK) -+ARG (float, 2.0f, STACK+4) -+LAST_ARG (double, 4.0, STACK+8) -+#endif -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp22.c -@@ -0,0 +1,28 @@ -+/* Test AAPCS layout (VFP variant) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_hw } */ -+/* { dg-add-options arm_fp16_alternative } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define TESTFILE "vfp22.c" -+#include "abitest.h" -+ -+#else -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 1.0f, S0 + 2) -+#else -+ARG (__fp16, 1.0f, S0) -+#endif -+ARG (float, 2.0f, S1) -+ARG (double, 4.0, D1) -+ARG (float, 2.0f, S4) -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 1.0f, 
S5 + 2) -+#else -+ARG (__fp16, 1.0f, S5) -+#endif -+LAST_ARG (int, 3, R0) -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp23.c -@@ -0,0 +1,30 @@ -+/* Test AAPCS layout (VFP variant) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_hw } */ -+/* { dg-add-options arm_fp16_alternative } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define TESTFILE "vfp23.c" -+ -+__complex__ x = 1.0+2.0i; -+ -+#include "abitest.h" -+#else -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 1.0f, S0 + 2) -+#else -+ARG (__fp16, 1.0f, S0) -+#endif -+ARG (float, 2.0f, S1) -+ARG (__complex__ double, x, D1) -+ARG (float, 3.0f, S6) -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 2.0f, S7 + 2) -+#else -+ARG (__fp16, 2.0f, S7) -+#endif -+LAST_ARG (int, 3, R0) -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp24.c -@@ -0,0 +1,21 @@ -+/* Test AAPCS layout (VFP variant) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_hw } */ -+/* { dg-add-options arm_fp16_alternative } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define TESTFILE "vfp24.c" -+ -+#define PCSATTR __attribute__((pcs("aapcs"))) -+ -+#include "abitest.h" -+#else -+ARG (float, 1.0f, R0) -+ARG (double, 2.0, R2) -+ARG (float, 3.0f, STACK) -+ARG (__fp16, 2.0f, STACK+4) -+LAST_ARG (double, 4.0, STACK+8) -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/aapcs/vfp25.c -@@ -0,0 +1,25 @@ -+/* Test AAPCS layout (VFP variant) */ -+ -+/* { dg-do run { target arm_eabi } } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_hw } */ -+/* { dg-add-options arm_fp16_alternative } */ -+ -+#ifndef IN_FRAMEWORK -+#define VFP -+#define TESTFILE "vfp25.c" -+ -+#define PCSATTR __attribute__((pcs("aapcs"))) -+ -+#include "abitest.h" -+#else -+#if defined (__ARM_BIG_ENDIAN) -+ARG (__fp16, 1.0f, R0 + 2) -+#else -+ARG (__fp16, 1.0f, R0) -+#endif -+ARG (double, 2.0, R2) -+ARG (__fp16, 3.0f, STACK) -+ARG (float, 2.0f, STACK+4) -+LAST_ARG (double, 4.0, STACK+8) -+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/armv5_thumb_isa.c -@@ -0,0 +1,8 @@ -+/* { dg-require-effective-target arm_arch_v5_ok } */ -+/* { dg-add-options arm_arch_v5 } */ -+ -+#if __ARM_ARCH_ISA_THUMB -+#error "__ARM_ARCH_ISA_THUMB defined for ARMv5" -+#endif -+ -+int foo; ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-arith-1.c -@@ -0,0 +1,105 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */ -+/* { dg-options "-O2 -ffast-math" } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+ -+/* Test instructions generated for half-precision arithmetic. */ -+ -+typedef __fp16 float16_t; -+typedef __simd64_float16_t float16x4_t; -+typedef __simd128_float16_t float16x8_t; -+ -+typedef short int16x4_t __attribute__ ((vector_size (8))); -+typedef short int int16x8_t __attribute__ ((vector_size (16))); -+ -+float16_t -+fp16_abs (float16_t a) -+{ -+ return (a < 0) ? -a : a; -+} -+ -+#define TEST_UNOP(NAME, OPERATOR, TY) \ -+ TY test_##NAME##_##TY (TY a) \ -+ { \ -+ return OPERATOR (a); \ -+ } -+ -+#define TEST_BINOP(NAME, OPERATOR, TY) \ -+ TY test_##NAME##_##TY (TY a, TY b) \ -+ { \ -+ return a OPERATOR b; \ -+ } -+ -+#define TEST_CMP(NAME, OPERATOR, RTY, TY) \ -+ RTY test_##NAME##_##TY (TY a, TY b) \ -+ { \ -+ return a OPERATOR b; \ -+ } -+ -+/* Scalars. 
*/ -+ -+TEST_UNOP (neg, -, float16_t) -+TEST_UNOP (abs, fp16_abs, float16_t) -+ -+TEST_BINOP (add, +, float16_t) -+TEST_BINOP (sub, -, float16_t) -+TEST_BINOP (mult, *, float16_t) -+TEST_BINOP (div, /, float16_t) -+ -+TEST_CMP (equal, ==, int, float16_t) -+TEST_CMP (unequal, !=, int, float16_t) -+TEST_CMP (lessthan, <, int, float16_t) -+TEST_CMP (greaterthan, >, int, float16_t) -+TEST_CMP (lessthanequal, <=, int, float16_t) -+TEST_CMP (greaterthanqual, >=, int, float16_t) -+ -+/* Vectors of size 4. */ -+ -+TEST_UNOP (neg, -, float16x4_t) -+ -+TEST_BINOP (add, +, float16x4_t) -+TEST_BINOP (sub, -, float16x4_t) -+TEST_BINOP (mult, *, float16x4_t) -+TEST_BINOP (div, /, float16x4_t) -+ -+TEST_CMP (equal, ==, int16x4_t, float16x4_t) -+TEST_CMP (unequal, !=, int16x4_t, float16x4_t) -+TEST_CMP (lessthan, <, int16x4_t, float16x4_t) -+TEST_CMP (greaterthan, >, int16x4_t, float16x4_t) -+TEST_CMP (lessthanequal, <=, int16x4_t, float16x4_t) -+TEST_CMP (greaterthanqual, >=, int16x4_t, float16x4_t) -+ -+/* Vectors of size 8. */ -+ -+TEST_UNOP (neg, -, float16x8_t) -+ -+TEST_BINOP (add, +, float16x8_t) -+TEST_BINOP (sub, -, float16x8_t) -+TEST_BINOP (mult, *, float16x8_t) -+TEST_BINOP (div, /, float16x8_t) -+ -+TEST_CMP (equal, ==, int16x8_t, float16x8_t) -+TEST_CMP (unequal, !=, int16x8_t, float16x8_t) -+TEST_CMP (lessthan, <, int16x8_t, float16x8_t) -+TEST_CMP (greaterthan, >, int16x8_t, float16x8_t) -+TEST_CMP (lessthanequal, <=, int16x8_t, float16x8_t) -+TEST_CMP (greaterthanqual, >=, int16x8_t, float16x8_t) -+ -+/* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */ -+ -+/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -+/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -+/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -+/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 13 } } */ -+/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 26 } } */ -+/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 52 } } */ -+ -+/* { dg-final { scan-assembler-not {vadd\.f32} } } */ -+/* { dg-final { scan-assembler-not {vsub\.f32} } } */ -+/* { dg-final { scan-assembler-not {vmul\.f32} } } */ -+/* { dg-final { scan-assembler-not {vdiv\.f32} } } */ -+/* { dg-final { scan-assembler-not {vcmp\.f16} } } */ -+/* { dg-final { scan-assembler-not {vcmpe\.f16} } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-conv-1.c -@@ -0,0 +1,101 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+/* Test ARMv8.2 FP16 conversions. 
*/ -+#include <arm_fp16.h> -+ -+float -+f16_to_f32 (__fp16 a) -+{ -+ return (float)a; -+} -+ -+float -+f16_to_pf32 (__fp16* a) -+{ -+ return (float)*a; -+} -+ -+short -+f16_to_s16 (__fp16 a) -+{ -+ return (short)a; -+} -+ -+short -+pf16_to_s16 (__fp16* a) -+{ -+ return (short)*a; -+} -+ -+/* { dg-final { scan-assembler-times {vcvtb\.f32\.f16\ts[0-9]+, s[0-9]+} 4 } } */ -+ -+__fp16 -+f32_to_f16 (float a) -+{ -+ return (__fp16)a; -+} -+ -+void -+f32_to_pf16 (__fp16* x, float a) -+{ -+ *x = (__fp16)a; -+} -+ -+__fp16 -+s16_to_f16 (short a) -+{ -+ return (__fp16)a; -+} -+ -+void -+s16_to_pf16 (__fp16* x, short a) -+{ -+ *x = (__fp16)a; -+} -+ -+/* { dg-final { scan-assembler-times {vcvtb\.f16\.f32\ts[0-9]+, s[0-9]+} 4 } } */ -+ -+float -+s16_to_f32 (short a) -+{ -+ return (float)a; -+} -+ -+/* { dg-final { scan-assembler-times {vcvt\.f32\.s32\ts[0-9]+, s[0-9]+} 3 } } */ -+ -+short -+f32_to_s16 (float a) -+{ -+ return (short)a; -+} -+ -+/* { dg-final { scan-assembler-times {vcvt\.s32\.f32\ts[0-9]+, s[0-9]+} 3 } } */ -+ -+unsigned short -+f32_to_u16 (float a) -+{ -+ return (unsigned short)a; -+} -+ -+/* { dg-final { scan-assembler-times {vcvt\.u32\.f32\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+short -+f64_to_s16 (double a) -+{ -+ return (short)a; -+} -+ -+/* { dg-final { scan-assembler-times {vcvt\.s32\.f64\ts[0-9]+, d[0-9]+} 1 } } */ -+ -+unsigned short -+f64_to_u16 (double a) -+{ -+ return (unsigned short)a; -+} -+ -+/* { dg-final { scan-assembler-times {vcvt\.s32\.f64\ts[0-9]+, d[0-9]+} 1 } } */ -+ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c -@@ -0,0 +1,165 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+__fp16 -+test_load_1 (__fp16* a) -+{ -+ return *a; -+} -+ -+__fp16 -+test_load_2 (__fp16* a, int i) -+{ -+ return a[i]; -+} -+ -+/* { dg-final { scan-assembler-times {vld1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]+\]} 2 } } */ -+ -+void -+test_store_1 (__fp16* a, __fp16 b) -+{ -+ *a = b; -+} -+ -+void -+test_store_2 (__fp16* a, int i, __fp16 b) -+{ -+ a[i] = b; -+} -+ -+/* { dg-final { scan-assembler-times {vst1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]+\]} 2 } } */ -+ -+__fp16 -+test_load_store_1 (__fp16* a, int i, __fp16* b) -+{ -+ a[i] = b[i]; -+} -+ -+__fp16 -+test_load_store_2 (__fp16* a, int i, __fp16* b) -+{ -+ a[i] = b[i + 2]; -+ return a[i]; -+} -+/* { dg-final { scan-assembler-times {ldrh\tr[0-9]+} 2 } } */ -+/* { dg-final { scan-assembler-times {strh\tr[0-9]+} 2 } } */ -+ -+__fp16 -+test_select_1 (int sel, __fp16 a, __fp16 b) -+{ -+ if (sel) -+ return a; -+ else -+ return b; -+} -+ -+__fp16 -+test_select_2 (int sel, __fp16 a, __fp16 b) -+{ -+ return sel ? a : b; -+} -+ -+__fp16 -+test_select_3 (__fp16 a, __fp16 b, __fp16 c) -+{ -+ return (a == b) ? b : c; -+} -+ -+__fp16 -+test_select_4 (__fp16 a, __fp16 b, __fp16 c) -+{ -+ return (a != b) ? b : c; -+} -+ -+__fp16 -+test_select_5 (__fp16 a, __fp16 b, __fp16 c) -+{ -+ return (a < b) ? b : c; -+} -+ -+__fp16 -+test_select_6 (__fp16 a, __fp16 b, __fp16 c) -+{ -+ return (a <= b) ? b : c; -+} -+ -+__fp16 -+test_select_7 (__fp16 a, __fp16 b, __fp16 c) -+{ -+ return (a > b) ? b : c; -+} -+ -+__fp16 -+test_select_8 (__fp16 a, __fp16 b, __fp16 c) -+{ -+ return (a >= b) ? 
b : c; -+} -+ -+/* { dg-final { scan-assembler-times {vseleq\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 4 } } */ -+/* { dg-final { scan-assembler-times {vselgt\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+/* { dg-final { scan-assembler-times {vselge\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+/* { dg-final { scan-assembler-times {vmov\.f16\ts[0-9]+, r[0-9]+} 4 } } */ -+/* { dg-final { scan-assembler-times {vmov\.f16\tr[0-9]+, s[0-9]+} 4 } } */ -+ -+int -+test_compare_1 (__fp16 a, __fp16 b) -+{ -+ if (a == b) -+ return -1; -+ else -+ return 0; -+} -+ -+int -+test_compare_ (__fp16 a, __fp16 b) -+{ -+ if (a != b) -+ return -1; -+ else -+ return 0; -+} -+ -+int -+test_compare_2 (__fp16 a, __fp16 b) -+{ -+ if (a > b) -+ return -1; -+ else -+ return 0; -+} -+ -+int -+test_compare_3 (__fp16 a, __fp16 b) -+{ -+ if (a >= b) -+ return -1; -+ else -+ return 0; -+} -+ -+int -+test_compare_4 (__fp16 a, __fp16 b) -+{ -+ if (a < b) -+ return -1; -+ else -+ return 0; -+} -+ -+int -+test_compare_5 (__fp16 a, __fp16 b) -+{ -+ if (a <= b) -+ return -1; -+ else -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-not {vcmp\.f16} } } */ -+/* { dg-final { scan-assembler-not {vcmpe\.f16} } } */ -+ -+/* { dg-final { scan-assembler-times {vcmp\.f32} 4 } } */ -+/* { dg-final { scan-assembler-times {vcmpe\.f32} 8 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-neon-1.c -@@ -0,0 +1,490 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_v8_2a_fp16_neon } */ -+ -+/* Test instructions generated for the FP16 vector intrinsics. */ -+ -+#include <arm_neon.h> -+ -+#define MSTRCAT(L, str) L##str -+ -+#define UNOP_TEST(insn) \ -+ float16x4_t \ -+ MSTRCAT (test_##insn, _16x4) (float16x4_t a) \ -+ { \ -+ return MSTRCAT (insn, _f16) (a); \ -+ } \ -+ float16x8_t \ -+ MSTRCAT (test_##insn, _16x8) (float16x8_t a) \ -+ { \ -+ return MSTRCAT (insn, q_f16) (a); \ -+ } -+ -+#define BINOP_TEST(insn) \ -+ float16x4_t \ -+ MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b) \ -+ { \ -+ return MSTRCAT (insn, _f16) (a, b); \ -+ } \ -+ float16x8_t \ -+ MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b) \ -+ { \ -+ return MSTRCAT (insn, q_f16) (a, b); \ -+ } -+ -+#define BINOP_LANE_TEST(insn, I) \ -+ float16x4_t \ -+ MSTRCAT (test_##insn##_lane, _16x4) (float16x4_t a, float16x4_t b) \ -+ { \ -+ return MSTRCAT (insn, _lane_f16) (a, b, I); \ -+ } \ -+ float16x8_t \ -+ MSTRCAT (test_##insn##_lane, _16x8) (float16x8_t a, float16x4_t b) \ -+ { \ -+ return MSTRCAT (insn, q_lane_f16) (a, b, I); \ -+ } -+ -+#define BINOP_LANEQ_TEST(insn, I) \ -+ float16x4_t \ -+ MSTRCAT (test_##insn##_laneq, _16x4) (float16x4_t a, float16x8_t b) \ -+ { \ -+ return MSTRCAT (insn, _laneq_f16) (a, b, I); \ -+ } \ -+ float16x8_t \ -+ MSTRCAT (test_##insn##_laneq, _16x8) (float16x8_t a, float16x8_t b) \ -+ { \ -+ return MSTRCAT (insn, q_laneq_f16) (a, b, I); \ -+ } \ -+ -+#define BINOP_N_TEST(insn) \ -+ float16x4_t \ -+ MSTRCAT (test_##insn##_n, _16x4) (float16x4_t a, float16_t b) \ -+ { \ -+ return MSTRCAT (insn, _n_f16) (a, b); \ -+ } \ -+ float16x8_t \ -+ MSTRCAT (test_##insn##_n, _16x8) (float16x8_t a, float16_t b) \ -+ { \ -+ return MSTRCAT (insn, q_n_f16) (a, b); \ -+ } -+ -+#define TERNOP_TEST(insn) \ -+ float16_t \ -+ MSTRCAT (test_##insn, _16) (float16_t a, float16_t b, float16_t c) \ -+ { \ -+ return MSTRCAT (insn, h_f16) (a, b, c); \ -+ } \ -+ float16x4_t \ -+ MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b, \ -+ 
float16x4_t c) \ -+ { \ -+ return MSTRCAT (insn, _f16) (a, b, c); \ -+ } \ -+ float16x8_t \ -+ MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b, \ -+ float16x8_t c) \ -+ { \ -+ return MSTRCAT (insn, q_f16) (a, b, c); \ -+ } -+ -+#define VCMP1_TEST(insn) \ -+ uint16x4_t \ -+ MSTRCAT (test_##insn, _16x4) (float16x4_t a) \ -+ { \ -+ return MSTRCAT (insn, _f16) (a); \ -+ } \ -+ uint16x8_t \ -+ MSTRCAT (test_##insn, _16x8) (float16x8_t a) \ -+ { \ -+ return MSTRCAT (insn, q_f16) (a); \ -+ } -+ -+#define VCMP2_TEST(insn) \ -+ uint16x4_t \ -+ MSTRCAT (test_##insn, _16x4) (float16x4_t a, float16x4_t b) \ -+ { \ -+ return MSTRCAT (insn, _f16) (a, b); \ -+ } \ -+ uint16x8_t \ -+ MSTRCAT (test_##insn, _16x8) (float16x8_t a, float16x8_t b) \ -+ { \ -+ return MSTRCAT (insn, q_f16) (a, b); \ -+ } -+ -+#define VCVT_TEST(insn, TY, TO, FR) \ -+ MSTRCAT (TO, 16x4_t) \ -+ MSTRCAT (test_##insn, TY) (MSTRCAT (FR, 16x4_t) a) \ -+ { \ -+ return MSTRCAT (insn, TY) (a); \ -+ } \ -+ MSTRCAT (TO, 16x8_t) \ -+ MSTRCAT (test_##insn##_q, TY) (MSTRCAT (FR, 16x8_t) a) \ -+ { \ -+ return MSTRCAT (insn, q##TY) (a); \ -+ } -+ -+#define VCVT_N_TEST(insn, TY, TO, FR) \ -+ MSTRCAT (TO, 16x4_t) \ -+ MSTRCAT (test_##insn##_n, TY) (MSTRCAT (FR, 16x4_t) a) \ -+ { \ -+ return MSTRCAT (insn, _n##TY) (a, 1); \ -+ } \ -+ MSTRCAT (TO, 16x8_t) \ -+ MSTRCAT (test_##insn##_n_q, TY) (MSTRCAT (FR, 16x8_t) a) \ -+ { \ -+ return MSTRCAT (insn, q_n##TY) (a, 1); \ -+ } -+ -+VCMP1_TEST (vceqz) -+/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-0]+, #0} 1 } } */ -+/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */ -+ -+VCMP1_TEST (vcgtz) -+/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */ -+/* { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */ -+ -+VCMP1_TEST (vcgez) -+/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */ -+/* { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */ -+ -+VCMP1_TEST (vcltz) -+/* { dg-final { scan-assembler-times {vclt.f16\td[0-9]+, d[0-9]+, #0} 1 } } */ -+/* { dg-final { scan-assembler-times {vclt.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */ -+ -+VCMP1_TEST (vclez) -+/* { dg-final { scan-assembler-times {vcle\.f16\td[0-9]+, d[0-9]+, #0} 1 } } */ -+/* { dg-final { scan-assembler-times {vcle\.f16\tq[0-9]+, q[0-9]+, #0} 1 } } */ -+ -+VCVT_TEST (vcvt, _f16_s16, float, int) -+VCVT_N_TEST (vcvt, _f16_s16, float, int) -+/* { dg-final { scan-assembler-times {vcvt\.f16\.s16\td[0-9]+, d[0-9]+} 2 } } -+ { dg-final { scan-assembler-times {vcvt\.f16\.s16\tq[0-9]+, q[0-9]+} 2 } } -+ { dg-final { scan-assembler-times {vcvt\.f16\.s16\td[0-9]+, d[0-9]+, #1} 1 } } -+ { dg-final { scan-assembler-times {vcvt\.f16\.s16\tq[0-9]+, q[0-9]+, #1} 1 } } */ -+ -+VCVT_TEST (vcvt, _f16_u16, float, uint) -+VCVT_N_TEST (vcvt, _f16_u16, float, uint) -+/* { dg-final { scan-assembler-times {vcvt\.f16\.u16\td[0-9]+, d[0-9]+} 2 } } -+ { dg-final { scan-assembler-times {vcvt\.f16\.u16\tq[0-9]+, q[0-9]+} 2 } } -+ { dg-final { scan-assembler-times {vcvt\.f16\.u16\td[0-9]+, d[0-9]+, #1} 1 } } -+ { dg-final { scan-assembler-times {vcvt\.f16\.u16\tq[0-9]+, q[0-9]+, #1} 1 } } */ -+ -+VCVT_TEST (vcvt, _s16_f16, int, float) -+VCVT_N_TEST (vcvt, _s16_f16, int, float) -+/* { dg-final { scan-assembler-times {vcvt\.s16\.f16\td[0-9]+, d[0-9]+} 2 } } -+ { dg-final { scan-assembler-times {vcvt\.s16\.f16\tq[0-9]+, q[0-9]+} 2 } } -+ { dg-final { scan-assembler-times {vcvt\.s16\.f16\td[0-9]+, d[0-9]+, #1} 1 } } -+ { dg-final { 
scan-assembler-times {vcvt\.s16\.f16\tq[0-9]+, q[0-9]+, #1} 1 } } */ -+ -+VCVT_TEST (vcvt, _u16_f16, uint, float) -+VCVT_N_TEST (vcvt, _u16_f16, uint, float) -+/* { dg-final { scan-assembler-times {vcvt\.u16\.f16\td[0-9]+, d[0-9]+} 2 } } -+ { dg-final { scan-assembler-times {vcvt\.u16\.f16\tq[0-9]+, q[0-9]+} 2 } } -+ { dg-final { scan-assembler-times {vcvt\.u16\.f16\td[0-9]+, d[0-9]+, #1} 1 } } -+ { dg-final { scan-assembler-times {vcvt\.u16\.f16\tq[0-9]+, q[0-9]+, #1} 1 } } */ -+ -+VCVT_TEST (vcvta, _s16_f16, int, float) -+/* { dg-final { scan-assembler-times {vcvta\.s16\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcvta\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } } -+*/ -+ -+VCVT_TEST (vcvta, _u16_f16, uint, float) -+/* { dg-final { scan-assembler-times {vcvta\.u16\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcvta\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } } -+*/ -+ -+VCVT_TEST (vcvtm, _s16_f16, int, float) -+/* { dg-final { scan-assembler-times {vcvtm\.s16\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcvtm\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } } -+*/ -+ -+VCVT_TEST (vcvtm, _u16_f16, uint, float) -+/* { dg-final { scan-assembler-times {vcvtm\.u16\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcvtm\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } } -+*/ -+ -+VCVT_TEST (vcvtn, _s16_f16, int, float) -+/* { dg-final { scan-assembler-times {vcvtn\.s16\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcvtn\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } } -+*/ -+ -+VCVT_TEST (vcvtn, _u16_f16, uint, float) -+/* { dg-final { scan-assembler-times {vcvtn\.u16\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcvtn\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } } -+*/ -+ -+VCVT_TEST (vcvtp, _s16_f16, int, float) -+/* { dg-final { scan-assembler-times {vcvtp\.s16\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcvtp\.s16\.f16\tq[0-9]+, q[0-9]+} 1 } } -+*/ -+ -+VCVT_TEST (vcvtp, _u16_f16, uint, float) -+/* { dg-final { scan-assembler-times {vcvtp\.u16\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcvtp\.u16\.f16\tq[0-9]+, q[0-9]+} 1 } } -+*/ -+ -+UNOP_TEST (vabs) -+/* { dg-final { scan-assembler-times {vabs\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vabs\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vneg) -+/* { dg-final { scan-assembler-times {vneg\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vneg\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrecpe) -+/* { dg-final { scan-assembler-times {vrecpe\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrecpe\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrnd) -+/* { dg-final { scan-assembler-times {vrintz\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrintz\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrnda) -+/* { dg-final { scan-assembler-times {vrinta\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrinta\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndm) -+/* { dg-final { scan-assembler-times {vrintm\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrintm\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndn) -+/* { dg-final { scan-assembler-times {vrintn\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrintn\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndp) -+/* { dg-final { scan-assembler-times {vrintp\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times 
{vrintp\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndx) -+/* { dg-final { scan-assembler-times {vrintx\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrintx\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrsqrte) -+/* { dg-final { scan-assembler-times {vrsqrte\.f16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrsqrte\.f16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vadd) -+/* { dg-final { scan-assembler-times {vadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vadd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vabd) -+/* { dg-final { scan-assembler-times {vabd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vabd\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vcage) -+/* { dg-final { scan-assembler-times {vacge\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vacge\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vcagt) -+/* { dg-final { scan-assembler-times {vacgt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vacgt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vcale) -+/* { dg-final { scan-assembler-times {vacle\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vacle\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vcalt) -+/* { dg-final { scan-assembler-times {vaclt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vaclt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vceq) -+/* { dg-final { scan-assembler-times {vceq\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vceq\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vcge) -+/* { dg-final { scan-assembler-times {vcge\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcge\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vcgt) -+/* { dg-final { scan-assembler-times {vcgt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcgt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vcle) -+/* { dg-final { scan-assembler-times {vcle\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vcle\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+VCMP2_TEST (vclt) -+/* { dg-final { scan-assembler-times {vclt\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vclt\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vmax) -+/* { dg-final { scan-assembler-times {vmax\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vmax\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vmin) -+/* { dg-final { scan-assembler-times {vmin\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vmin\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vmaxnm) -+/* { dg-final { scan-assembler-times {vmaxnm\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vmaxnm\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vminnm) -+/* { dg-final { scan-assembler-times {vminnm\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vminnm\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vmul) -+/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 3 } } -+ { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+BINOP_LANE_TEST (vmul, 2) -+/* { dg-final { 
scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[2\]} 1 } } -+ { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[2\]} 1 } } */ -+BINOP_N_TEST (vmul) -+/* { dg-final { scan-assembler-times {vmul\.f16\td[0-9]+, d[0-9]+, d[0-9]+\[0\]} 1 } } -+ { dg-final { scan-assembler-times {vmul\.f16\tq[0-9]+, q[0-9]+, d[0-9]+\[0\]} 1 } }*/ -+ -+float16x4_t -+test_vpadd_16x4 (float16x4_t a, float16x4_t b) -+{ -+ return vpadd_f16 (a, b); -+} -+/* { dg-final { scan-assembler-times {vpadd\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ -+ -+float16x4_t -+test_vpmax_16x4 (float16x4_t a, float16x4_t b) -+{ -+ return vpmax_f16 (a, b); -+} -+/* { dg-final { scan-assembler-times {vpmax\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ -+ -+float16x4_t -+test_vpmin_16x4 (float16x4_t a, float16x4_t b) -+{ -+ return vpmin_f16 (a, b); -+} -+/* { dg-final { scan-assembler-times {vpmin\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ -+ -+BINOP_TEST (vsub) -+/* { dg-final { scan-assembler-times {vsub\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vsub\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vrecps) -+/* { dg-final { scan-assembler-times {vrecps\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrecps\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+BINOP_TEST (vrsqrts) -+/* { dg-final { scan-assembler-times {vrsqrts\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrsqrts\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+TERNOP_TEST (vfma) -+/* { dg-final { scan-assembler-times {vfma\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vfma\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+TERNOP_TEST (vfms) -+/* { dg-final { scan-assembler-times {vfms\.f16\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vfms\.f16\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+float16x4_t -+test_vmov_n_f16 (float16_t a) -+{ -+ return vmov_n_f16 (a); -+} -+ -+float16x4_t -+test_vdup_n_f16 (float16_t a) -+{ -+ return vdup_n_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, r[0-9]+} 2 } } */ -+ -+float16x8_t -+test_vmovq_n_f16 (float16_t a) -+{ -+ return vmovq_n_f16 (a); -+} -+ -+float16x8_t -+test_vdupq_n_f16 (float16_t a) -+{ -+ return vdupq_n_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, r[0-9]+} 2 } } */ -+ -+float16x4_t -+test_vdup_lane_f16 (float16x4_t a) -+{ -+ return vdup_lane_f16 (a, 1); -+} -+/* { dg-final { scan-assembler-times {vdup\.16\td[0-9]+, d[0-9]+\[1\]} 1 } } */ -+ -+float16x8_t -+test_vdupq_lane_f16 (float16x4_t a) -+{ -+ return vdupq_lane_f16 (a, 1); -+} -+/* { dg-final { scan-assembler-times {vdup\.16\tq[0-9]+, d[0-9]+\[1\]} 1 } } */ -+ -+float16x4_t -+test_vext_f16 (float16x4_t a, float16x4_t b) -+{ -+ return vext_f16 (a, b, 1); -+} -+/* { dg-final { scan-assembler-times {vext\.16\td[0-9]+, d[0-9]+, d[0-9]+, #1} 1 } } */ -+ -+float16x8_t -+test_vextq_f16 (float16x8_t a, float16x8_t b) -+{ -+ return vextq_f16 (a, b, 1); -+} -+/* { dg-final { scan-assembler-times {vext\.16\tq[0-9]+, q[0-9]+, q[0-9]+, #1} 1 } } */ -+ -+UNOP_TEST (vrev64) -+/* { dg-final { scan-assembler-times {vrev64\.16\td[0-9]+, d[0-9]+} 1 } } -+ { dg-final { scan-assembler-times {vrev64\.16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+float16x4_t -+test_vbsl16x4 (uint16x4_t a, float16x4_t b, float16x4_t c) -+{ -+ return vbsl_f16 (a, b, c); -+} -+/* { dg-final { scan-assembler-times {vbsl\td[0-9]+, d[0-9]+, d[0-9]+} 1 } } */ -+ -+float16x8_t -+test_vbslq16x8 
(uint16x8_t a, float16x8_t b, float16x8_t c) -+{ -+ return vbslq_f16 (a, b, c); -+} -+/*{ dg-final { scan-assembler-times {vbsl\tq[0-9]+, q[0-9]+, q[0-9]+} 1 } } */ -+ -+float16x4x2_t -+test_vzip16x4 (float16x4_t a, float16x4_t b) -+{ -+ return vzip_f16 (a, b); -+} -+/* { dg-final { scan-assembler-times {vzip\.16\td[0-9]+, d[0-9]+} 1 } } */ -+ -+float16x8x2_t -+test_vzipq16x8 (float16x8_t a, float16x8_t b) -+{ -+ return vzipq_f16 (a, b); -+} -+/*{ dg-final { scan-assembler-times {vzip\.16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+float16x4x2_t -+test_vuzp16x4 (float16x4_t a, float16x4_t b) -+{ -+ return vuzp_f16 (a, b); -+} -+/* { dg-final { scan-assembler-times {vuzp\.16\td[0-9]+, d[0-9]+} 1 } } */ -+ -+float16x8x2_t -+test_vuzpq16x8 (float16x8_t a, float16x8_t b) -+{ -+ return vuzpq_f16 (a, b); -+} -+/*{ dg-final { scan-assembler-times {vuzp\.16\tq[0-9]+, q[0-9]+} 1 } } */ -+ -+float16x4x2_t -+test_vtrn16x4 (float16x4_t a, float16x4_t b) -+{ -+ return vtrn_f16 (a, b); -+} -+/* { dg-final { scan-assembler-times {vtrn\.16\td[0-9]+, d[0-9]+} 1 } } */ -+ -+float16x8x2_t -+test_vtrnq16x8 (float16x8_t a, float16x8_t b) -+{ -+ return vtrnq_f16 (a, b); -+} -+/*{ dg-final { scan-assembler-times {vtrn\.16\tq[0-9]+, q[0-9]+} 1 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-1.c -@@ -0,0 +1,203 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+/* Test instructions generated for the FP16 scalar intrinsics. */ -+#include <arm_fp16.h> -+ -+#define MSTRCAT(L, str) L##str -+ -+#define UNOP_TEST(insn) \ -+ float16_t \ -+ MSTRCAT (test_##insn, 16) (float16_t a) \ -+ { \ -+ return MSTRCAT (insn, h_f16) (a); \ -+ } -+ -+#define BINOP_TEST(insn) \ -+ float16_t \ -+ MSTRCAT (test_##insn, 16) (float16_t a, float16_t b) \ -+ { \ -+ return MSTRCAT (insn, h_f16) (a, b); \ -+ } -+ -+#define TERNOP_TEST(insn) \ -+ float16_t \ -+ MSTRCAT (test_##insn, 16) (float16_t a, float16_t b, float16_t c) \ -+ { \ -+ return MSTRCAT (insn, h_f16) (a, b, c); \ -+ } -+ -+float16_t -+test_vcvth_f16_s32 (int32_t a) -+{ -+ return vcvth_f16_s32 (a); -+} -+ -+float16_t -+test_vcvth_n_f16_s32 (int32_t a) -+{ -+ return vcvth_n_f16_s32 (a, 1); -+} -+/* { dg-final { scan-assembler-times {vcvt\.f16\.s32\ts[0-9]+, s[0-9]+} 2 } } */ -+/* { dg-final { scan-assembler-times {vcvt\.f16\.s32\ts[0-9]+, s[0-9]+, #1} 1 } } */ -+ -+float16_t -+test_vcvth_f16_u32 (uint32_t a) -+{ -+ return vcvth_f16_u32 (a); -+} -+ -+float16_t -+test_vcvth_n_f16_u32 (uint32_t a) -+{ -+ return vcvth_n_f16_u32 (a, 1); -+} -+ -+/* { dg-final { scan-assembler-times {vcvt\.f16\.u32\ts[0-9]+, s[0-9]+} 2 } } */ -+/* { dg-final { scan-assembler-times {vcvt\.f16\.u32\ts[0-9]+, s[0-9]+, #1} 1 } } */ -+ -+uint32_t -+test_vcvth_u32_f16 (float16_t a) -+{ -+ return vcvth_u32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvt\.u32\.f16\ts[0-9]+, s[0-9]+} 2 } } */ -+ -+uint32_t -+test_vcvth_n_u32_f16 (float16_t a) -+{ -+ return vcvth_n_u32_f16 (a, 1); -+} -+/* { dg-final { scan-assembler-times {vcvt\.u32\.f16\ts[0-9]+, s[0-9]+, #1} 1 } } */ -+ -+int32_t -+test_vcvth_s32_f16 (float16_t a) -+{ -+ return vcvth_s32_f16 (a); -+} -+ -+int32_t -+test_vcvth_n_s32_f16 (float16_t a) -+{ -+ return vcvth_n_s32_f16 (a, 1); -+} -+ -+/* { dg-final { scan-assembler-times {vcvt\.s32\.f16\ts[0-9]+, s[0-9]+} 2 } } */ -+/* { dg-final { scan-assembler-times {vcvt\.s32\.f16\ts[0-9]+, s[0-9]+, #1} 1 } } */ -+ -+int32_t -+test_vcvtah_s32_f16 (float16_t 
a) -+{ -+ return vcvtah_s32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvta\.s32\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+uint32_t -+test_vcvtah_u32_f16 (float16_t a) -+{ -+ return vcvtah_u32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvta\.u32\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+int32_t -+test_vcvtmh_s32_f16 (float16_t a) -+{ -+ return vcvtmh_s32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvtm\.s32\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+uint32_t -+test_vcvtmh_u32_f16 (float16_t a) -+{ -+ return vcvtmh_u32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvtm\.u32\.f16\ts[0-9]+, s[0-9]+} 1 } } -+ */ -+ -+int32_t -+test_vcvtnh_s32_f16 (float16_t a) -+{ -+ return vcvtnh_s32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvtn\.s32\.f16\ts[0-9]+, s[0-9]+} 1 } } -+ */ -+ -+uint32_t -+test_vcvtnh_u32_f16 (float16_t a) -+{ -+ return vcvtnh_u32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvtn\.u32\.f16\ts[0-9]+, s[0-9]+} 1 } } -+ */ -+ -+int32_t -+test_vcvtph_s32_f16 (float16_t a) -+{ -+ return vcvtph_s32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvtp\.s32\.f16\ts[0-9]+, s[0-9]+} 1 } } -+ */ -+ -+uint32_t -+test_vcvtph_u32_f16 (float16_t a) -+{ -+ return vcvtph_u32_f16 (a); -+} -+/* { dg-final { scan-assembler-times {vcvtp\.u32\.f16\ts[0-9]+, s[0-9]+} 1 } } -+ */ -+ -+UNOP_TEST (vabs) -+/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vneg) -+/* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrnd) -+/* { dg-final { scan-assembler-times {vrintz\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndi) -+/* { dg-final { scan-assembler-times {vrintr\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrnda) -+/* { dg-final { scan-assembler-times {vrinta\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndm) -+/* { dg-final { scan-assembler-times {vrinta\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndn) -+/* { dg-final { scan-assembler-times {vrinta\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndp) -+/* { dg-final { scan-assembler-times {vrinta\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vrndx) -+/* { dg-final { scan-assembler-times {vrinta\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+UNOP_TEST (vsqrt) -+/* { dg-final { scan-assembler-times {vsqrt\.f16\ts[0-9]+, s[0-9]+} 1 } } */ -+ -+BINOP_TEST (vadd) -+/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+BINOP_TEST (vdiv) -+/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+BINOP_TEST (vmaxnm) -+/* { dg-final { scan-assembler-times {vmaxnm\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+BINOP_TEST (vminnm) -+/* { dg-final { scan-assembler-times {vminnm\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+BINOP_TEST (vmul) -+/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+BINOP_TEST (vsub) -+/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+TERNOP_TEST (vfma) -+/* { dg-final { scan-assembler-times {vfma\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+TERNOP_TEST (vfms) -+/* { dg-final { scan-assembler-times {vfms\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/armv8_2-fp16-scalar-2.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */ -+/* { dg-options "-O2 -std=c11" } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+/* Test compiler use 
of FP16 instructions. */ -+#include <arm_fp16.h> -+ -+float16_t -+test_mov_imm_1 (float16_t a) -+{ -+ return 1.0; -+} -+ -+float16_t -+test_mov_imm_2 (float16_t a) -+{ -+ float16_t b = 1.0; -+ return b; -+} -+ -+float16_t -+test_vmov_imm_3 (float16_t a) -+{ -+ float16_t b = 1.0; -+ return vaddh_f16 (a, b); -+} -+ -+float16_t -+test_vmov_imm_4 (float16_t a) -+{ -+ return vaddh_f16 (a, 1.0); -+} -+ -+/* { dg-final { scan-assembler-times {vmov.f16\ts[0-9]+, #1\.0e\+0} 4 } } -+ { dg-final { scan-assembler-times {vadd.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 2 } } */ -+ -+float16_t -+test_vmla_1 (float16_t a, float16_t b, float16_t c) -+{ -+ return vaddh_f16 (vmulh_f16 (a, b), c); -+} -+/* { dg-final { scan-assembler-times {vmla\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+float16_t -+test_vmla_2 (float16_t a, float16_t b, float16_t c) -+{ -+ return vsubh_f16 (vmulh_f16 (vnegh_f16 (a), b), c); -+} -+/* { dg-final { scan-assembler-times {vnmla\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ -+float16_t -+test_vmls_1 (float16_t a, float16_t b, float16_t c) -+{ -+ return vsubh_f16 (c, vmulh_f16 (a, b)); -+} -+ -+float16_t -+test_vmls_2 (float16_t a, float16_t b, float16_t c) -+{ -+ return vsubh_f16 (a, vmulh_f16 (b, c)); -+} -+/* { dg-final { scan-assembler-times {vmls\.f16} 2 } } */ -+ -+float16_t -+test_vnmls_1 (float16_t a, float16_t b, float16_t c) -+{ -+ return vsubh_f16 (vmulh_f16 (a, b), c); -+} -+/* { dg-final { scan-assembler-times {vnmls\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2 -fno-ipa-icf" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-comp-swap-release-acquire.x" -+ -+/* { dg-final { scan-assembler-times "ldaex" 4 } } */ -+/* { dg-final { scan-assembler-times "stlex" 4 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2 -fno-ipa-icf" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-comp-swap-release-acquire.x" -+ -+/* { dg-final { scan-assembler-times "ldaex" 4 } } */ -+/* { dg-final { scan-assembler-times "stlex" 4 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2 -fno-ipa-icf" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-comp-swap-release-acquire.x" -+ -+/* { dg-final { scan-assembler-times "ldaex" 4 } } */ -+/* { dg-final { scan-assembler-times "stlex" 4 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-comp-swap-release-acquire.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2 -fno-ipa-icf" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-comp-swap-release-acquire.x" -- --/* { dg-final { scan-assembler-times "ldaex" 4 } } */ --/* { dg-final { scan-assembler-times "stlex" 4 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ 
b/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-acq_rel.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-acq_rel.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-acq_rel.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acq_rel.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-acq_rel.x" -- --/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-acquire.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-acquire.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-acquire.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* 
{ dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-acquire.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-acquire.x" -- --/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-char-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-char.x" -+ -+/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-char-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-char.x" -+ -+/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-char-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-char.x" -+ -+/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-char.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-char.x" -- --/* { dg-final { scan-assembler-times "ldrexb\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "strexb\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-consume-1.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-consume.x" -+ -+/* Scan for ldaex is a PR59448 consume workaround. 
*/ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-consume-2.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-consume.x" -+ -+/* Scan for ldaex is a PR59448 consume workaround. */ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-consume-3.c -@@ -0,0 +1,11 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-consume.x" -+ -+/* Scan for ldaex is a PR59448 consume workaround. */ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-consume.c -+++ b/src//dev/null -@@ -1,11 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-consume.x" -- --/* Scan for ldaex is a PR59448 consume workaround. */ --/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-int-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-int.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-int-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-int.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-int-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-int.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not 
"dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-int.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-int.x" -- --/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-relaxed.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-relaxed.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-relaxed.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-relaxed.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-relaxed.x" -- --/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "strex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-release-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-release.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-release-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-release.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, 
\\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-release-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-release.x" -+ -+/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-release.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-release.x" -- --/* { dg-final { scan-assembler-times "ldrex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-seq_cst.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-seq_cst.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-seq_cst.x" -+ -+/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-seq_cst.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-seq_cst.x" -- --/* { dg-final { scan-assembler-times "ldaex\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "stlex\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-short-1.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8a_ok } */ -+/* { dg-options "-O2" } 
*/ -+/* { dg-add-options arm_arch_v8a } */ -+ -+#include "../aarch64/atomic-op-short.x" -+ -+/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-short-2.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+ -+#include "../aarch64/atomic-op-short.x" -+ -+/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/atomic-op-short-3.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+ -+#include "../aarch64/atomic-op-short.x" -+ -+/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ -+/* { dg-final { scan-assembler-not "dmb" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/atomic-op-short.c -+++ b/src//dev/null -@@ -1,10 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_arch_v8a_ok } */ --/* { dg-options "-O2" } */ --/* { dg-add-options arm_arch_v8a } */ -- --#include "../aarch64/atomic-op-short.x" -- --/* { dg-final { scan-assembler-times "ldrexh\tr\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-times "strexh\t...?, r\[0-9\]+, \\\[r\[0-9\]+\\\]" 6 } } */ --/* { dg-final { scan-assembler-not "dmb" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/attr-fp16-arith-1.c -@@ -0,0 +1,58 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_v8_2a_fp16_scalar } */ -+ -+/* Reset fpu to a value compatible with the next pragmas. */ -+#pragma GCC target ("fpu=vfp") -+ -+#pragma GCC push_options -+#pragma GCC target ("fpu=fp-armv8") -+ -+#ifndef __ARM_FEATURE_FP16_SCALAR_ARITHMETIC -+#error __ARM_FEATURE_FP16_SCALAR_ARITHMETIC not defined. -+#endif -+ -+#pragma GCC push_options -+#pragma GCC target ("fpu=neon-fp-armv8") -+ -+#ifndef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -+#error __ARM_FEATURE_FP16_VECTOR_ARITHMETIC not defined. -+#endif -+ -+#ifndef __ARM_NEON -+#error __ARM_NEON not defined. -+#endif -+ -+#if !defined (__ARM_FP) || !(__ARM_FP & 0x2) -+#error Invalid value for __ARM_FP -+#endif -+ -+#include "arm_neon.h" -+ -+float16_t -+foo (float16x4_t b) -+{ -+ float16x4_t a = {2.0, 3.0, 4.0, 5.0}; -+ float16x4_t res = vadd_f16 (a, b); -+ -+ return res[0]; -+} -+ -+/* { dg-final { scan-assembler "vadd\\.f16\td\[0-9\]+, d\[0-9\]+" } } */ -+ -+#pragma GCC pop_options -+ -+/* Check that the FP version is correctly reset to mfpu=fp-armv8. */ -+ -+#if !defined (__ARM_FP) || !(__ARM_FP & 0x2) -+#error __ARM_FP should record FP16 support. -+#endif -+ -+#pragma GCC pop_options -+ -+/* Check that the FP version is correctly reset to mfpu=vfp. */ -+ -+#if !defined (__ARM_FP) || (__ARM_FP & 0x2) -+#error Unexpected value for __ARM_FP. 
-+#endif ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/builtin_saddl.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm32 } */ -+extern void overflow_handler (); -+ -+long overflow_add (long x, long y) -+{ -+ long r; -+ -+ int ovr = __builtin_saddl_overflow (x, y, &r); -+ if (ovr) -+ overflow_handler (); -+ -+ return r; -+} -+ -+/* { dg-final { scan-assembler "adds" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/builtin_saddll.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm32 } */ -+extern void overflow_handler (); -+ -+long long overflow_add (long long x, long long y) -+{ -+ long long r; -+ -+ int ovr = __builtin_saddll_overflow (x, y, &r); -+ if (ovr) -+ overflow_handler (); -+ -+ return r; -+} -+ -+/* { dg-final { scan-assembler "adds" } } */ -+/* { dg-final { scan-assembler "adcs" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/builtin_ssubl.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm32 } */ -+extern void overflow_handler (); -+ -+long overflow_sub (long x, long y) -+{ -+ long r; -+ -+ int ovr = __builtin_ssubl_overflow (x, y, &r); -+ if (ovr) -+ overflow_handler (); -+ -+ return r; -+} -+ -+/* { dg-final { scan-assembler "subs" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/builtin_ssubll.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm32 } */ -+extern void overflow_handler (); -+ -+long long overflow_sub (long long x, long long y) -+{ -+ long long r; -+ -+ int ovr = __builtin_ssubll_overflow (x, y, &r); -+ if (ovr) -+ overflow_handler (); -+ -+ return r; -+} -+ -+/* { dg-final { scan-assembler "subs" } } */ -+/* { dg-final { scan-assembler "sbcs" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/builtin_uaddl.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm32 } */ -+extern void overflow_handler (); -+ -+unsigned long overflow_add (unsigned long x, unsigned long y) -+{ -+ unsigned long r; -+ -+ int ovr = __builtin_uaddl_overflow (x, y, &r); -+ if (ovr) -+ overflow_handler (); -+ -+ return r; -+} -+ -+/* { dg-final { scan-assembler "adds" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/builtin_uaddll.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm32 } */ -+extern void overflow_handler (); -+ -+unsigned long long overflow_add (unsigned long long x, unsigned long long y) -+{ -+ unsigned long long r; -+ -+ int ovr = __builtin_uaddll_overflow (x, y, &r); -+ if (ovr) -+ overflow_handler (); -+ -+ return r; -+} -+ -+/* { dg-final { scan-assembler "adds" } } */ -+/* { dg-final { scan-assembler "adcs" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/builtin_usubl.c -@@ -0,0 +1,17 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { dg-require-effective-target arm32 } */ -+extern void overflow_handler (); -+ -+unsigned long overflow_sub (unsigned long x, unsigned long y) -+{ -+ unsigned long r; -+ -+ int ovr = __builtin_usubl_overflow (x, y, &r); -+ if (ovr) -+ overflow_handler (); -+ -+ return r; -+} -+ -+/* { dg-final { scan-assembler "subs" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/builtin_usubll.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2" } */ -+/* { 
dg-require-effective-target arm32 } */ -+extern void overflow_handler (); -+ -+unsigned long long overflow_sub (unsigned long long x, unsigned long long y) -+{ -+ unsigned long long r; -+ -+ int ovr = __builtin_usubll_overflow (x, y, &r); -+ if (ovr) -+ overflow_handler (); -+ -+ return r; -+} -+ -+/* { dg-final { scan-assembler "subs" } } */ -+/* { dg-final { scan-assembler "sbcs" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cbz.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile {target { arm_thumb2 || arm_thumb1_cbz_ok } } } */ -+/* { dg-options "-O2" } */ -+ -+int -+foo (int a, int *b) -+{ -+ if (a) -+ *b = 1; -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "cbz\\tr\\d" 1 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c -@@ -0,0 +1,57 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int b:5; -+ unsigned int c:11, :0, d:8; -+ struct { unsigned int ee:2; } e; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+extern void foo (test_st st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ r.values.v4 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 255" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr4, #255" } } */ -+/* { dg-final { scan-assembler "ands\tr1, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr4, #3" } } */ -+/* { dg-final { scan-assembler "ands\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c -@@ -0,0 +1,53 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned short b :5; -+ unsigned char c; -+ unsigned short d :11; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #8191" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 255" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #2047" } } */ -+/* { dg-final { scan-assembler "ands\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr2, r4" } } */ -+/* { dg-final { 
scan-assembler "movs\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c -@@ -0,0 +1,63 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int b : 3; -+ unsigned int c : 14; -+ unsigned int d : 1; -+ struct { -+ unsigned int ee : 2; -+ unsigned short ff : 15; -+ } e; -+ unsigned char g : 1; -+ unsigned char : 4; -+ unsigned char h : 3; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ r.values.v4 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 1023" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr4, #3" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 32767" } } */ -+/* { dg-final { scan-assembler "ands\tr1, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr4, #255" } } */ -+/* { dg-final { scan-assembler "ands\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c -@@ -0,0 +1,54 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned short b :5; -+ unsigned char c; -+ unsigned short d :11; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #8191" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 255" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #2047" } } */ -+/* { dg-final { scan-assembler "ands\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr2, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c -@@ -0,0 +1,57 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int :0; -+ unsigned int b :1; -+ unsigned short :0; -+ unsigned short c; -+ unsigned int :0; -+ unsigned int d :21; -+} test_st; 
-+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr4, #255" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr4, #1" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 65535" } } */ -+/* { dg-final { scan-assembler "ands\tr1, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 31" } } */ -+/* { dg-final { scan-assembler "ands\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c -@@ -0,0 +1,56 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ char a:3; -+} test_st3; -+ -+typedef struct -+{ -+ char a:3; -+} test_st2; -+ -+typedef struct -+{ -+ test_st2 st2; -+ test_st3 st3; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #1799" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr1, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr2, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c -@@ -0,0 +1,96 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned short a :11; -+} test_st_4; -+ -+typedef union -+{ -+ char a; -+ test_st_4 st4; -+}test_un_2; -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int :0; -+ unsigned int b :1; -+ unsigned short :0; -+ unsigned short c; -+ unsigned int :0; -+ unsigned int d :21; -+} test_st_3; -+ -+typedef struct -+{ -+ unsigned char a :3; -+ unsigned int b :13; -+ test_un_2 un2; -+} test_st_2; -+ -+typedef union -+{ -+ test_st_2 st2; -+ test_st_3 st3; -+}test_un_1; -+ -+typedef struct -+{ -+ unsigned char a :2; -+ unsigned char :0; -+ unsigned short b :5; -+ unsigned char :0; -+ unsigned char c :4; -+ test_un_1 un1; -+} test_st_1; -+ -+typedef union -+{ -+ test_st_1 st1; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st_1; -+ -+ -+typedef void 
__attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st_1); -+ -+int -+main (void) -+{ -+ read_st_1 r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ r.values.v4 = 0xFFFFFFFF; -+ -+ f (r.st1); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #7939" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 15" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 2047" } } */ -+/* { dg-final { scan-assembler "ands\tr1, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr4, #1" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 65535" } } */ -+/* { dg-final { scan-assembler "ands\tr2, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 31" } } */ -+/* { dg-final { scan-assembler "ands\tr3, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c -@@ -0,0 +1,22 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+/* { dg-options "-mcmse" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (int); -+ -+int -+foo (int a) -+{ -+ return bar (bar (a + 1)); -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr1, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr2, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r4" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+/* { dg-options "-mcmse" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double); -+ -+int -+foo (int a) -+{ -+ return bar (1.0f, 2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler-not "movs\tr0, r4" } } */ -+/* { dg-final { scan-assembler "\n\tmovs\tr1, r4" } } */ -+/* { dg-final { scan-assembler-not "\n\tmovs\tr2, r4\n\tmovs\tr3, r4" } } */ -+/* { dg-final { scan-assembler-not "vmov" } } */ -+/* { dg-final { scan-assembler-not "vmsr" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. 
*/ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+/* { dg-options "-mcmse" } */ -+ -+extern float bar (void); -+ -+float __attribute__ ((cmse_nonsecure_entry)) -+foo (void) -+{ -+ return bar (); -+} -+/* { dg-final { scan-assembler "movs\tr1, r0" } } */ -+/* { dg-final { scan-assembler "movs\tr2, r0" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r0" } } */ -+/* { dg-final { scan-assembler "mov\tip, r0" } } */ -+/* { dg-final { scan-assembler "mov\tlr, r0" } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq," } } */ -+/* { dg-final { scan-assembler "bxns" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+/* { dg-options "-mcmse" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double); -+ -+int -+foo (int a) -+{ -+ return bar (2.0) + a + 1; -+} -+ -+/* Remember dont clear r0 and r1, because we are passing the double parameter -+ * for bar in them. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr2, r4" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c -@@ -0,0 +1,29 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_base_ok } */ -+/* { dg-add-options arm_arch_v8m_base } */ -+/* { dg-options "-mcmse -mfloat-abi=softfp" } */ -+ -+double __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double); -+ -+double -+foo (double a) -+{ -+ return bar (1.0f, 2.0) + a; -+} -+ -+float __attribute__ ((cmse_nonsecure_entry)) -+baz (float a, double b) -+{ -+ return (float) bar (a, b); -+} -+ -+/* Make sure we are not using FP instructions, since ARMv8-M Baseline does not -+ support such instructions. */ -+/* { dg-final { scan-assembler-not "vmov" } } */ -+/* { dg-final { scan-assembler-not "vmsr" } } */ -+/* { dg-final { scan-assembler-not "vmrs" } } */ -+ -+/* Just double checking that we are still doing cmse though. 
*/ -+/* { dg-final { scan-assembler-not "vmrs" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c -@@ -0,0 +1,71 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a :2; -+ unsigned char :0; -+ unsigned short b :5; -+ unsigned char :0; -+ unsigned short c :3; -+ unsigned char :0; -+ unsigned int d :9; -+} test_st_1; -+ -+typedef struct -+{ -+ unsigned short a :7; -+ unsigned char :0; -+ unsigned char b :1; -+ unsigned char :0; -+ unsigned short c :6; -+} test_st_2; -+ -+typedef union -+{ -+ test_st_1 st_1; -+ test_st_2 st_2; -+}test_un; -+ -+typedef union -+{ -+ test_un un; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_un; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un); -+ -+int -+main (void) -+{ -+ read_un r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ -+ f (r.un); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #8063" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 63" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #511" } } */ -+/* { dg-final { scan-assembler "ands\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr2, r4" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c -@@ -0,0 +1,86 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a :2; -+ unsigned char :0; -+ unsigned short b :5; -+ unsigned char :0; -+ unsigned short c :3; -+ unsigned char :0; -+ unsigned int d :9; -+} test_st_1; -+ -+typedef struct -+{ -+ unsigned short a :7; -+ unsigned char :0; -+ unsigned char b :1; -+ unsigned char :0; -+ unsigned short c :6; -+} test_st_2; -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int :0; -+ unsigned int b :1; -+ unsigned short :0; -+ unsigned short c; -+ unsigned int :0; -+ unsigned int d :21; -+} test_st_3; -+ -+typedef union -+{ -+ test_st_1 st_1; -+ test_st_2 st_2; -+ test_st_3 st_3; -+}test_un; -+ -+typedef union -+{ -+ test_un un; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_un; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un); -+ -+int -+main (void) -+{ -+ read_un r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ -+ f (r.un); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #8191" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 63" } } */ -+/* { dg-final { scan-assembler "ands\tr0, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #511" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 65535" } } */ -+/* { dg-final { scan-assembler "ands\tr1, r4" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tr4, 31" } } */ -+/* { 
dg-final { scan-assembler "ands\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr4, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "movs\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c -@@ -0,0 +1,39 @@ -+/* { dg-do run } */ -+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */ -+ -+typedef struct -+{ -+ unsigned short a : 6; -+ unsigned char b : 3; -+ unsigned char c; -+ unsigned short d : 8; -+} test_st; -+ -+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void) -+{ -+ test_st t; -+ t.a = 63u; -+ t.b = 7u; -+ t.c = 255u; -+ t.d = 255u; -+ return t; -+} -+ -+int -+main (void) -+{ -+ test_st t; -+ t = foo (); -+ if (t.a != 63u -+ || t.b != 7u -+ || t.c != 255u -+ || t.d != 255u) -+ __builtin_abort (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tr1, #1855" } } */ -+/* { dg-final { scan-assembler "movt\tr1, 65535" } } */ -+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */ -+/* { dg-final { scan-assembler "bxns" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c -@@ -0,0 +1,36 @@ -+/* { dg-do run } */ -+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */ -+ -+typedef struct -+{ -+ short a : 7; -+ signed char b : 3; -+ short c : 11; -+} test_st; -+ -+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void) -+{ -+ test_st t; -+ t.a = -64; -+ t.b = -4 ; -+ t.c = -1024; -+ return t; -+} -+ -+int -+main (void) -+{ -+ test_st t; -+ t = foo (); -+ if (t.a != -64 -+ || t.b != -4 -+ || t.c != -1024) -+ __builtin_abort (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tr1, #1919" } } */ -+/* { dg-final { scan-assembler "movt\tr1, 2047" } } */ -+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */ -+/* { dg-final { scan-assembler "bxns" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c -@@ -0,0 +1,37 @@ -+/* { dg-do run } */ -+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */ -+ -+typedef struct -+{ -+ short a; -+ signed char b : 2; -+ short : 1; -+ signed char c : 3; -+} test_st; -+ -+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void) -+{ -+ test_st t; -+ t.a = -32768; -+ t.b = -2; -+ t.c = -4; -+ return t; -+} -+ -+int -+main (void) -+{ -+ test_st t; -+ t = foo (); -+ if (t.a != -32768 -+ || t.b != -2 -+ || t.c != -4) -+ __builtin_abort (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tr1, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tr1, 63" } } */ -+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */ -+/* { dg-final { scan-assembler "bxns" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse-1.c -@@ -0,0 +1,106 @@ -+/* { dg-do compile } */ -+/* { dg-options "-Os -mcmse -fdump-rtl-expand" } */ -+ -+#include <arm_cmse.h> -+ -+extern int a; -+extern int bar (void); -+ -+int foo (char * p) -+{ -+ cmse_address_info_t cait; -+ -+ cait = cmse_TT (&a); -+ if (cait.flags.mpu_region) -+ a++; -+ -+ cait = cmse_TT_fptr (&bar); -+ if (cait.flags.mpu_region) -+ a+= bar (); -+ -+ cait = cmse_TTA (&a); -+ if (cait.flags.mpu_region) -+ a++; -+ -+ cait = cmse_TTA_fptr (&bar); -+ if (cait.flags.mpu_region) -+ a+= bar (); -+ -+ cait = cmse_TTT (&a); -+ if (cait.flags.mpu_region) -+ a++; -+ -+ cait = 
cmse_TTT_fptr (&bar); -+ if (cait.flags.mpu_region) -+ a+= bar (); -+ -+ cait = cmse_TTAT (&a); -+ if (cait.flags.mpu_region) -+ a++; -+ -+ cait = cmse_TTAT_fptr (&bar); -+ if (cait.flags.mpu_region) -+ a+= bar (); -+ -+ p = (char *) cmse_check_address_range ((void *) p, sizeof (char), 0); -+ p = (char *) cmse_check_address_range ((void *) p, sizeof (char), -+ CMSE_MPU_UNPRIV); -+ p = (char *) cmse_check_address_range ((void *) p, sizeof (char), -+ CMSE_MPU_READWRITE); -+ p = (char *) cmse_check_address_range ((void *) p, sizeof (char), -+ CMSE_MPU_UNPRIV | CMSE_MPU_READ); -+ p = (char *) cmse_check_address_range ((void *) p, sizeof (char), -+ CMSE_AU_NONSECURE -+ | CMSE_MPU_NONSECURE); -+ p = (char *) cmse_check_address_range ((void *) p, sizeof (char), -+ CMSE_NONSECURE | CMSE_MPU_UNPRIV); -+ -+ p = (char *) cmse_check_pointed_object (p, CMSE_NONSECURE | CMSE_MPU_UNPRIV); -+ -+ return a; -+} -+/* { dg-final { scan-assembler-times "\ttt " 2 } } */ -+/* { dg-final { scan-assembler-times "ttt " 2 } } */ -+/* { dg-final { scan-assembler-times "tta " 2 } } */ -+/* { dg-final { scan-assembler-times "ttat " 2 } } */ -+/* { dg-final { scan-assembler-times "bl.cmse_check_address_range" 7 } } */ -+/* { dg-final { scan-assembler-not "cmse_check_pointed_object" } } */ -+ -+int __attribute__ ((cmse_nonsecure_entry)) -+baz (void) -+{ -+ return cmse_nonsecure_caller (); -+} -+ -+typedef int __attribute__ ((cmse_nonsecure_call)) (int_nsfunc_t) (void); -+ -+int default_callback (void) -+{ -+ return 0; -+} -+ -+int_nsfunc_t * fp = (int_nsfunc_t *) default_callback; -+ -+void __attribute__ ((cmse_nonsecure_entry)) -+qux (int_nsfunc_t * callback) -+{ -+ fp = cmse_nsfptr_create (callback); -+} -+ -+int call_callback (void) -+{ -+ if (cmse_is_nsfptr (fp)) -+ return fp (); -+ else -+ return default_callback (); -+} -+/* { dg-final { scan-assembler "baz:" } } */ -+/* { dg-final { scan-assembler "__acle_se_baz:" } } */ -+/* { dg-final { scan-assembler "qux:" } } */ -+/* { dg-final { scan-assembler "__acle_se_qux:" } } */ -+/* { dg-final { scan-assembler-not "\tcmse_nonsecure_caller" } } */ -+/* { dg-final { scan-rtl-dump "and.*reg.*const_int 1" expand } } */ -+/* { dg-final { scan-assembler "bic" } } */ -+/* { dg-final { scan-assembler "push\t\{r4, r5, r6" } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq" } } */ -+/* { dg-final { scan-assembler-times "bl\\s+__gnu_cmse_nonsecure_call" 1 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse-10.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+void -+foo (void) {} -+ -+/* { dg-final { scan-assembler-not "bxns" } } */ -+/* { dg-final { scan-assembler "foo:" } } */ -+/* { dg-final { scan-assembler-not "__acle_se_foo:" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse-12.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+#include <arm_cmse.h> -+ -+char * -+foo (char * p) -+{ -+ if (!cmse_is_nsfptr (p)) -+ return cmse_nsfptr_create (p); -+} -+ -+/* Checks for saving and clearing prior to function call. 
*/ -+/* { dg-final { scan-assembler-not "cmse_is_nsfptr" } } */ -+/* { dg-final { scan-assembler-not "cmse_nsfptr_create" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse-14.c -@@ -0,0 +1,13 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void); -+ -+int foo (void) -+{ -+ return bar (); -+} -+ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+/* { dg-final { scan-assembler-not "b\[^ y\n\]*\\s+bar" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c -@@ -0,0 +1,72 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*ns_foo) (void); -+int (*s_bar) (void); -+int __attribute__ ((cmse_nonsecure_call)) (**ns_foo2) (void); -+int (**s_bar2) (void); -+ -+typedef int __attribute__ ((cmse_nonsecure_call)) ns_foo_t (void); -+typedef int s_bar_t (void); -+typedef int __attribute__ ((cmse_nonsecure_call)) (* ns_foo_ptr) (void); -+typedef int (*s_bar_ptr) (void); -+ -+int nonsecure0 (ns_foo_t * ns_foo_p) -+{ -+ return ns_foo_p (); -+} -+ -+int nonsecure1 (ns_foo_t ** ns_foo_p) -+{ -+ return (*ns_foo_p) (); -+} -+ -+int nonsecure2 (ns_foo_ptr ns_foo_p) -+{ -+ return ns_foo_p (); -+} -+int nonsecure3 (ns_foo_ptr * ns_foo_p) -+{ -+ return (*ns_foo_p) (); -+} -+ -+int secure0 (s_bar_t * s_bar_p) -+{ -+ return s_bar_p (); -+} -+ -+int secure1 (s_bar_t ** s_bar_p) -+{ -+ return (*s_bar_p) (); -+} -+ -+int secure2 (s_bar_ptr s_bar_p) -+{ -+ return s_bar_p (); -+} -+ -+int secure3 (s_bar_ptr * s_bar_p) -+{ -+ return (*s_bar_p) (); -+} -+ -+int nonsecure4 (void) -+{ -+ return ns_foo (); -+} -+ -+int nonsecure5 (void) -+{ -+ return (*ns_foo2) (); -+} -+ -+int secure4 (void) -+{ -+ return s_bar (); -+} -+ -+int secure5 (void) -+{ -+ return (*s_bar2) (); -+} -+/* { dg-final { scan-assembler-times "bl\\s+__gnu_cmse_nonsecure_call" 6 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse-3.c -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+struct span { -+ int a, b; -+}; -+struct span2 { -+ float a, b, c, d; -+}; -+ -+union test_union -+{ -+ long long a; -+ int b; -+ struct span2 c; -+} test_union; -+ -+void __attribute__ ((cmse_nonsecure_entry)) -+foo (long long a, int b, long long c) {} /* { dg-error "not available to functions with arguments passed on the stack" } */ -+ -+void __attribute__ ((cmse_nonsecure_entry)) -+bar (long long a, int b, struct span c) {} /* { dg-error "not available to functions with arguments passed on the stack" } */ -+ -+void __attribute__ ((cmse_nonsecure_entry)) -+baz (int a, ...) 
{} /* { dg-error "not available to functions with variable number of arguments" } */ -+ -+struct span __attribute__ ((cmse_nonsecure_entry)) -+qux (void) { /* { dg-error "not available to functions that return value on the stack" } */ -+ struct span ret = {0, 0}; -+ return ret; -+} -+ -+void __attribute__ ((cmse_nonsecure_entry)) -+norf (struct span2 a) {} -+ -+void __attribute__ ((cmse_nonsecure_entry)) -+foo2 (long long a, int b, union test_union c) {} /* { dg-error "not available to functions with arguments passed on the stack" } */ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) bar2 (long long a, int b, long long c); /* { dg-error "not available to functions with arguments passed on the stack" } */ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) baz2 (long long a, int b, struct span c); /* { dg-error "not available to functions with arguments passed on the stack" } */ -+ -+typedef struct span __attribute__ ((cmse_nonsecure_call)) qux2 (void); /* { dg-error "not available to functions that return value on the stack" } */ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) norf2 (int a, ...); /* { dg-error "not available to functions with variable number of arguments" } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse-4.c -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+struct span { -+ int a, b; -+}; -+ -+extern int qux (void); -+ -+void __attribute__ ((cmse_nonsecure_entry)) -+foo (void) {} -+ -+static void __attribute__ ((cmse_nonsecure_entry)) -+bar (void) {} /* { dg-warning "has no effect on functions with static linkage" } */ -+ -+int __attribute__ ((cmse_nonsecure_entry)) -+baz (void) -+{ -+ return qux (); -+} -+ -+void __attribute__ ((cmse_nonsecure_call)) -+quux (void) {} /* { dg-warning "attribute only applies to base type of a function pointer" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) norf; /* { dg-warning "attribute only applies to base type of a function pointer" } */ -+ -+/* { dg-final { scan-assembler-times "bxns" 2 } } */ -+/* { dg-final { scan-assembler "foo:" } } */ -+/* { dg-final { scan-assembler "__acle_se_foo:" } } */ -+/* { dg-final { scan-assembler-not "__acle_se_bar:" } } */ -+/* { dg-final { scan-assembler "baz:" } } */ -+/* { dg-final { scan-assembler "__acle_se_baz:" } } */ -+/* { dg-final { scan-assembler-not "__acle_se_quux:" } } */ -+/* { dg-final { scan-assembler-not "__acle_se_norf:" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse-9.c -@@ -0,0 +1,20 @@ -+/* { dg-do compile } */ -+/* { dg-skip-if "Testing exclusion of -mcmse" { arm-*-* } { "-mcmse" } { "" } } */ -+ -+ -+void __attribute__ ((cmse_nonsecure_call)) (*bar) (int); /* { dg-warning "attribute ignored without -mcmse option" } */ -+typedef void __attribute__ ((cmse_nonsecure_call)) baz (int); /* { dg-warning "attribute ignored without -mcmse option" } */ -+ -+int __attribute__ ((cmse_nonsecure_entry)) -+foo (int a, baz b) -+{ /* { dg-warning "attribute ignored without -mcmse option" } */ -+ bar (a); -+ b (a); -+ return a + 1; -+} -+ -+/* { dg-final { scan-assembler-not "bxns" } } */ -+/* { dg-final { scan-assembler-not "blxns" } } */ -+/* { dg-final { scan-assembler-not "bl\t__gnu_cmse_nonsecure_call" } } */ -+/* { dg-final { scan-assembler "foo:" } } */ -+/* { dg-final { scan-assembler-not "__acle_se_foo:" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/cmse.exp -@@ -0,0 +1,72 @@ -+# Copyright (C) 1997-2016 Free Software Foundation, Inc. 
-+ -+# This program is free software; you can redistribute it and/or modify -+# it under the terms of the GNU General Public License as published by -+# the Free Software Foundation; either version 3 of the License, or -+# (at your option) any later version. -+# -+# This program is distributed in the hope that it will be useful, -+# but WITHOUT ANY WARRANTY; without even the implied warranty of -+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+# GNU General Public License for more details. -+# -+# You should have received a copy of the GNU General Public License -+# along with GCC; see the file COPYING3. If not see -+# <http://www.gnu.org/licenses/>. -+ -+# GCC testsuite for ARMv8-M Security Extensions using the `dg.exp' driver. -+ -+# Load support procs. -+load_lib gcc-dg.exp -+ -+# Exit immediately if the target does not support -mcmse. -+if ![check_effective_target_arm_cmse_ok] then { -+ return -+} -+ -+# If a testcase doesn't have special options, use these. -+global DEFAULT_CFLAGS -+if ![info exists DEFAULT_CFLAGS] then { -+ set DEFAULT_CFLAGS " -ansi -pedantic-errors" -+} -+ -+# Initialize `dg'. -+dg-init -+ -+set saved-dg-do-what-default ${dg-do-what-default} -+set dg-do-what-default "assemble" -+ -+set saved-lto_torture_options ${LTO_TORTURE_OPTIONS} -+set LTO_TORTURE_OPTIONS "" -+ -+# These are for both baseline and mainline. -+gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] \ -+ "" $DEFAULT_CFLAGS -+ -+if {[check_effective_target_arm_arch_v8m_base_ok]} then { -+ # Baseline only -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/baseline/*.c]] \ -+ "" $DEFAULT_CFLAGS -+} -+ -+if {[check_effective_target_arm_arch_v8m_main_ok]} then { -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/*.c]] \ -+ "" $DEFAULT_CFLAGS -+ # Mainline -mfloat-abi=soft -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/soft/*.c]] \ -+ "-mfloat-abi=soft" $DEFAULT_CFLAGS -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/softfp/*.c]] \ -+ "" $DEFAULT_CFLAGS -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/softfp-sp/*.c]] \ -+ "" $DEFAULT_CFLAGS -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/hard/*.c]] \ -+ "" $DEFAULT_CFLAGS -+ gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/hard-sp/*.c]] \ -+ "" $DEFAULT_CFLAGS -+} -+ -+set LTO_TORTURE_OPTIONS ${saved-lto_torture_options} -+set dg-do-what-default ${saved-dg-do-what-default} -+ -+# All done. 
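The baseline tests above and the mainline bitfield/union tests that follow all share one shape: a union overlays a bitfield struct with plain unsigned ints, each int is set to 0xFFFFFFFF, and the struct member is passed through a cmse_nonsecure_call function pointer; the dg-final scan-assembler lines then check that the compiler masks the padding bits of the argument registers (r0-r3) before the security-state transition, so stale secure data cannot leak. A minimal sketch of that pattern outside the DejaGnu harness — the type and function names here are illustrative, not from the patch; 0x200000 mirrors the arbitrary non-secure address the tests use, and compiling with -mcmse for an ARMv8-M target is assumed:

/* Sketch only: shows the caller-side clearing the tests scan for.  */
typedef struct
{
  unsigned char a : 3;	/* live bits */
  unsigned int  b : 5;	/* the rest of each register is padding */
} leak_test_st;		/* illustrative name, not from the patch */

typedef void __attribute__ ((cmse_nonsecure_call)) (*ns_fn) (leak_test_st);

void
call_nonsecure (leak_test_st s)	/* illustrative name */
{
  ns_fn f = (ns_fn) 0x200000;	/* arbitrary NS address, as in the tests */
  f (s);	/* compiler masks padding bits and clears unused core
		   registers before the transition */
}

In the generated code this call is routed through the __gnu_cmse_nonsecure_call library helper, which is why every test ends by scanning for "bl\t__gnu_cmse_nonsecure_call".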
-+dg-finish ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c -@@ -0,0 +1,55 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int b:5; -+ unsigned int c:11, :0, d:8; -+ struct { unsigned int ee:2; } e; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+extern void foo (test_st st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ r.values.v4 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tip, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tip, 255" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "mov\tip, #255" } } */ -+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */ -+/* { dg-final { scan-assembler "mov\tip, #3" } } */ -+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c -@@ -0,0 +1,51 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned short b :5; -+ unsigned char c; -+ unsigned short d :11; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tip, #8191" } } */ -+/* { dg-final { scan-assembler "movt\tip, 255" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "movw\tip, #2047" } } */ -+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c -@@ -0,0 +1,61 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int b : 3; -+ unsigned int c : 14; -+ unsigned int d : 1; -+ struct { -+ unsigned int ee : 2; -+ unsigned short ff : 15; -+ } e; -+ unsigned char g : 1; -+ unsigned char : 4; -+ unsigned char h : 3; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 
0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ r.values.v4 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tip, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tip, 1023" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "mov\tip, #3" } } */ -+/* { dg-final { scan-assembler "movt\tip, 32767" } } */ -+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */ -+/* { dg-final { scan-assembler "mov\tip, #255" } } */ -+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c -@@ -0,0 +1,52 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned short b :5; -+ unsigned char c; -+ unsigned short d :11; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+ -+/* { dg-final { scan-assembler "movw\tip, #8191" } } */ -+/* { dg-final { scan-assembler "movt\tip, 255" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "movw\tip, #2047" } } */ -+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c -@@ -0,0 +1,55 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int :0; -+ unsigned int b :1; -+ unsigned short :0; -+ unsigned short c; -+ unsigned int :0; -+ unsigned int d :21; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "mov\tip, #255" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "mov\tip, #1" } } */ -+/* { dg-final { scan-assembler "movt\tip, 65535" } } */ -+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */ -+/* { dg-final { scan-assembler "movw\tip, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tip, 31" } } */ -+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler 
"bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c -@@ -0,0 +1,54 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ char a:3; -+} test_st3; -+ -+typedef struct -+{ -+ char a:3; -+} test_st2; -+ -+typedef struct -+{ -+ test_st2 st2; -+ test_st3 st3; -+} test_st; -+ -+typedef union -+{ -+ test_st st; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st; -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st); -+ -+int -+main (void) -+{ -+ read_st r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ -+ f (r.st); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tip, #1799" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union-1.c -@@ -0,0 +1,94 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned short a :11; -+} test_st_4; -+ -+typedef union -+{ -+ char a; -+ test_st_4 st4; -+}test_un_2; -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int :0; -+ unsigned int b :1; -+ unsigned short :0; -+ unsigned short c; -+ unsigned int :0; -+ unsigned int d :21; -+} test_st_3; -+ -+typedef struct -+{ -+ unsigned char a :3; -+ unsigned int b :13; -+ test_un_2 un2; -+} test_st_2; -+ -+typedef union -+{ -+ test_st_2 st2; -+ test_st_3 st3; -+}test_un_1; -+ -+typedef struct -+{ -+ unsigned char a :2; -+ unsigned char :0; -+ unsigned short b :5; -+ unsigned char :0; -+ unsigned char c :4; -+ test_un_1 un1; -+} test_st_1; -+ -+typedef union -+{ -+ test_st_1 st1; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_st_1; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st_1); -+ -+int -+main (void) -+{ -+ read_st_1 r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ r.values.v4 = 0xFFFFFFFF; -+ -+ f (r.st1); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tip, #7939" } } */ -+/* { dg-final { scan-assembler "movt\tip, 15" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "movw\tip, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tip, 2047" } } */ -+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */ -+/* { dg-final { scan-assembler "mov\tip, #1" } } */ -+/* { dg-final { scan-assembler "movt\tip, 65535" } } */ -+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */ -+/* { dg-final { scan-assembler "movw\tip, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tip, 31" } } */ -+/* { dg-final { scan-assembler "and\tr3, r3, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c -@@ -0,0 +1,43 @@ -+/* { dg-do compile } */ -+/* { 
dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */ -+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */ -+ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double); -+ -+int -+foo (int a) -+{ -+ return bar (3.0f, 2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler-not "vldr\.32\ts0, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts1, .L" } } */ -+/* { dg-final { scan-assembler-not "vldr\.32\ts2, .L" } } */ -+/* { dg-final { scan-assembler-not "vldr\.32\ts3, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts4, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts5, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts6, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts7, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts8, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts9, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts10, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts11, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts12, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts13, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts14, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts15, .L" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. 
*/ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */ -+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */ -+ -+extern float bar (void); -+ -+float __attribute__ ((cmse_nonsecure_entry)) -+foo (void) -+{ -+ return bar (); -+} -+/* { dg-final { scan-assembler "mov\tr0, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr1, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr2, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr3, lr" } } */ -+/* { dg-final { scan-assembler-not "vmov\.f32\ts0, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts2, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts3, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts4, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts5, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts6, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts7, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts8, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts9, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts10, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts11, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts12, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts13, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts14, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts15, #1\.0" } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */ -+/* { dg-final { scan-assembler "push\t{r4}" } } */ -+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */ -+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */ -+/* { dg-final { scan-assembler "and\tip, r4" } } */ -+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */ -+/* { dg-final { scan-assembler "pop\t{r4}" } } */ -+/* { dg-final { scan-assembler "mov\tip, lr" } } */ -+/* { dg-final { scan-assembler "bxns" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c -@@ -0,0 +1,42 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */ -+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void); -+ -+int -+foo (int a) -+{ -+ return bar () + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. 
*/ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts0, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts1, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts2, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts3, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts4, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts5, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts6, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts7, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts8, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts9, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts10, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts11, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts12, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts13, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts14, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts15, .L" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c -@@ -0,0 +1,41 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */ -+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double); -+ -+int -+foo (int a) -+{ -+ return bar (2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler-not "vldr\.32\ts0, .L" } } */ -+/* { dg-final { scan-assembler-not "vldr\.32\ts1, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts2, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts3, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts4, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts5, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts6, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts7, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts8, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts9, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts10, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts11, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts12, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts13, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts14, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts15, .L" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. 
*/ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */ -+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */ -+ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double); -+ -+int -+foo (int a) -+{ -+ return bar (3.0f, 2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "vldr\.32\ts1, .L" } } */ -+/* { dg-final { scan-assembler-not "vldr\.64\td0, .L" } } */ -+/* { dg-final { scan-assembler-not "vldr\.32\ts0, .L" } } */ -+/* { dg-final { scan-assembler-not "vldr\.64\td1, .L" } } */ -+/* { dg-final { scan-assembler-not "vldr\.32\ts2, .L" } } */ -+/* { dg-final { scan-assembler-not "vldr\.32\ts3, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td2, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td3, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td4, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td5, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td6, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td7, .L" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */ -+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */ -+ -+extern float bar (void); -+ -+float __attribute__ ((cmse_nonsecure_entry)) -+foo (void) -+{ -+ return bar (); -+} -+/* { dg-final { scan-assembler "mov\tr0, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr1, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr2, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr3, lr" } } */ -+/* { dg-final { scan-assembler-not "vmov\.f32\ts0, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td1, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td2, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td3, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td4, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td5, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td6, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td7, #1\.0" } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! 
arm_dsp } } } } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */ -+/* { dg-final { scan-assembler "push\t{r4}" } } */ -+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */ -+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */ -+/* { dg-final { scan-assembler "and\tip, r4" } } */ -+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */ -+/* { dg-final { scan-assembler "pop\t{r4}" } } */ -+/* { dg-final { scan-assembler "mov\tip, lr" } } */ -+/* { dg-final { scan-assembler "bxns" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c -@@ -0,0 +1,34 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */ -+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void); -+ -+int -+foo (int a) -+{ -+ return bar () + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td0, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td1, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td2, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td3, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td4, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td5, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td6, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td7, .L" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */ -+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double); -+ -+int -+foo (int a) -+{ -+ return bar (2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. 
*/ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler-not "vldr\.64\td0, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td1, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td2, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td3, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td4, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td5, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td6, .L" } } */ -+/* { dg-final { scan-assembler "vldr\.64\td7, .L" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=soft" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double); -+ -+int -+foo (int a) -+{ -+ return bar (1.0f, 2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler-not "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler-not "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler-not "vmov" } } */ -+/* { dg-final { scan-assembler-not "vmsr" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c -@@ -0,0 +1,24 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=soft" } */ -+ -+extern float bar (void); -+ -+float __attribute__ ((cmse_nonsecure_entry)) -+foo (void) -+{ -+ return bar (); -+} -+ -+/* { dg-final { scan-assembler "mov\tr1, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr2, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr3, lr" } } */ -+/* { dg-final { scan-assembler "mov\tip, lr" } } */ -+/* { dg-final { scan-assembler-not "vmov" } } */ -+/* { dg-final { scan-assembler-not "vmsr" } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! 
arm_dsp } } } } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */ -+/* { dg-final { scan-assembler "bxns" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=soft" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void); -+ -+int -+foo (int a) -+{ -+ return bar () + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler-not "vmov" } } */ -+/* { dg-final { scan-assembler-not "vmsr" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=soft" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double); -+ -+int -+foo (int a) -+{ -+ return bar (2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler-not "vmov" } } */ -+/* { dg-final { scan-assembler-not "vmsr" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. 
*/ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c -@@ -0,0 +1,46 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */ -+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */ -+ -+extern float bar (void); -+ -+float __attribute__ ((cmse_nonsecure_entry)) -+foo (void) -+{ -+ return bar (); -+} -+/* { dg-final { scan-assembler "__acle_se_foo:" } } */ -+/* { dg-final { scan-assembler-not "mov\tr0, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr1, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr2, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr3, lr" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts0, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts2, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts3, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts4, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts5, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts6, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts7, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts8, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts9, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts10, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts11, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts12, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts13, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts14, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f32\ts15, #1\.0" } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */ -+/* { dg-final { scan-assembler "push\t{r4}" } } */ -+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */ -+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */ -+/* { dg-final { scan-assembler "and\tip, r4" } } */ -+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */ -+/* { dg-final { scan-assembler "pop\t{r4}" } } */ -+/* { dg-final { scan-assembler "mov\tip, lr" } } */ -+/* { dg-final { scan-assembler "bxns" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */ -+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void); -+ -+int -+foo (int a) -+{ -+ return bar () + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. 
*/ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */ -+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double); -+ -+int -+foo (int a) -+{ -+ return bar (2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */ -+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double); -+ -+int -+foo (int a) -+{ -+ return bar (1.0f, 2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "\n\tmov\tr1, r4" } } */ -+/* { dg-final { scan-assembler-not "\n\tmov\tr2, r4\n\tmov\tr3, r4" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. 
*/ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c -@@ -0,0 +1,38 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */ -+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */ -+ -+extern float bar (void); -+ -+float __attribute__ ((cmse_nonsecure_entry)) -+foo (void) -+{ -+ return bar (); -+} -+/* { dg-final { scan-assembler "__acle_se_foo:" } } */ -+/* { dg-final { scan-assembler-not "mov\tr0, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr1, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr2, lr" } } */ -+/* { dg-final { scan-assembler "mov\tr3, lr" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td0, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td1, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td2, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td3, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td4, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td5, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td6, #1\.0" } } */ -+/* { dg-final { scan-assembler "vmov\.f64\td7, #1\.0" } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */ -+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */ -+/* { dg-final { scan-assembler "push\t{r4}" } } */ -+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */ -+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */ -+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */ -+/* { dg-final { scan-assembler "and\tip, r4" } } */ -+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */ -+/* { dg-final { scan-assembler "pop\t{r4}" } } */ -+/* { dg-final { scan-assembler "mov\tip, lr" } } */ -+/* { dg-final { scan-assembler "bxns" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */ -+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void); -+ -+int -+foo (int a) -+{ -+ return bar () + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. 
*/ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c -@@ -0,0 +1,25 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v8m_main_ok } */ -+/* { dg-add-options arm_arch_v8m_main } */ -+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */ -+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ -+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" } */ -+ -+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double); -+ -+int -+foo (int a) -+{ -+ return bar (2.0) + a + 1; -+} -+ -+/* Checks for saving and clearing prior to function call. */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ -+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+ -+/* Now we check that we use the correct intrinsic to call. */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c -@@ -0,0 +1,69 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a :2; -+ unsigned char :0; -+ unsigned short b :5; -+ unsigned char :0; -+ unsigned short c :3; -+ unsigned char :0; -+ unsigned int d :9; -+} test_st_1; -+ -+typedef struct -+{ -+ unsigned short a :7; -+ unsigned char :0; -+ unsigned char b :1; -+ unsigned char :0; -+ unsigned short c :6; -+} test_st_2; -+ -+typedef union -+{ -+ test_st_1 st_1; -+ test_st_2 st_2; -+}test_un; -+ -+typedef union -+{ -+ test_un un; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_un; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un); -+ -+int -+main (void) -+{ -+ read_un r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ -+ f (r.un); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tip, #8063" } } */ -+/* { dg-final { scan-assembler "movt\tip, 63" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "movw\tip, #511" } } */ -+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr2, r4" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c -@@ -0,0 +1,84 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mcmse" } */ -+ -+typedef struct -+{ -+ unsigned char a :2; -+ unsigned char :0; -+ unsigned short b :5; -+ unsigned char :0; -+ unsigned short c :3; -+ unsigned char :0; -+ unsigned int d :9; -+} test_st_1; -+ -+typedef struct -+{ -+ unsigned short a :7; -+ unsigned char :0; -+ unsigned char b :1; -+ unsigned char :0; -+ unsigned short c :6; -+} test_st_2; -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned int :0; -+ unsigned int b :1; -+ unsigned short :0; -+ unsigned short c; -+ unsigned int :0; -+ unsigned int d :21; -+} test_st_3; -+ -+typedef union 
-+{ -+ test_st_1 st_1; -+ test_st_2 st_2; -+ test_st_3 st_3; -+}test_un; -+ -+typedef union -+{ -+ test_un un; -+ struct -+ { -+ unsigned int v1; -+ unsigned int v2; -+ unsigned int v3; -+ unsigned int v4; -+ }values; -+} read_un; -+ -+ -+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un); -+ -+int -+main (void) -+{ -+ read_un r; -+ foo_ns f; -+ -+ f = (foo_ns) 0x200000; -+ r.values.v1 = 0xFFFFFFFF; -+ r.values.v2 = 0xFFFFFFFF; -+ r.values.v3 = 0xFFFFFFFF; -+ -+ f (r.un); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movw\tip, #8191" } } */ -+/* { dg-final { scan-assembler "movt\tip, 63" } } */ -+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */ -+/* { dg-final { scan-assembler "movw\tip, #511" } } */ -+/* { dg-final { scan-assembler "movt\tip, 65535" } } */ -+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */ -+/* { dg-final { scan-assembler "movw\tip, #65535" } } */ -+/* { dg-final { scan-assembler "movt\tip, 31" } } */ -+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */ -+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ -+/* { dg-final { scan-assembler "mov\tr3, r4" } } */ -+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/cmse/struct-1.c -@@ -0,0 +1,33 @@ -+/* { dg-do run } */ -+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */ -+ -+typedef struct -+{ -+ unsigned char a; -+ unsigned short b; -+} test_st; -+ -+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void) -+{ -+ test_st t; -+ t.a = 255u; -+ t.b = 32767u; -+ return t; -+} -+ -+int -+main (void) -+{ -+ test_st t; -+ t = foo (); -+ if (t.a != 255u || t.b != 32767u) -+ __builtin_abort (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler "movs\tr1, #255" } } */ -+/* { dg-final { scan-assembler "movt\tr1, 65535" } } */ -+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */ -+/* { dg-final { scan-assembler "bxns" } } */ -+ -+ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/data-rel-1.c -@@ -0,0 +1,12 @@ -+/* { dg-options "-fPIC -mno-pic-data-is-text-relative" } */ -+/* { dg-final { scan-assembler-not "j-\\(.LPIC" } } */ -+/* { dg-final { scan-assembler-not "_GLOBAL_OFFSET_TABLE_-\\(.LPIC" } } */ -+/* { dg-final { scan-assembler "j\\(GOT\\)" } } */ -+/* { dg-final { scan-assembler "(ldr|mov)\tr\[0-9\]+, \\\[?r9" } } */ -+ -+static int j; -+ -+int *Foo () -+{ -+ return &j; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/data-rel-2.c -@@ -0,0 +1,11 @@ -+/* { dg-options "-fPIC -mno-pic-data-is-text-relative -mno-single-pic-base" } */ -+/* { dg-final { scan-assembler-not "j-\\(.LPIC" } } */ -+/* { dg-final { scan-assembler "_GLOBAL_OFFSET_TABLE_-\\(.LPIC" } } */ -+/* { dg-final { scan-assembler "j\\(GOT\\)" } } */ -+ -+static int j; -+ -+int *Foo () -+{ -+ return &j; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/data-rel-3.c -@@ -0,0 +1,11 @@ -+/* { dg-options "-fPIC -mpic-data-is-text-relative" } */ -+/* { dg-final { scan-assembler "j-\\(.LPIC" } } */ -+/* { dg-final { scan-assembler-not "_GLOBAL_OFFSET_TABLE_-\\(.LPIC" } } */ -+/* { dg-final { scan-assembler-not "j\\(GOT\\)" } } */ -+ -+static int j; -+ -+int *Foo () -+{ -+ return &j; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_ok } */ -+/* { 
dg-options "-O2" } */ -+/* { dg-add-options arm_fp16_ieee } */ -+ -+/* Test __fp16 arguments and return value in registers (hard-float). */ -+ -+void -+swap (__fp16, __fp16); -+ -+__fp16 -+F (__fp16 a, __fp16 b, __fp16 c) -+{ -+ swap (b, a); -+ return c; -+} -+ -+/* { dg-final { scan-assembler {vmov(\.f16)?\tr[0-9]+, s[0-9]+} } } */ -+/* { dg-final { scan-assembler {vmov(\.f32)?\ts1, s0} } } */ -+/* { dg-final { scan-assembler {vmov(\.f16)?\ts0, r[0-9]+} } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-aapcs-2.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_ok } */ -+/* { dg-options "-mfloat-abi=softfp -O2" } */ -+/* { dg-add-options arm_fp16_ieee } */ -+/* { dg-skip-if "incompatible float-abi" { arm*-*-* } { "-mfloat-abi=hard" } } */ -+ -+/* Test __fp16 arguments and return value in registers (softfp). */ -+ -+void -+swap (__fp16, __fp16); -+ -+__fp16 -+F (__fp16 a, __fp16 b, __fp16 c) -+{ -+ swap (b, a); -+ return c; -+} -+ -+/* { dg-final { scan-assembler-times {mov\tr[0-9]+, r[0-2]} 3 } } */ -+/* { dg-final { scan-assembler-times {mov\tr1, r0} 1 } } */ -+/* { dg-final { scan-assembler-times {mov\tr0, r[0-9]+} 2 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_hard_vfp_ok } */ -+/* { dg-require-effective-target arm_fp16_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_fp16_alternative } */ -+ -+/* Test __fp16 arguments and return value in registers (hard-float). */ -+ -+void -+swap (__fp16, __fp16); -+ -+__fp16 -+F (__fp16 a, __fp16 b, __fp16 c) -+{ -+ swap (b, a); -+ return c; -+} -+ -+/* { dg-final { scan-assembler-times {vmov\tr[0-9]+, s[0-2]} 2 } } */ -+/* { dg-final { scan-assembler-times {vmov.f32\ts1, s0} 1 } } */ -+/* { dg-final { scan-assembler-times {vmov\ts0, r[0-9]+} 2 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-aapcs-4.c -@@ -0,0 +1,21 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_ok } */ -+/* { dg-options "-mfloat-abi=softfp -O2" } */ -+/* { dg-add-options arm_fp16_alternative } */ -+/* { dg-skip-if "incompatible float-abi" { arm*-*-* } { "-mfloat-abi=hard" } } */ -+ -+/* Test __fp16 arguments and return value in registers (softfp). 
*/ -+ -+void -+swap (__fp16, __fp16); -+ -+__fp16 -+F (__fp16 a, __fp16 b, __fp16 c) -+{ -+ swap (b, a); -+ return c; -+} -+ -+/* { dg-final { scan-assembler-times {mov\tr[0-9]+, r[0-2]} 3 } } */ -+/* { dg-final { scan-assembler-times {mov\tr1, r0} 1 } } */ -+/* { dg-final { scan-assembler-times {mov\tr0, r[0-9]+} 2 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-1.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - __fp16 xx = 0.0; ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-10.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-10.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative -pedantic -std=gnu99" } */ - - #include <math.h> ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-11.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-11.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative -pedantic -std=gnu99" } */ - - #include <math.h> ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-12.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-12.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - float xx __attribute__((mode(HF))) = 0.0; ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-2.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-3.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-4.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-4.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-5.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-5.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-6.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-6.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - /* This number is the maximum value representable in the alternative ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-7.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-7.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { 
dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative -pedantic" } */ - - /* This number overflows the range of the alternative encoding. Since this ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-8.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-8.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-9.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-alt-9.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - /* Encoding taken from: http://en.wikipedia.org/wiki/Half_precision */ ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-none-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-none-1.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_none_ok } */ - /* { dg-options "-mfp16-format=none" } */ - - /* __fp16 type name is not recognized unless you explicitly enable it ---- a/src/gcc/testsuite/gcc.target/arm/fp16-compile-none-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-compile-none-2.c -@@ -1,4 +1,5 @@ - /* { dg-do compile } */ -+/* { dg-require-effective-target arm_fp16_none_ok } */ - /* { dg-options "-mfp16-format=none" } */ - - /* mode(HF) attributes are not recognized unless you explicitly enable ---- a/src/gcc/testsuite/gcc.target/arm/fp16-param-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-param-1.c -@@ -1,10 +1,14 @@ - /* { dg-do compile } */ - /* { dg-options "-mfp16-format=ieee" } */ - --/* Functions cannot have parameters of type __fp16. */ --extern void f (__fp16); /* { dg-error "parameters cannot have __fp16 type" } */ --extern void (*pf) (__fp16); /* { dg-error "parameters cannot have __fp16 type" } */ -+/* Test that the ACLE macro is defined. */ -+#if __ARM_FP16_ARGS != 1 -+#error Unexpected value for __ARM_FP16_ARGS -+#endif -+ -+/* Test that __fp16 is supported as a parameter type. */ -+extern void f (__fp16); -+extern void (*pf) (__fp16); - --/* These should be OK. */ - extern void g (__fp16 *); - extern void (*pg) (__fp16 *); ---- a/src/gcc/testsuite/gcc.target/arm/fp16-return-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-return-1.c -@@ -1,10 +1,9 @@ - /* { dg-do compile } */ - /* { dg-options "-mfp16-format=ieee" } */ - --/* Functions cannot return type __fp16. */ --extern __fp16 f (void); /* { dg-error "cannot return __fp16" } */ --extern __fp16 (*pf) (void); /* { dg-error "cannot return __fp16" } */ -+/* Test that __fp16 is supported as a return type. */ -+extern __fp16 f (void); -+extern __fp16 (*pf) (void); - --/* These should be OK. */ - extern __fp16 *g (void); - extern __fp16 *(*pg) (void); ---- a/src/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c -+++ b/src/gcc/testsuite/gcc.target/arm/fp16-rounding-alt-1.c -@@ -3,6 +3,7 @@ - from double to __fp16. 
*/ - - /* { dg-do run } */ -+/* { dg-require-effective-target arm_fp16_alternative_ok } */ - /* { dg-options "-mfp16-format=alternative" } */ - - #include <stdlib.h> ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/movdi_movw.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile { target { arm_thumb2_ok || arm_thumb1_movt_ok } } } */ -+/* { dg-options "-O2" } */ -+ -+long long -+movdi (int a) -+{ -+ return 0xF0F0; -+} -+ -+/* Accept r1 because big endian targets put the low bits in the highest -+ numbered register of a pair. */ -+/* { dg-final { scan-assembler-times "movw\tr\[01\], #61680" 1 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/movhi_movw.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile { target { arm_thumb2_ok || arm_thumb1_movt_ok } } } */ -+/* { dg-options "-O2" } */ -+ -+short -+movsi (void) -+{ -+ return (short) 0x7070; -+} -+ -+/* { dg-final { scan-assembler-times "movw\tr0, #28784" 1 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/movsi_movw.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile { target { arm_thumb2_ok || arm_thumb1_movt_ok } } } */ -+/* { dg-options "-O2" } */ -+ -+int -+movsi (void) -+{ -+ return 0xF0F0; -+} -+ -+/* { dg-final { scan-assembler-times "movw\tr0, #61680" 1 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddws16.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O3" } */ -+/* { dg-add-options arm_neon } */ -+ -+ -+ -+int -+t6 (int len, void * dummy, short * __restrict x) -+{ -+ len = len & ~31; -+ int result = 0; -+ __asm volatile (""); -+ for (int i = 0; i < len; i++) -+ result += x[i]; -+ return result; -+} -+ -+/* { dg-final { scan-assembler "vaddw\.s16" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddws32.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O3" } */ -+/* { dg-add-options arm_neon } */ -+ -+ -+int -+t6 (int len, void * dummy, int * __restrict x) -+{ -+ len = len & ~31; -+ long long result = 0; -+ __asm volatile (""); -+ for (int i = 0; i < len; i++) -+ result += x[i]; -+ return result; -+} -+ -+/* { dg-final { scan-assembler "vaddw\.s32" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O3" } */ -+/* { dg-add-options arm_neon } */ -+ -+ -+int -+t6 (int len, void * dummy, unsigned short * __restrict x) -+{ -+ len = len & ~31; -+ unsigned int result = 0; -+ __asm volatile (""); -+ for (int i = 0; i < len; i++) -+ result += x[i]; -+ return result; -+} -+ -+/* { dg-final { scan-assembler "vaddw.u16" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c -@@ -0,0 +1,18 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O3" } */ -+/* { dg-add-options arm_neon } */ -+ -+ -+int -+t6 (int len, void * dummy, unsigned int * __restrict x) -+{ -+ len = len & ~31; -+ unsigned long long result = 0; -+ __asm volatile (""); -+ for (int i = 0; i < len; i++) -+ result += x[i]; -+ return result; -+} -+ -+/* { dg-final { scan-assembler "vaddw\.u32" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c -@@ -0,0 +1,19 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O3" } */ -+/* { dg-add-options arm_neon } */ -+ -+ -+ -+int -+t6 (int len, void * dummy, char * __restrict x) -+{ -+ len 
= len & ~31; -+ unsigned short result = 0; -+ __asm volatile (""); -+ for (int i = 0; i < len; i++) -+ result += x[i]; -+ return result; -+} -+ -+/* { dg-final { scan-assembler "vaddw\.u8" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/neon.exp -+++ b/src//dev/null -@@ -1,35 +0,0 @@ --# Copyright (C) 1997-2016 Free Software Foundation, Inc. -- --# This program is free software; you can redistribute it and/or modify --# it under the terms of the GNU General Public License as published by --# the Free Software Foundation; either version 3 of the License, or --# (at your option) any later version. --# --# This program is distributed in the hope that it will be useful, --# but WITHOUT ANY WARRANTY; without even the implied warranty of --# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the --# GNU General Public License for more details. --# --# You should have received a copy of the GNU General Public License --# along with GCC; see the file COPYING3. If not see --# <http://www.gnu.org/licenses/>. -- --# GCC testsuite that uses the `dg.exp' driver. -- --# Exit immediately if this isn't an ARM target. --if ![istarget arm*-*-*] then { -- return --} -- --# Load support procs. --load_lib gcc-dg.exp -- --# Initialize `dg'. --dg-init -- --# Main loop. --dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ -- "" "" -- --# All done. --dg-finish ---- a/src/gcc/testsuite/gcc.target/arm/neon/polytypes.c -+++ b/src//dev/null -@@ -1,48 +0,0 @@ --/* Check that NEON polynomial vector types are suitably incompatible with -- integer vector types of the same layout. */ -- --/* { dg-do compile } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-add-options arm_neon } */ -- --#include <arm_neon.h> -- --void s64_8 (int8x8_t a) {} --void u64_8 (uint8x8_t a) {} --void p64_8 (poly8x8_t a) {} --void s64_16 (int16x4_t a) {} --void u64_16 (uint16x4_t a) {} --void p64_16 (poly16x4_t a) {} -- --void s128_8 (int8x16_t a) {} --void u128_8 (uint8x16_t a) {} --void p128_8 (poly8x16_t a) {} --void s128_16 (int16x8_t a) {} --void u128_16 (uint16x8_t a) {} --void p128_16 (poly16x8_t a) {} -- --void foo () --{ -- poly8x8_t v64_8; -- poly16x4_t v64_16; -- poly8x16_t v128_8; -- poly16x8_t v128_16; -- -- s64_8 (v64_8); /* { dg-message "use -flax-vector-conversions" } */ -- /* { dg-error "incompatible type for argument 1 of 's64_8'" "" { target *-*-* } 31 } */ -- u64_8 (v64_8); /* { dg-error "incompatible type for argument 1 of 'u64_8'" } */ -- p64_8 (v64_8); -- -- s64_16 (v64_16); /* { dg-error "incompatible type for argument 1 of 's64_16'" } */ -- u64_16 (v64_16); /* { dg-error "incompatible type for argument 1 of 'u64_16'" } */ -- p64_16 (v64_16); -- -- s128_8 (v128_8); /* { dg-error "incompatible type for argument 1 of 's128_8'" } */ -- u128_8 (v128_8); /* { dg-error "incompatible type for argument 1 of 'u128_8'" } */ -- p128_8 (v128_8); -- -- s128_16 (v128_16); /* { dg-error "incompatible type for argument 1 of 's128_16'" } */ -- u128_16 (v128_16); /* { dg-error "incompatible type for argument 1 of 'u128_16'" } */ -- p128_16 (v128_16); --} --/* { dg-message "note: expected '\[^'\n\]*' but argument is of type '\[^'\n\]*'" "note: expected" { target *-*-* } 0 } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/pr51534.c -+++ b/src//dev/null -@@ -1,83 +0,0 @@ --/* Test the vector comparison intrinsics when comparing to immediate zero. 
-- */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -mfloat-abi=hard -O3" } */ --/* { dg-add-options arm_neon } */ -- --#include <arm_neon.h> -- --#define GEN_TEST(T, D, C, R) \ -- R test_##C##_##T (T a) { return C (a, D (0)); } -- --#define GEN_DOUBLE_TESTS(S, T, C) \ -- GEN_TEST (T, vdup_n_s##S, C##_s##S, u##T) \ -- GEN_TEST (u##T, vdup_n_u##S, C##_u##S, u##T) -- --#define GEN_QUAD_TESTS(S, T, C) \ -- GEN_TEST (T, vdupq_n_s##S, C##q_s##S, u##T) \ -- GEN_TEST (u##T, vdupq_n_u##S, C##q_u##S, u##T) -- --#define GEN_COND_TESTS(C) \ -- GEN_DOUBLE_TESTS (8, int8x8_t, C) \ -- GEN_DOUBLE_TESTS (16, int16x4_t, C) \ -- GEN_DOUBLE_TESTS (32, int32x2_t, C) \ -- GEN_QUAD_TESTS (8, int8x16_t, C) \ -- GEN_QUAD_TESTS (16, int16x8_t, C) \ -- GEN_QUAD_TESTS (32, int32x4_t, C) -- --GEN_COND_TESTS(vcgt) --GEN_COND_TESTS(vcge) --GEN_COND_TESTS(vclt) --GEN_COND_TESTS(vcle) --GEN_COND_TESTS(vceq) -- --/* Scan for expected outputs. */ --/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcgt\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcgt\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcgt\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcgt\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcgt\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcgt\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcge\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcge\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcge\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcge\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcge\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vcge\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ --/* { dg-final { scan-assembler "vclt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { 
scan-assembler "vclt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vclt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vclt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vclt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vclt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vcle\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vcle\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vcle\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vcle\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vcle\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler "vcle\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ --/* { dg-final { scan-assembler-times "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ --/* { dg-final { scan-assembler-times "vceq\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ --/* { dg-final { scan-assembler-times "vceq\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ --/* { dg-final { scan-assembler-times "vceq\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */ --/* { dg-final { scan-assembler-times "vceq\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */ --/* { dg-final { scan-assembler-times "vceq\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */ -- --/* And ensure we don't have unexpected output too. */ --/* { dg-final { scan-assembler-not "vc\[gl\]\[te\]\.u\[0-9\]+\[ \]+\[qQdD\]\[0-9\]+, \[qQdD\]\[0-9\]+, #0" } } */ -- --/* Tidy up. */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRaddhns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRaddhns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRaddhns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int8x8_t = vraddhn_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vraddhn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRaddhns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRaddhns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRaddhns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int16x4_t = vraddhn_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vraddhn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRaddhns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRaddhns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRaddhns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int32x2_t = vraddhn_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vraddhn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRaddhnu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRaddhnu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRaddhnu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint8x8_t = vraddhn_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vraddhn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRaddhnu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRaddhnu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRaddhnu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint16x4_t = vraddhn_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vraddhn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRaddhnu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRaddhnu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRaddhnu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint32x2_t = vraddhn_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vraddhn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vrhaddq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vrhaddq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vrhaddq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vrhaddq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vrhaddq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vrhaddq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhadds16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhadds16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhadds16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vrhadd_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhadds32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhadds32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhadds32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vrhadd_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhadds8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhadds8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhadds8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vrhadd_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddu16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vrhadd_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vrhadd_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRhaddu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRhaddu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRhaddu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vrhadd_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vrhadd\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vrshlq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vrshl\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vrshlq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vrshl\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vrshlq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vrshl\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vrshlq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vrshl\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_uint16x8_t = vrshlq_u16 (arg0_uint16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vrshl\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_uint32x4_t = vrshlq_u32 (arg0_uint32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vrshl\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlQu64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_uint64x2_t = vrshlq_u64 (arg0_uint64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vrshl\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_uint8x16_t = vrshlq_u8 (arg0_uint8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vrshl\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshls16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vrshl_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vrshl\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshls32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vrshl_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vrshl\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshls64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshls64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshls64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vrshl_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vrshl\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshls8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshls8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vrshl_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vrshl\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_uint16x4_t = vrshl_u16 (arg0_uint16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vrshl\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_uint32x2_t = vrshl_u32 (arg0_uint32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vrshl\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_uint64x1_t = vrshl_u64 (arg0_uint64x1_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vrshl\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshlu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRshlu8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshlu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_uint8x8_t = vrshl_u8 (arg0_uint8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vrshl\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrQ_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vrshrq_n_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrQ_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x4_t = vrshrq_n_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrQ_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrQ_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrQ_ns64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_int64x2_t = vrshrq_n_s64 (arg0_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrQ_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrQ_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrQ_ns8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vrshrq_n_s8 (arg0_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrQ_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16x8_t = vrshrq_n_u16 (arg0_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrQ_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32x4_t = vrshrq_n_u32 (arg0_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrQ_nu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrQ_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrQ_nu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint64x2_t = vrshrq_n_u64 (arg0_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrQ_nu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrQ_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrQ_nu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x16_t = vrshrq_n_u8 (arg0_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshr_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshr_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshr_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vrshr_n_s16 (arg0_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshr_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshr_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshr_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vrshr_n_s32 (arg0_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshr_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshr_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshr_ns64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- -- out_int64x1_t = vrshr_n_s64 (arg0_int64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshr_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshr_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshr_ns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vrshr_n_s8 (arg0_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshr_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshr_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshr_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16x4_t = vrshr_n_u16 (arg0_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshr_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshr_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshr_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32x2_t = vrshr_n_u32 (arg0_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshr_nu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshr_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshr_nu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_uint64x1_t = vrshr_n_u64 (arg0_uint64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshr_nu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshr_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshr_nu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vrshr_n_u8 (arg0_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrshr\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrn_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrn_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrn_ns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int8x8_t = vrshrn_n_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrshrn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrn_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrn_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrn_ns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int16x4_t = vrshrn_n_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrshrn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrn_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrn_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrn_ns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_int32x2_t = vrshrn_n_s64 (arg0_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrshrn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrn_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrn_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrn_nu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint8x8_t = vrshrn_n_u16 (arg0_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrshrn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrn_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrn_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrn_nu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint16x4_t = vrshrn_n_u32 (arg0_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrshrn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRshrn_nu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vRshrn_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRshrn_nu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint32x2_t = vrshrn_n_u64 (arg0_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrshrn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsraQ_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsraQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsraQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vrsraq_n_s16 (arg0_int16x8_t, arg1_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsraQ_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsraQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsraQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vrsraq_n_s32 (arg0_int32x4_t, arg1_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsraQ_ns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsraQ_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsraQ_ns64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vrsraq_n_s64 (arg0_int64x2_t, arg1_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsraQ_ns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsraQ_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsraQ_ns8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vrsraq_n_s8 (arg0_int8x16_t, arg1_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsraQ_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsraQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsraQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vrsraq_n_u16 (arg0_uint16x8_t, arg1_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsraQ_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsraQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsraQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vrsraq_n_u32 (arg0_uint32x4_t, arg1_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsraQ_nu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsraQ_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsraQ_nu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vrsraq_n_u64 (arg0_uint64x2_t, arg1_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsraQ_nu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsraQ_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsraQ_nu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vrsraq_n_u8 (arg0_uint8x16_t, arg1_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsra_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsra_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsra_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vrsra_n_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsra_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsra_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsra_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vrsra_n_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsra_ns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsra_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsra_ns64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vrsra_n_s64 (arg0_int64x1_t, arg1_int64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsra_ns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsra_ns8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsra_ns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vrsra_n_s8 (arg0_int8x8_t, arg1_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsra_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsra_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsra_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vrsra_n_u16 (arg0_uint16x4_t, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsra_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsra_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsra_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vrsra_n_u32 (arg0_uint32x2_t, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsra_nu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsra_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsra_nu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vrsra_n_u64 (arg0_uint64x1_t, arg1_uint64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsra_nu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsra_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsra_nu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vrsra_n_u8 (arg0_uint8x8_t, arg1_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vrsra\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsubhns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsubhns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsubhns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int8x8_t = vrsubhn_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vrsubhn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsubhns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsubhns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsubhns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int16x4_t = vrsubhn_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vrsubhn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsubhns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsubhns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsubhns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int32x2_t = vrsubhn_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vrsubhn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsubhnu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsubhnu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsubhnu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint8x8_t = vrsubhn_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vrsubhn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsubhnu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsubhnu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsubhnu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint16x4_t = vrsubhn_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vrsubhn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vRsubhnu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vRsubhnu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vRsubhnu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint32x2_t = vrsubhn_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vrsubhn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabaQs16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabaQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabaQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- int16x8_t arg2_int16x8_t; -- -- out_int16x8_t = vabaq_s16 (arg0_int16x8_t, arg1_int16x8_t, arg2_int16x8_t); --} -- --/* { dg-final { scan-assembler "vaba\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabaQs32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabaQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabaQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- int32x4_t arg2_int32x4_t; -- -- out_int32x4_t = vabaq_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32x4_t); --} -- --/* { dg-final { scan-assembler "vaba\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabaQs8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabaQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabaQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- int8x16_t arg2_int8x16_t; -- -- out_int8x16_t = vabaq_s8 (arg0_int8x16_t, arg1_int8x16_t, arg2_int8x16_t); --} -- --/* { dg-final { scan-assembler "vaba\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabaQu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabaQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabaQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- uint16x8_t arg2_uint16x8_t; -- -- out_uint16x8_t = vabaq_u16 (arg0_uint16x8_t, arg1_uint16x8_t, arg2_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vaba\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabaQu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabaQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabaQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- uint32x4_t arg2_uint32x4_t; -- -- out_uint32x4_t = vabaq_u32 (arg0_uint32x4_t, arg1_uint32x4_t, arg2_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vaba\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabaQu8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabaQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabaQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- uint8x16_t arg2_uint8x16_t; -- -- out_uint8x16_t = vabaq_u8 (arg0_uint8x16_t, arg1_uint8x16_t, arg2_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vaba\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabals16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabals16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabals16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vabal_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t); --} -- --/* { dg-final { scan-assembler "vabal\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabals32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabals32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabals32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vabal_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t); --} -- --/* { dg-final { scan-assembler "vabal\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabals8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabals8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabals8 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int8x8_t arg1_int8x8_t; -- int8x8_t arg2_int8x8_t; -- -- out_int16x8_t = vabal_s8 (arg0_int16x8_t, arg1_int8x8_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vabal\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabalu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabalu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabalu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint32x4_t = vabal_u16 (arg0_uint32x4_t, arg1_uint16x4_t, arg2_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vabal\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabalu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabalu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabalu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint64x2_t = vabal_u32 (arg0_uint64x2_t, arg1_uint32x2_t, arg2_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vabal\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabalu8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabalu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabalu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint8x8_t arg1_uint8x8_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint16x8_t = vabal_u8 (arg0_uint16x8_t, arg1_uint8x8_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vabal\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabas16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabas16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabas16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int16x4_t = vaba_s16 (arg0_int16x4_t, arg1_int16x4_t, arg2_int16x4_t); --} -- --/* { dg-final { scan-assembler "vaba\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabas32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabas32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabas32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int32x2_t = vaba_s32 (arg0_int32x2_t, arg1_int32x2_t, arg2_int32x2_t); --} -- --/* { dg-final { scan-assembler "vaba\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabas8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabas8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabas8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- int8x8_t arg2_int8x8_t; -- -- out_int8x8_t = vaba_s8 (arg0_int8x8_t, arg1_int8x8_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vaba\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabau16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabau16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabau16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint16x4_t = vaba_u16 (arg0_uint16x4_t, arg1_uint16x4_t, arg2_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vaba\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabau32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabau32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabau32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint32x2_t = vaba_u32 (arg0_uint32x2_t, arg1_uint32x2_t, arg2_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vaba\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabau8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vabau8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabau8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint8x8_t = vaba_u8 (arg0_uint8x8_t, arg1_uint8x8_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vaba\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4_t = vabdq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vabd\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vabdq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vabd\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vabdq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vabd\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vabdq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vabd\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vabdq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vabd\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdQu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vabdq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vabd\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vabdq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vabd\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vabd_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vabd\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdls16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vabdl_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vabdl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdls32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vabdl_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vabdl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdls8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdls8 (void) --{ -- int16x8_t out_int16x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int16x8_t = vabdl_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vabdl\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdlu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdlu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdlu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint32x4_t = vabdl_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vabdl\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdlu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdlu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdlu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint64x2_t = vabdl_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vabdl\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdlu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabdlu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vabdlu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint16x8_t = vabdl_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vabdl\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vabds16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vabds16' ARM Neon intrinsic. 
*/
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabds16 (void)
--{
--  int16x4_t out_int16x4_t;
--  int16x4_t arg0_int16x4_t;
--  int16x4_t arg1_int16x4_t;
--
--  out_int16x4_t = vabd_s16 (arg0_int16x4_t, arg1_int16x4_t);
--}
--
--/* { dg-final { scan-assembler "vabd\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabds32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vabds32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabds32 (void)
--{
--  int32x2_t out_int32x2_t;
--  int32x2_t arg0_int32x2_t;
--  int32x2_t arg1_int32x2_t;
--
--  out_int32x2_t = vabd_s32 (arg0_int32x2_t, arg1_int32x2_t);
--}
--
--/* { dg-final { scan-assembler "vabd\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabds8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vabds8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabds8 (void)
--{
--  int8x8_t out_int8x8_t;
--  int8x8_t arg0_int8x8_t;
--  int8x8_t arg1_int8x8_t;
--
--  out_int8x8_t = vabd_s8 (arg0_int8x8_t, arg1_int8x8_t);
--}
--
--/* { dg-final { scan-assembler "vabd\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vabdu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabdu16 (void)
--{
--  uint16x4_t out_uint16x4_t;
--  uint16x4_t arg0_uint16x4_t;
--  uint16x4_t arg1_uint16x4_t;
--
--  out_uint16x4_t = vabd_u16 (arg0_uint16x4_t, arg1_uint16x4_t);
--}
--
--/* { dg-final { scan-assembler "vabd\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vabdu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabdu32 (void)
--{
--  uint32x2_t out_uint32x2_t;
--  uint32x2_t arg0_uint32x2_t;
--  uint32x2_t arg1_uint32x2_t;
--
--  out_uint32x2_t = vabd_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
--}
--
--/* { dg-final { scan-assembler "vabd\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabdu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vabdu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabdu8 (void)
--{
--  uint8x8_t out_uint8x8_t;
--  uint8x8_t arg0_uint8x8_t;
--  uint8x8_t arg1_uint8x8_t;
--
--  out_uint8x8_t = vabd_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
--}
--
--/* { dg-final { scan-assembler "vabd\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabsQf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vabsQf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabsQf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--
--  out_float32x4_t = vabsq_f32 (arg0_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vabs\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabsQs16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vabsQs16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabsQs16 (void)
--{
--  int16x8_t out_int16x8_t;
--  int16x8_t arg0_int16x8_t;
--
--  out_int16x8_t = vabsq_s16 (arg0_int16x8_t);
--}
--
--/* { dg-final { scan-assembler "vabs\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabsQs32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vabsQs32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabsQs32 (void)
--{
--  int32x4_t out_int32x4_t;
--  int32x4_t arg0_int32x4_t;
--
--  out_int32x4_t = vabsq_s32 (arg0_int32x4_t);
--}
--
--/* { dg-final { scan-assembler "vabs\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabsQs8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vabsQs8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabsQs8 (void)
--{
--  int8x16_t out_int8x16_t;
--  int8x16_t arg0_int8x16_t;
--
--  out_int8x16_t = vabsq_s8 (arg0_int8x16_t);
--}
--
--/* { dg-final { scan-assembler "vabs\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabsf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vabsf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabsf32 (void)
--{
--  float32x2_t out_float32x2_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_float32x2_t = vabs_f32 (arg0_float32x2_t);
--}
--
--/* { dg-final { scan-assembler "vabs\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabss16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vabss16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabss16 (void)
--{
--  int16x4_t out_int16x4_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_int16x4_t = vabs_s16 (arg0_int16x4_t);
--}
--
--/* { dg-final { scan-assembler "vabs\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabss32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vabss32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabss32 (void)
--{
--  int32x2_t out_int32x2_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_int32x2_t = vabs_s32 (arg0_int32x2_t);
--}
--
--/* { dg-final { scan-assembler "vabs\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vabss8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vabss8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vabss8 (void)
--{
--  int8x8_t out_int8x8_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_int8x8_t = vabs_s8 (arg0_int8x8_t);
--}
--
--/* { dg-final { scan-assembler "vabs\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vaddQf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vaddQf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--  float32x4_t arg1_float32x4_t;
--
--  out_float32x4_t = vaddq_f32 (arg0_float32x4_t, arg1_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vadd\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQs16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vaddQs16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vaddQs16 (void)
--{
--  int16x8_t out_int16x8_t;
--  int16x8_t arg0_int16x8_t;
--  int16x8_t arg1_int16x8_t;
--
--  out_int16x8_t = vaddq_s16 (arg0_int16x8_t, arg1_int16x8_t);
--}
--
--/* { dg-final { scan-assembler "vadd\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQs32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vaddQs32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vaddQs32 (void)
--{
--  int32x4_t out_int32x4_t;
--  int32x4_t arg0_int32x4_t;
--  int32x4_t arg1_int32x4_t;
--
--  out_int32x4_t = vaddq_s32 (arg0_int32x4_t, arg1_int32x4_t);
--}
--
--/* { dg-final { scan-assembler "vadd\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQs64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vaddQs64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vaddQs64 (void)
--{
--  int64x2_t out_int64x2_t;
--  int64x2_t arg0_int64x2_t;
--  int64x2_t arg1_int64x2_t;
--
--  out_int64x2_t = vaddq_s64 (arg0_int64x2_t, arg1_int64x2_t);
--}
--
--/* { dg-final { scan-assembler "vadd\.i64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQs8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vaddQs8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vaddQs8 (void)
--{
--  int8x16_t out_int8x16_t;
--  int8x16_t arg0_int8x16_t;
--  int8x16_t arg1_int8x16_t;
--
--  out_int8x16_t = vaddq_s8 (arg0_int8x16_t, arg1_int8x16_t);
--}
--
--/* { dg-final { scan-assembler "vadd\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vaddQu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vaddQu16 (void)
--{
--  uint16x8_t out_uint16x8_t;
--  uint16x8_t arg0_uint16x8_t;
--  uint16x8_t arg1_uint16x8_t;
--
--  out_uint16x8_t = vaddq_u16 (arg0_uint16x8_t, arg1_uint16x8_t);
--}
--
--/* { dg-final { scan-assembler "vadd\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vaddQu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vaddq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vadd\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vaddq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vadd\.i64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vaddq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vadd\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vadd_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vadd\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddhns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddhns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddhns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int8x8_t = vaddhn_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vaddhn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddhns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddhns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddhns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int16x4_t = vaddhn_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vaddhn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddhns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddhns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddhns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int32x2_t = vaddhn_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vaddhn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddhnu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddhnu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddhnu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint8x8_t = vaddhn_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vaddhn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddhnu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddhnu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddhnu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint16x4_t = vaddhn_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vaddhn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddhnu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddhnu64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddhnu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint32x2_t = vaddhn_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vaddhn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddls16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vaddl_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vaddl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddls32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vaddl_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vaddl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddls8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddls8 (void) --{ -- int16x8_t out_int16x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int16x8_t = vaddl_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vaddl\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddlu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddlu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddlu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint32x4_t = vaddl_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vaddl\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddlu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddlu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddlu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint64x2_t = vaddl_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vaddl\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddlu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddlu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddlu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint16x8_t = vaddl_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vaddl\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vadds16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vadds16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vadds16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vadd_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vadd\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vadds32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vadds32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vadds32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vadd_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vadd\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vadds64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vadds64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vadds64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vadd_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vadds8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vadds8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vadds8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vadd_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vadd\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vadd_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vadd\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vadd_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vadd\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vaddu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vadd_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vadd_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vadd\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddws16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddws16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddws16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vaddw_s16 (arg0_int32x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vaddw\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddws32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddws32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddws32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vaddw_s32 (arg0_int64x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vaddw\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddws8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddws8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddws8 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int16x8_t = vaddw_s8 (arg0_int16x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vaddw\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddwu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddwu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddwu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint32x4_t = vaddw_u16 (arg0_uint32x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vaddw\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddwu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddwu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddwu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint64x2_t = vaddw_u32 (arg0_uint64x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vaddw\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vaddwu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vaddwu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vaddwu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint16x8_t = vaddw_u8 (arg0_uint16x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vaddw\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vandq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vandq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vandq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vandq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vandq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vandq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vandq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vandq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vands16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vands16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vands16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vand_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vands32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vands32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vands32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vand_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vands64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vands64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vands64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vand_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vands8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vands8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vands8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vand_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vand_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vand_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vandu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vand_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vandu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vandu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vandu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vand_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vand\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int16x8_t out_int16x8_t; --int16x8_t arg0_int16x8_t; --int16x8_t arg1_int16x8_t; --void test_vbicQs16 (void) --{ -- -- out_int16x8_t = vbicq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int32x4_t out_int32x4_t; --int32x4_t arg0_int32x4_t; --int32x4_t arg1_int32x4_t; --void test_vbicQs32 (void) --{ -- -- out_int32x4_t = vbicq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int64x2_t out_int64x2_t; --int64x2_t arg0_int64x2_t; --int64x2_t arg1_int64x2_t; --void test_vbicQs64 (void) --{ -- -- out_int64x2_t = vbicq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int8x16_t out_int8x16_t; --int8x16_t arg0_int8x16_t; --int8x16_t arg1_int8x16_t; --void test_vbicQs8 (void) --{ -- -- out_int8x16_t = vbicq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint16x8_t out_uint16x8_t; --uint16x8_t arg0_uint16x8_t; --uint16x8_t arg1_uint16x8_t; --void test_vbicQu16 (void) --{ -- -- out_uint16x8_t = vbicq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint32x4_t out_uint32x4_t; --uint32x4_t arg0_uint32x4_t; --uint32x4_t arg1_uint32x4_t; --void test_vbicQu32 (void) --{ -- -- out_uint32x4_t = vbicq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint64x2_t out_uint64x2_t; --uint64x2_t arg0_uint64x2_t; --uint64x2_t arg1_uint64x2_t; --void test_vbicQu64 (void) --{ -- -- out_uint64x2_t = vbicq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint8x16_t out_uint8x16_t; --uint8x16_t arg0_uint8x16_t; --uint8x16_t arg1_uint8x16_t; --void test_vbicQu8 (void) --{ -- -- out_uint8x16_t = vbicq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbics16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbics16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int16x4_t out_int16x4_t; --int16x4_t arg0_int16x4_t; --int16x4_t arg1_int16x4_t; --void test_vbics16 (void) --{ -- -- out_int16x4_t = vbic_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbics32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbics32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int32x2_t out_int32x2_t; --int32x2_t arg0_int32x2_t; --int32x2_t arg1_int32x2_t; --void test_vbics32 (void) --{ -- -- out_int32x2_t = vbic_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbics64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vbics64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int64x1_t out_int64x1_t; --int64x1_t arg0_int64x1_t; --int64x1_t arg1_int64x1_t; --void test_vbics64 (void) --{ -- -- out_int64x1_t = vbic_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbics8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbics8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int8x8_t out_int8x8_t; --int8x8_t arg0_int8x8_t; --int8x8_t arg1_int8x8_t; --void test_vbics8 (void) --{ -- -- out_int8x8_t = vbic_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint16x4_t out_uint16x4_t; --uint16x4_t arg0_uint16x4_t; --uint16x4_t arg1_uint16x4_t; --void test_vbicu16 (void) --{ -- -- out_uint16x4_t = vbic_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint32x2_t out_uint32x2_t; --uint32x2_t arg0_uint32x2_t; --uint32x2_t arg1_uint32x2_t; --void test_vbicu32 (void) --{ -- -- out_uint32x2_t = vbic_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vbicu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint64x1_t out_uint64x1_t; --uint64x1_t arg0_uint64x1_t; --uint64x1_t arg1_uint64x1_t; --void test_vbicu64 (void) --{ -- -- out_uint64x1_t = vbic_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbicu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vbicu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint8x8_t out_uint8x8_t; --uint8x8_t arg0_uint8x8_t; --uint8x8_t arg1_uint8x8_t; --void test_vbicu8 (void) --{ -- -- out_uint8x8_t = vbic_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vbic\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vbslQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vbslQf32 (void) --{ -- float32x4_t out_float32x4_t; -- uint32x4_t arg0_uint32x4_t; -- float32x4_t arg1_float32x4_t; -- float32x4_t arg2_float32x4_t; -- -- out_float32x4_t = vbslq_f32 (arg0_uint32x4_t, arg1_float32x4_t, arg2_float32x4_t); --} -- --/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQp16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vbslQp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vbslQp16 (void)
--{
--  poly16x8_t out_poly16x8_t;
--  uint16x8_t arg0_uint16x8_t;
--  poly16x8_t arg1_poly16x8_t;
--  poly16x8_t arg2_poly16x8_t;
--
--  out_poly16x8_t = vbslq_p16 (arg0_uint16x8_t, arg1_poly16x8_t, arg2_poly16x8_t);
--}
--
--/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQp64.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQp64' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_crypto_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_crypto } */
---
---#include "arm_neon.h"
---
---void test_vbslQp64 (void)
---{
---  poly64x2_t out_poly64x2_t;
---  uint64x2_t arg0_uint64x2_t;
---  poly64x2_t arg1_poly64x2_t;
---  poly64x2_t arg2_poly64x2_t;
---
---  out_poly64x2_t = vbslq_p64 (arg0_uint64x2_t, arg1_poly64x2_t, arg2_poly64x2_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQp8.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQp8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQp8 (void)
---{
---  poly8x16_t out_poly8x16_t;
---  uint8x16_t arg0_uint8x16_t;
---  poly8x16_t arg1_poly8x16_t;
---  poly8x16_t arg2_poly8x16_t;
---
---  out_poly8x16_t = vbslq_p8 (arg0_uint8x16_t, arg1_poly8x16_t, arg2_poly8x16_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQs16.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQs16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQs16 (void)
---{
---  int16x8_t out_int16x8_t;
---  uint16x8_t arg0_uint16x8_t;
---  int16x8_t arg1_int16x8_t;
---  int16x8_t arg2_int16x8_t;
---
---  out_int16x8_t = vbslq_s16 (arg0_uint16x8_t, arg1_int16x8_t, arg2_int16x8_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQs32.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQs32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQs32 (void)
---{
---  int32x4_t out_int32x4_t;
---  uint32x4_t arg0_uint32x4_t;
---  int32x4_t arg1_int32x4_t;
---  int32x4_t arg2_int32x4_t;
---
---  out_int32x4_t = vbslq_s32 (arg0_uint32x4_t, arg1_int32x4_t, arg2_int32x4_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQs64.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQs64' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQs64 (void)
---{
---  int64x2_t out_int64x2_t;
---  uint64x2_t arg0_uint64x2_t;
---  int64x2_t arg1_int64x2_t;
---  int64x2_t arg2_int64x2_t;
---
---  out_int64x2_t = vbslq_s64 (arg0_uint64x2_t, arg1_int64x2_t, arg2_int64x2_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQs8.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQs8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQs8 (void)
---{
---  int8x16_t out_int8x16_t;
---  uint8x16_t arg0_uint8x16_t;
---  int8x16_t arg1_int8x16_t;
---  int8x16_t arg2_int8x16_t;
---
---  out_int8x16_t = vbslq_s8 (arg0_uint8x16_t, arg1_int8x16_t, arg2_int8x16_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQu16.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQu16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  uint16x8_t arg0_uint16x8_t;
---  uint16x8_t arg1_uint16x8_t;
---  uint16x8_t arg2_uint16x8_t;
---
---  out_uint16x8_t = vbslq_u16 (arg0_uint16x8_t, arg1_uint16x8_t, arg2_uint16x8_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQu32.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQu32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  uint32x4_t arg0_uint32x4_t;
---  uint32x4_t arg1_uint32x4_t;
---  uint32x4_t arg2_uint32x4_t;
---
---  out_uint32x4_t = vbslq_u32 (arg0_uint32x4_t, arg1_uint32x4_t, arg2_uint32x4_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQu64.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQu64' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQu64 (void)
---{
---  uint64x2_t out_uint64x2_t;
---  uint64x2_t arg0_uint64x2_t;
---  uint64x2_t arg1_uint64x2_t;
---  uint64x2_t arg2_uint64x2_t;
---
---  out_uint64x2_t = vbslq_u64 (arg0_uint64x2_t, arg1_uint64x2_t, arg2_uint64x2_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslQu8.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslQu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslQu8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  uint8x16_t arg0_uint8x16_t;
---  uint8x16_t arg1_uint8x16_t;
---  uint8x16_t arg2_uint8x16_t;
---
---  out_uint8x16_t = vbslq_u8 (arg0_uint8x16_t, arg1_uint8x16_t, arg2_uint8x16_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslf32.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslf32 (void)
---{
---  float32x2_t out_float32x2_t;
---  uint32x2_t arg0_uint32x2_t;
---  float32x2_t arg1_float32x2_t;
---  float32x2_t arg2_float32x2_t;
---
---  out_float32x2_t = vbsl_f32 (arg0_uint32x2_t, arg1_float32x2_t, arg2_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslp16.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslp16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslp16 (void)
---{
---  poly16x4_t out_poly16x4_t;
---  uint16x4_t arg0_uint16x4_t;
---  poly16x4_t arg1_poly16x4_t;
---  poly16x4_t arg2_poly16x4_t;
---
---  out_poly16x4_t = vbsl_p16 (arg0_uint16x4_t, arg1_poly16x4_t, arg2_poly16x4_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslp64.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslp64' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_crypto_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_crypto } */
---
---#include "arm_neon.h"
---
---void test_vbslp64 (void)
---{
---  poly64x1_t out_poly64x1_t;
---  uint64x1_t arg0_uint64x1_t;
---  poly64x1_t arg1_poly64x1_t;
---  poly64x1_t arg2_poly64x1_t;
---
---  out_poly64x1_t = vbsl_p64 (arg0_uint64x1_t, arg1_poly64x1_t, arg2_poly64x1_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslp8.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslp8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslp8 (void)
---{
---  poly8x8_t out_poly8x8_t;
---  uint8x8_t arg0_uint8x8_t;
---  poly8x8_t arg1_poly8x8_t;
---  poly8x8_t arg2_poly8x8_t;
---
---  out_poly8x8_t = vbsl_p8 (arg0_uint8x8_t, arg1_poly8x8_t, arg2_poly8x8_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbsls16.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbsls16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbsls16 (void)
---{
---  int16x4_t out_int16x4_t;
---  uint16x4_t arg0_uint16x4_t;
---  int16x4_t arg1_int16x4_t;
---  int16x4_t arg2_int16x4_t;
---
---  out_int16x4_t = vbsl_s16 (arg0_uint16x4_t, arg1_int16x4_t, arg2_int16x4_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbsls32.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbsls32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbsls32 (void)
---{
---  int32x2_t out_int32x2_t;
---  uint32x2_t arg0_uint32x2_t;
---  int32x2_t arg1_int32x2_t;
---  int32x2_t arg2_int32x2_t;
---
---  out_int32x2_t = vbsl_s32 (arg0_uint32x2_t, arg1_int32x2_t, arg2_int32x2_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbsls64.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbsls64' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbsls64 (void)
---{
---  int64x1_t out_int64x1_t;
---  uint64x1_t arg0_uint64x1_t;
---  int64x1_t arg1_int64x1_t;
---  int64x1_t arg2_int64x1_t;
---
---  out_int64x1_t = vbsl_s64 (arg0_uint64x1_t, arg1_int64x1_t, arg2_int64x1_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbsls8.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbsls8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbsls8 (void)
---{
---  int8x8_t out_int8x8_t;
---  uint8x8_t arg0_uint8x8_t;
---  int8x8_t arg1_int8x8_t;
---  int8x8_t arg2_int8x8_t;
---
---  out_int8x8_t = vbsl_s8 (arg0_uint8x8_t, arg1_int8x8_t, arg2_int8x8_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslu16.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslu16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  uint16x4_t arg0_uint16x4_t;
---  uint16x4_t arg1_uint16x4_t;
---  uint16x4_t arg2_uint16x4_t;
---
---  out_uint16x4_t = vbsl_u16 (arg0_uint16x4_t, arg1_uint16x4_t, arg2_uint16x4_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslu32.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslu32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  uint32x2_t arg0_uint32x2_t;
---  uint32x2_t arg1_uint32x2_t;
---  uint32x2_t arg2_uint32x2_t;
---
---  out_uint32x2_t = vbsl_u32 (arg0_uint32x2_t, arg1_uint32x2_t, arg2_uint32x2_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslu64.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslu64' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslu64 (void)
---{
---  uint64x1_t out_uint64x1_t;
---  uint64x1_t arg0_uint64x1_t;
---  uint64x1_t arg1_uint64x1_t;
---  uint64x1_t arg2_uint64x1_t;
---
---  out_uint64x1_t = vbsl_u64 (arg0_uint64x1_t, arg1_uint64x1_t, arg2_uint64x1_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vbslu8.c
-+++ b/src//dev/null
-@@ -1,21 +0,0 @@
---/* Test the `vbslu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vbslu8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  uint8x8_t arg0_uint8x8_t;
---  uint8x8_t arg1_uint8x8_t;
---  uint8x8_t arg2_uint8x8_t;
---
---  out_uint8x8_t = vbsl_u8 (arg0_uint8x8_t, arg1_uint8x8_t, arg2_uint8x8_t);
---}
---
---/* { dg-final { scan-assembler "((vbsl)|(vbit)|(vbif))\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcageQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcageQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcageQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vcageq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vacge\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcagef32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcagef32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcagef32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  float32x2_t arg0_float32x2_t;
---  float32x2_t arg1_float32x2_t;
---
---  out_uint32x2_t = vcage_f32 (arg0_float32x2_t, arg1_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "vacge\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcagtQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcagtQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcagtQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vcagtq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vacgt\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcagtf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcagtf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcagtf32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  float32x2_t arg0_float32x2_t;
---  float32x2_t arg1_float32x2_t;
---
---  out_uint32x2_t = vcagt_f32 (arg0_float32x2_t, arg1_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "vacgt\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcaleQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcaleQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcaleQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vcaleq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vacge\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcalef32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcalef32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcalef32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  float32x2_t arg0_float32x2_t;
---  float32x2_t arg1_float32x2_t;
---
---  out_uint32x2_t = vcale_f32 (arg0_float32x2_t, arg1_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "vacge\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcaltQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcaltQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcaltQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vcaltq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vacgt\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcaltf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcaltf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcaltf32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  float32x2_t arg0_float32x2_t;
---  float32x2_t arg1_float32x2_t;
---
---  out_uint32x2_t = vcalt_f32 (arg0_float32x2_t, arg1_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "vacgt\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vceqq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqQp8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqQp8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqQp8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  poly8x16_t arg0_poly8x16_t;
---  poly8x16_t arg1_poly8x16_t;
---
---  out_uint8x16_t = vceqq_p8 (arg0_poly8x16_t, arg1_poly8x16_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqQs16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqQs16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqQs16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  int16x8_t arg0_int16x8_t;
---  int16x8_t arg1_int16x8_t;
---
---  out_uint16x8_t = vceqq_s16 (arg0_int16x8_t, arg1_int16x8_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqQs32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqQs32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqQs32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  int32x4_t arg0_int32x4_t;
---  int32x4_t arg1_int32x4_t;
---
---  out_uint32x4_t = vceqq_s32 (arg0_int32x4_t, arg1_int32x4_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqQs8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqQs8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqQs8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  int8x16_t arg0_int8x16_t;
---  int8x16_t arg1_int8x16_t;
---
---  out_uint8x16_t = vceqq_s8 (arg0_int8x16_t, arg1_int8x16_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqQu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqQu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqQu16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  uint16x8_t arg0_uint16x8_t;
---  uint16x8_t arg1_uint16x8_t;
---
---  out_uint16x8_t = vceqq_u16 (arg0_uint16x8_t, arg1_uint16x8_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqQu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqQu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqQu32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  uint32x4_t arg0_uint32x4_t;
---  uint32x4_t arg1_uint32x4_t;
---
---  out_uint32x4_t = vceqq_u32 (arg0_uint32x4_t, arg1_uint32x4_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqQu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqQu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqQu8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  uint8x16_t arg0_uint8x16_t;
---  uint8x16_t arg1_uint8x16_t;
---
---  out_uint8x16_t = vceqq_u8 (arg0_uint8x16_t, arg1_uint8x16_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqf32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  float32x2_t arg0_float32x2_t;
---  float32x2_t arg1_float32x2_t;
---
---  out_uint32x2_t = vceq_f32 (arg0_float32x2_t, arg1_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqp8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqp8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqp8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  poly8x8_t arg0_poly8x8_t;
---  poly8x8_t arg1_poly8x8_t;
---
---  out_uint8x8_t = vceq_p8 (arg0_poly8x8_t, arg1_poly8x8_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqs16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqs16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqs16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  int16x4_t arg0_int16x4_t;
---  int16x4_t arg1_int16x4_t;
---
---  out_uint16x4_t = vceq_s16 (arg0_int16x4_t, arg1_int16x4_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqs32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqs32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqs32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  int32x2_t arg0_int32x2_t;
---  int32x2_t arg1_int32x2_t;
---
---  out_uint32x2_t = vceq_s32 (arg0_int32x2_t, arg1_int32x2_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vceqs8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vceqs8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vceqs8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  int8x8_t arg0_int8x8_t;
---  int8x8_t arg1_int8x8_t;
---
---  out_uint8x8_t = vceq_s8 (arg0_int8x8_t, arg1_int8x8_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcequ16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcequ16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcequ16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  uint16x4_t arg0_uint16x4_t;
---  uint16x4_t arg1_uint16x4_t;
---
---  out_uint16x4_t = vceq_u16 (arg0_uint16x4_t, arg1_uint16x4_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcequ32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcequ32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcequ32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  uint32x2_t arg0_uint32x2_t;
---  uint32x2_t arg1_uint32x2_t;
---
---  out_uint32x2_t = vceq_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcequ8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcequ8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcequ8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  uint8x8_t arg0_uint8x8_t;
---  uint8x8_t arg1_uint8x8_t;
---
---  out_uint8x8_t = vceq_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
---}
---
---/* { dg-final { scan-assembler "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vcgeq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeQs16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeQs16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeQs16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  int16x8_t arg0_int16x8_t;
---  int16x8_t arg1_int16x8_t;
---
---  out_uint16x8_t = vcgeq_s16 (arg0_int16x8_t, arg1_int16x8_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeQs32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeQs32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeQs32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  int32x4_t arg0_int32x4_t;
---  int32x4_t arg1_int32x4_t;
---
---  out_uint32x4_t = vcgeq_s32 (arg0_int32x4_t, arg1_int32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeQs8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeQs8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeQs8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  int8x16_t arg0_int8x16_t;
---  int8x16_t arg1_int8x16_t;
---
---  out_uint8x16_t = vcgeq_s8 (arg0_int8x16_t, arg1_int8x16_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeQu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeQu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeQu16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  uint16x8_t arg0_uint16x8_t;
---  uint16x8_t arg1_uint16x8_t;
---
---  out_uint16x8_t = vcgeq_u16 (arg0_uint16x8_t, arg1_uint16x8_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeQu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeQu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeQu32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  uint32x4_t arg0_uint32x4_t;
---  uint32x4_t arg1_uint32x4_t;
---
---  out_uint32x4_t = vcgeq_u32 (arg0_uint32x4_t, arg1_uint32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeQu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeQu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeQu8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  uint8x16_t arg0_uint8x16_t;
---  uint8x16_t arg1_uint8x16_t;
---
---  out_uint8x16_t = vcgeq_u8 (arg0_uint8x16_t, arg1_uint8x16_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgef32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgef32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgef32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  float32x2_t arg0_float32x2_t;
---  float32x2_t arg1_float32x2_t;
---
---  out_uint32x2_t = vcge_f32 (arg0_float32x2_t, arg1_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcges16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcges16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcges16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  int16x4_t arg0_int16x4_t;
---  int16x4_t arg1_int16x4_t;
---
---  out_uint16x4_t = vcge_s16 (arg0_int16x4_t, arg1_int16x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcges32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcges32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcges32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  int32x2_t arg0_int32x2_t;
---  int32x2_t arg1_int32x2_t;
---
---  out_uint32x2_t = vcge_s32 (arg0_int32x2_t, arg1_int32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcges8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcges8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcges8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  int8x8_t arg0_int8x8_t;
---  int8x8_t arg1_int8x8_t;
---
---  out_uint8x8_t = vcge_s8 (arg0_int8x8_t, arg1_int8x8_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeu16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  uint16x4_t arg0_uint16x4_t;
---  uint16x4_t arg1_uint16x4_t;
---
---  out_uint16x4_t = vcge_u16 (arg0_uint16x4_t, arg1_uint16x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeu32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  uint32x2_t arg0_uint32x2_t;
---  uint32x2_t arg1_uint32x2_t;
---
---  out_uint32x2_t = vcge_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgeu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgeu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgeu8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  uint8x8_t arg0_uint8x8_t;
---  uint8x8_t arg1_uint8x8_t;
---
---  out_uint8x8_t = vcge_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vcgtq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtQs16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtQs16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtQs16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  int16x8_t arg0_int16x8_t;
---  int16x8_t arg1_int16x8_t;
---
---  out_uint16x8_t = vcgtq_s16 (arg0_int16x8_t, arg1_int16x8_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtQs32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtQs32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtQs32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  int32x4_t arg0_int32x4_t;
---  int32x4_t arg1_int32x4_t;
---
---  out_uint32x4_t = vcgtq_s32 (arg0_int32x4_t, arg1_int32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtQs8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtQs8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtQs8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  int8x16_t arg0_int8x16_t;
---  int8x16_t arg1_int8x16_t;
---
---  out_uint8x16_t = vcgtq_s8 (arg0_int8x16_t, arg1_int8x16_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtQu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtQu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtQu16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  uint16x8_t arg0_uint16x8_t;
---  uint16x8_t arg1_uint16x8_t;
---
---  out_uint16x8_t = vcgtq_u16 (arg0_uint16x8_t, arg1_uint16x8_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtQu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtQu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtQu32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  uint32x4_t arg0_uint32x4_t;
---  uint32x4_t arg1_uint32x4_t;
---
---  out_uint32x4_t = vcgtq_u32 (arg0_uint32x4_t, arg1_uint32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtQu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtQu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtQu8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  uint8x16_t arg0_uint8x16_t;
---  uint8x16_t arg1_uint8x16_t;
---
---  out_uint8x16_t = vcgtq_u8 (arg0_uint8x16_t, arg1_uint8x16_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtf32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  float32x2_t arg0_float32x2_t;
---  float32x2_t arg1_float32x2_t;
---
---  out_uint32x2_t = vcgt_f32 (arg0_float32x2_t, arg1_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgts16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgts16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgts16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  int16x4_t arg0_int16x4_t;
---  int16x4_t arg1_int16x4_t;
---
---  out_uint16x4_t = vcgt_s16 (arg0_int16x4_t, arg1_int16x4_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgts32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgts32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgts32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  int32x2_t arg0_int32x2_t;
---  int32x2_t arg1_int32x2_t;
---
---  out_uint32x2_t = vcgt_s32 (arg0_int32x2_t, arg1_int32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgts8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgts8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgts8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  int8x8_t arg0_int8x8_t;
---  int8x8_t arg1_int8x8_t;
---
---  out_uint8x8_t = vcgt_s8 (arg0_int8x8_t, arg1_int8x8_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtu16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  uint16x4_t arg0_uint16x4_t;
---  uint16x4_t arg1_uint16x4_t;
---
---  out_uint16x4_t = vcgt_u16 (arg0_uint16x4_t, arg1_uint16x4_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtu32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  uint32x2_t arg0_uint32x2_t;
---  uint32x2_t arg1_uint32x2_t;
---
---  out_uint32x2_t = vcgt_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcgtu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcgtu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcgtu8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  uint8x8_t arg0_uint8x8_t;
---  uint8x8_t arg1_uint8x8_t;
---
---  out_uint8x8_t = vcgt_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vcleq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleQs16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleQs16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleQs16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  int16x8_t arg0_int16x8_t;
---  int16x8_t arg1_int16x8_t;
---
---  out_uint16x8_t = vcleq_s16 (arg0_int16x8_t, arg1_int16x8_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleQs32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleQs32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleQs32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  int32x4_t arg0_int32x4_t;
---  int32x4_t arg1_int32x4_t;
---
---  out_uint32x4_t = vcleq_s32 (arg0_int32x4_t, arg1_int32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleQs8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleQs8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleQs8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  int8x16_t arg0_int8x16_t;
---  int8x16_t arg1_int8x16_t;
---
---  out_uint8x16_t = vcleq_s8 (arg0_int8x16_t, arg1_int8x16_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleQu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleQu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleQu16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  uint16x8_t arg0_uint16x8_t;
---  uint16x8_t arg1_uint16x8_t;
---
---  out_uint16x8_t = vcleq_u16 (arg0_uint16x8_t, arg1_uint16x8_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleQu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleQu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleQu32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  uint32x4_t arg0_uint32x4_t;
---  uint32x4_t arg1_uint32x4_t;
---
---  out_uint32x4_t = vcleq_u32 (arg0_uint32x4_t, arg1_uint32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleQu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleQu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleQu8 (void)
---{
---  uint8x16_t out_uint8x16_t;
---  uint8x16_t arg0_uint8x16_t;
---  uint8x16_t arg1_uint8x16_t;
---
---  out_uint8x16_t = vcleq_u8 (arg0_uint8x16_t, arg1_uint8x16_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vclef32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vclef32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vclef32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  float32x2_t arg0_float32x2_t;
---  float32x2_t arg1_float32x2_t;
---
---  out_uint32x2_t = vcle_f32 (arg0_float32x2_t, arg1_float32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcles16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcles16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcles16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  int16x4_t arg0_int16x4_t;
---  int16x4_t arg1_int16x4_t;
---
---  out_uint16x4_t = vcle_s16 (arg0_int16x4_t, arg1_int16x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcles32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcles32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcles32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  int32x2_t arg0_int32x2_t;
---  int32x2_t arg1_int32x2_t;
---
---  out_uint32x2_t = vcle_s32 (arg0_int32x2_t, arg1_int32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcles8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcles8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcles8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  int8x8_t arg0_int8x8_t;
---  int8x8_t arg1_int8x8_t;
---
---  out_uint8x8_t = vcle_s8 (arg0_int8x8_t, arg1_int8x8_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleu16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleu16 (void)
---{
---  uint16x4_t out_uint16x4_t;
---  uint16x4_t arg0_uint16x4_t;
---  uint16x4_t arg1_uint16x4_t;
---
---  out_uint16x4_t = vcle_u16 (arg0_uint16x4_t, arg1_uint16x4_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleu32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleu32 (void)
---{
---  uint32x2_t out_uint32x2_t;
---  uint32x2_t arg0_uint32x2_t;
---  uint32x2_t arg1_uint32x2_t;
---
---  out_uint32x2_t = vcle_u32 (arg0_uint32x2_t, arg1_uint32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcleu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcleu8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcleu8 (void)
---{
---  uint8x8_t out_uint8x8_t;
---  uint8x8_t arg0_uint8x8_t;
---  uint8x8_t arg1_uint8x8_t;
---
---  out_uint8x8_t = vcle_u8 (arg0_uint8x8_t, arg1_uint8x8_t);
---}
---
---/* { dg-final { scan-assembler "vcge\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vclsQs16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
---/* Test the `vclsQs16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vclsQs16 (void)
---{
---  int16x8_t out_int16x8_t;
---  int16x8_t arg0_int16x8_t;
---
---  out_int16x8_t = vclsq_s16 (arg0_int16x8_t);
---}
---
---/* { dg-final { scan-assembler "vcls\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vclsQs32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
---/* Test the `vclsQs32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vclsQs32 (void)
---{
---  int32x4_t out_int32x4_t;
---  int32x4_t arg0_int32x4_t;
---
---  out_int32x4_t = vclsq_s32 (arg0_int32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcls\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vclsQs8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
---/* Test the `vclsQs8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vclsQs8 (void)
---{
---  int8x16_t out_int8x16_t;
---  int8x16_t arg0_int8x16_t;
---
---  out_int8x16_t = vclsq_s8 (arg0_int8x16_t);
---}
---
---/* { dg-final { scan-assembler "vcls\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vclss16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
---/* Test the `vclss16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vclss16 (void)
---{
---  int16x4_t out_int16x4_t;
---  int16x4_t arg0_int16x4_t;
---
---  out_int16x4_t = vcls_s16 (arg0_int16x4_t);
---}
---
---/* { dg-final { scan-assembler "vcls\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vclss32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
---/* Test the `vclss32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vclss32 (void)
---{
---  int32x2_t out_int32x2_t;
---  int32x2_t arg0_int32x2_t;
---
---  out_int32x2_t = vcls_s32 (arg0_int32x2_t);
---}
---
---/* { dg-final { scan-assembler "vcls\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vclss8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
---/* Test the `vclss8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vclss8 (void)
---{
---  int8x8_t out_int8x8_t;
---  int8x8_t arg0_int8x8_t;
---
---  out_int8x8_t = vcls_s8 (arg0_int8x8_t);
---}
---
---/* { dg-final { scan-assembler "vcls\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcltQf32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcltQf32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  float32x4_t arg0_float32x4_t;
---  float32x4_t arg1_float32x4_t;
---
---  out_uint32x4_t = vcltq_f32 (arg0_float32x4_t, arg1_float32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltQs16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcltQs16' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcltQs16 (void)
---{
---  uint16x8_t out_uint16x8_t;
---  int16x8_t arg0_int16x8_t;
---  int16x8_t arg1_int16x8_t;
---
---  out_uint16x8_t = vcltq_s16 (arg0_int16x8_t, arg1_int16x8_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltQs32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcltQs32' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.  */
---
---/* { dg-do assemble } */
---/* { dg-require-effective-target arm_neon_ok } */
---/* { dg-options "-save-temps -O0" } */
---/* { dg-add-options arm_neon } */
---
---#include "arm_neon.h"
---
---void test_vcltQs32 (void)
---{
---  uint32x4_t out_uint32x4_t;
---  int32x4_t arg0_int32x4_t;
---  int32x4_t arg1_int32x4_t;
---
---  out_uint32x4_t = vcltq_s32 (arg0_int32x4_t, arg1_int32x4_t);
---}
---
---/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltQs8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
---/* Test the `vcltQs8' ARM Neon intrinsic.  */
---/* This file was autogenerated by neon-testgen.
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcltQs8 (void) --{ -- uint8x16_t out_uint8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_uint8x16_t = vcltq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vcltQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcltQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vcltq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vcgt\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vcltQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcltQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vcltq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vcgt\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vcltQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcltQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vcltq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vcgt\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vcltf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcltf32 (void) --{ -- uint32x2_t out_uint32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_uint32x2_t = vclt_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vcgt\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclts16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vclts16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclts16 (void) --{ -- uint16x4_t out_uint16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_uint16x4_t = vclt_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclts32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vclts32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclts32 (void) --{ -- uint32x2_t out_uint32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_uint32x2_t = vclt_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclts8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vclts8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclts8 (void) --{ -- uint8x8_t out_uint8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_uint8x8_t = vclt_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vcltu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcltu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vclt_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vcgt\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vcltu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcltu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vclt_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vcgt\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcltu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vcltu8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcltu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vclt_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vcgt\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzQs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vclzq_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vclz\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzQs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x4_t = vclzq_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vclz\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzQs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vclzq_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vclz\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzQu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16x8_t = vclzq_u16 (arg0_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vclz\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzQu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32x4_t = vclzq_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vclz\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzQu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x16_t = vclzq_u8 (arg0_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vclz\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vclz_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vclz\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vclz_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vclz\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vclz_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vclz\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16x4_t = vclz_u16 (arg0_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vclz\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32x2_t = vclz_u32 (arg0_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vclz\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vclzu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vclzu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vclzu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vclz_u8 (arg0_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vclz\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcntQp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcntQp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcntQp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly8x16_t = vcntq_p8 (arg0_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vcnt\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcntQs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcntQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcntQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vcntq_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vcnt\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcntQu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcntQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcntQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x16_t = vcntq_u8 (arg0_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vcnt\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcntp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcntp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcntp8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_poly8x8_t = vcnt_p8 (arg0_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vcnt\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcnts8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcnts8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcnts8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vcnt_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vcnt\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcntu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcntu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcntu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vcnt_u8 (arg0_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vcnt\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombinef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombinef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombinef32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x4_t = vcombine_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombinep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombinep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombinep16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16x4_t arg0_poly16x4_t; -- poly16x4_t arg1_poly16x4_t; -- -- out_poly16x8_t = vcombine_p16 (arg0_poly16x4_t, arg1_poly16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombinep64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombinep64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vcombinep64 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly64x1_t arg0_poly64x1_t; -- poly64x1_t arg1_poly64x1_t; -- -- out_poly64x2_t = vcombine_p64 (arg0_poly64x1_t, arg1_poly64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombinep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombinep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombinep8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x16_t = vcombine_p8 (arg0_poly8x8_t, arg1_poly8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombines16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombines16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombines16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x8_t = vcombine_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombines32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombines32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombines32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x4_t = vcombine_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombines64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombines64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombines64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x2_t = vcombine_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombines8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombines8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombines8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x16_t = vcombine_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombineu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombineu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombineu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x8_t = vcombine_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombineu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombineu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombineu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x4_t = vcombine_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombineu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombineu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombineu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x2_t = vcombine_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcombineu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcombineu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcombineu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x16_t = vcombine_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreatef32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreatef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreatef32 (void) --{ -- float32x2_t out_float32x2_t; -- uint64_t arg0_uint64_t; -- -- out_float32x2_t = vcreate_f32 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreatep16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreatep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreatep16 (void) --{ -- poly16x4_t out_poly16x4_t; -- uint64_t arg0_uint64_t; -- -- out_poly16x4_t = vcreate_p16 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreatep64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreatep64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vcreatep64 (void) --{ -- poly64x1_t out_poly64x1_t; -- uint64_t arg0_uint64_t; -- -- out_poly64x1_t = vcreate_p64 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreatep8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreatep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreatep8 (void) --{ -- poly8x8_t out_poly8x8_t; -- uint64_t arg0_uint64_t; -- -- out_poly8x8_t = vcreate_p8 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreates16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreates16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreates16 (void) --{ -- int16x4_t out_int16x4_t; -- uint64_t arg0_uint64_t; -- -- out_int16x4_t = vcreate_s16 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreates32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreates32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreates32 (void) --{ -- int32x2_t out_int32x2_t; -- uint64_t arg0_uint64_t; -- -- out_int32x2_t = vcreate_s32 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreates64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreates64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreates64 (void) --{ -- int64x1_t out_int64x1_t; -- uint64_t arg0_uint64_t; -- -- out_int64x1_t = vcreate_s64 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreates8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreates8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreates8 (void) --{ -- int8x8_t out_int8x8_t; -- uint64_t arg0_uint64_t; -- -- out_int8x8_t = vcreate_s8 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreateu16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreateu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreateu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint64_t arg0_uint64_t; -- -- out_uint16x4_t = vcreate_u16 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreateu32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreateu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreateu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64_t arg0_uint64_t; -- -- out_uint32x2_t = vcreate_u32 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreateu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreateu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreateu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64_t arg0_uint64_t; -- -- out_uint64x1_t = vcreate_u64 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcreateu8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vcreateu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcreateu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint64_t arg0_uint64_t; -- -- out_uint8x8_t = vcreate_u8 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtQ_nf32_s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtQ_nf32_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtQ_nf32_s32 (void) --{ -- float32x4_t out_float32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_float32x4_t = vcvtq_n_f32_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtQ_nf32_u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtQ_nf32_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtQ_nf32_u32 (void) --{ -- float32x4_t out_float32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_float32x4_t = vcvtq_n_f32_u32 (arg0_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtQ_ns32_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtQ_ns32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtQ_ns32_f32 (void) --{ -- int32x4_t out_int32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_int32x4_t = vcvtq_n_s32_f32 (arg0_float32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vcvt\.s32.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtQ_nu32_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtQ_nu32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtQ_nu32_f32 (void) --{ -- uint32x4_t out_uint32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_uint32x4_t = vcvtq_n_u32_f32 (arg0_float32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vcvt\.u32.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtQf32_s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtQf32_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtQf32_s32 (void) --{ -- float32x4_t out_float32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_float32x4_t = vcvtq_f32_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtQf32_u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtQf32_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtQf32_u32 (void) --{ -- float32x4_t out_float32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_float32x4_t = vcvtq_f32_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtQs32_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtQs32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtQs32_f32 (void) --{ -- int32x4_t out_int32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_int32x4_t = vcvtq_s32_f32 (arg0_float32x4_t); --} -- --/* { dg-final { scan-assembler "vcvt\.s32.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtQu32_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtQu32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtQu32_f32 (void) --{ -- uint32x4_t out_uint32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_uint32x4_t = vcvtq_u32_f32 (arg0_float32x4_t); --} -- --/* { dg-final { scan-assembler "vcvt\.u32.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvt_nf32_s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvt_nf32_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvt_nf32_s32 (void) --{ -- float32x2_t out_float32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_float32x2_t = vcvt_n_f32_s32 (arg0_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvt_nf32_u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvt_nf32_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvt_nf32_u32 (void) --{ -- float32x2_t out_float32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_float32x2_t = vcvt_n_f32_u32 (arg0_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvt_ns32_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvt_ns32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvt_ns32_f32 (void) --{ -- int32x2_t out_int32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_int32x2_t = vcvt_n_s32_f32 (arg0_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vcvt\.s32.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvt_nu32_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvt_nu32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvt_nu32_f32 (void) --{ -- uint32x2_t out_uint32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_uint32x2_t = vcvt_n_u32_f32 (arg0_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vcvt\.u32.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf16_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtf16_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_fp16_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon_fp16 } */ -- --#include "arm_neon.h" -- --void test_vcvtf16_f32 (void) --{ -- float16x4_t out_float16x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t); --} -- --/* { dg-final { scan-assembler "vcvt\.f16.f32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_f16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtf32_f16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_fp16_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon_fp16 } */ -- --#include "arm_neon.h" -- --void test_vcvtf32_f16 (void) --{ -- float32x4_t out_float32x4_t; -- float16x4_t arg0_float16x4_t; -- -- out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.f16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtf32_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtf32_s32 (void) --{ -- float32x2_t out_float32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_float32x2_t = vcvt_f32_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtf32_u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtf32_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtf32_u32 (void) --{ -- float32x2_t out_float32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_float32x2_t = vcvt_f32_u32 (arg0_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vcvt\.f32.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvts32_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvts32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvts32_f32 (void) --{ -- int32x2_t out_int32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_int32x2_t = vcvt_s32_f32 (arg0_float32x2_t); --} -- --/* { dg-final { scan-assembler "vcvt\.s32.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vcvtu32_f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vcvtu32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vcvtu32_f32 (void) --{ -- uint32x2_t out_uint32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_uint32x2_t = vcvt_u32_f32 (arg0_float32x2_t); --} -- --/* { dg-final { scan-assembler "vcvt\.u32.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_lanef32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x2_t arg0_float32x2_t; -- -- out_float32x4_t = vdupq_lane_f32 (arg0_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_lanep16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_poly16x8_t = vdupq_lane_p16 (arg0_poly16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanep64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdupQ_lanep64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vdupQ_lanep64 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly64x1_t arg0_poly64x1_t; -- -- out_poly64x2_t = vdupq_lane_p64 (arg0_poly64x1_t, 0); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_lanep8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_poly8x16_t = vdupq_lane_p8 (arg0_poly8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_lanes16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x8_t = vdupq_lane_s16 (arg0_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_lanes32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x4_t = vdupq_lane_s32 (arg0_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanes64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdupQ_lanes64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_lanes64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x1_t arg0_int64x1_t; -- -- out_int64x2_t = vdupq_lane_s64 (arg0_int64x1_t, 0); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_lanes8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x16_t = vdupq_lane_s8 (arg0_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_laneu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16x8_t = vdupq_lane_u16 (arg0_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_laneu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32x4_t = vdupq_lane_u32 (arg0_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_laneu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdupQ_laneu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_laneu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_uint64x2_t = vdupq_lane_u64 (arg0_uint64x1_t, 0); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_laneu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x16_t = vdupq_lane_u8 (arg0_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_nf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_nf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32_t arg0_float32_t; -- -- out_float32x4_t = vdupq_n_f32 (arg0_float32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_np16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_np16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_np16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16_t arg0_poly16_t; -- -- out_poly16x8_t = vdupq_n_p16 (arg0_poly16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_np64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdupQ_np64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vdupQ_np64 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly64_t arg0_poly64_t; -- -- out_poly64x2_t = vdupq_n_p64 (arg0_poly64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_np8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_np8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_np8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8_t arg0_poly8_t; -- -- out_poly8x16_t = vdupq_n_p8 (arg0_poly8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16_t arg0_int16_t; -- -- out_int16x8_t = vdupq_n_s16 (arg0_int16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32_t arg0_int32_t; -- -- out_int32x4_t = vdupq_n_s32 (arg0_int32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_ns64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdupQ_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_ns64 (void) --{ -- int64x2_t out_int64x2_t; -- int64_t arg0_int64_t; -- -- out_int64x2_t = vdupq_n_s64 (arg0_int64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_ns8 (void) --{ -- int8x16_t out_int8x16_t; -- int8_t arg0_int8_t; -- -- out_int8x16_t = vdupq_n_s8 (arg0_int8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16_t arg0_uint16_t; -- -- out_uint16x8_t = vdupq_n_u16 (arg0_uint16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32_t arg0_uint32_t; -- -- out_uint32x4_t = vdupq_n_u32 (arg0_uint32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_nu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdupQ_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_nu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64_t arg0_uint64_t; -- -- out_uint64x2_t = vdupq_n_u64 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdupQ_nu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdupQ_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdupQ_nu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8_t arg0_uint8_t; -- -- out_uint8x16_t = vdupq_n_u8 (arg0_uint8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_lanef32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_float32x2_t = vdup_lane_f32 (arg0_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_lanep16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_poly16x4_t = vdup_lane_p16 (arg0_poly16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanep64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdup_lanep64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vdup_lanep64 (void) --{ -- poly64x1_t out_poly64x1_t; -- poly64x1_t arg0_poly64x1_t; -- -- out_poly64x1_t = vdup_lane_p64 (arg0_poly64x1_t, 0); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_lanep8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_poly8x8_t = vdup_lane_p8 (arg0_poly8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_lanes16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vdup_lane_s16 (arg0_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_lanes32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vdup_lane_s32 (arg0_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanes64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdup_lanes64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_lanes64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- -- out_int64x1_t = vdup_lane_s64 (arg0_int64x1_t, 0); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_lanes8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vdup_lane_s8 (arg0_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_laneu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16x4_t = vdup_lane_u16 (arg0_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_laneu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32x2_t = vdup_lane_u32 (arg0_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_laneu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdup_laneu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_laneu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_uint64x1_t = vdup_lane_u64 (arg0_uint64x1_t, 0); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_laneu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vdup_lane_u8 (arg0_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_nf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_nf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32_t arg0_float32_t; -- -- out_float32x2_t = vdup_n_f32 (arg0_float32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_np16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_np16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_np16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16_t arg0_poly16_t; -- -- out_poly16x4_t = vdup_n_p16 (arg0_poly16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_np64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdup_np64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vdup_np64 (void) --{ -- poly64x1_t out_poly64x1_t; -- poly64_t arg0_poly64_t; -- -- out_poly64x1_t = vdup_n_p64 (arg0_poly64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_np8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_np8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_np8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8_t arg0_poly8_t; -- -- out_poly8x8_t = vdup_n_p8 (arg0_poly8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16_t arg0_int16_t; -- -- out_int16x4_t = vdup_n_s16 (arg0_int16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32_t arg0_int32_t; -- -- out_int32x2_t = vdup_n_s32 (arg0_int32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_ns64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdup_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_ns64 (void) --{ -- int64x1_t out_int64x1_t; -- int64_t arg0_int64_t; -- -- out_int64x1_t = vdup_n_s64 (arg0_int64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_ns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8_t arg0_int8_t; -- -- out_int8x8_t = vdup_n_s8 (arg0_int8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16_t arg0_uint16_t; -- -- out_uint16x4_t = vdup_n_u16 (arg0_uint16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32_t arg0_uint32_t; -- -- out_uint32x2_t = vdup_n_u32 (arg0_uint32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_nu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vdup_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_nu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64_t arg0_uint64_t; -- -- out_uint64x1_t = vdup_n_u64 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vdup_nu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vdup_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vdup_nu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8_t arg0_uint8_t; -- -- out_uint8x8_t = vdup_n_u8 (arg0_uint8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vect-vcvt.c -+++ b/src//dev/null -@@ -1,27 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mvectorize-with-neon-double" } */ --/* { dg-add-options arm_neon } */ -- --#define N 32 -- --int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; --float fa[N]; --int ia[N]; -- --int convert() --{ -- int i; -- -- /* int -> float */ -- for (i = 0; i < N; i++) -- fa[i] = (float) ib[i]; -- -- /* float -> int */ -- for (i = 0; i < N; i++) -- ia[i] = (int) fa[i]; -- -- return 0; --} -- --/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vect-vcvtq.c -+++ b/src//dev/null -@@ -1,27 +0,0 @@ --/* { dg-do compile } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ --/* { dg-add-options arm_neon } */ -- --#define N 32 -- --int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; --float fa[N]; --int ia[N]; -- --int convert() --{ -- int i; -- -- /* int -> float */ -- for (i = 0; i < N; i++) -- fa[i] = (float) ib[i]; -- -- /* float -> int */ -- for (i = 0; i < N; i++) -- ia[i] = (int) fa[i]; -- -- return 0; --} -- --/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veorQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veorQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veorQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = veorq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veorQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veorQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veorQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = veorq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veorQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veorQs64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veorQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = veorq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veorQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veorQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veorQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = veorq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veorQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veorQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veorQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = veorq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veorQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veorQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veorQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = veorq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veorQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veorQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veorQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = veorq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veorQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veorQu8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veorQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = veorq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veors16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veors16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veors16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = veor_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veors32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veors32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veors32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = veor_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veors64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `veors64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veors64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = veor_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/veors8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veors8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veors8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = veor_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veoru16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veoru16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veoru16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = veor_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veoru32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veoru32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veoru32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = veor_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/veoru64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `veoru64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veoru64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = veor_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/veoru8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `veoru8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_veoru8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = veor_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "veor\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4_t = vextq_f32 (arg0_float32x4_t, arg1_float32x4_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQp16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16x8_t arg0_poly16x8_t; -- poly16x8_t arg1_poly16x8_t; -- -- out_poly16x8_t = vextq_p16 (arg0_poly16x8_t, arg1_poly16x8_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQp64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vextQp64 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly64x2_t arg0_poly64x2_t; -- poly64x2_t arg1_poly64x2_t; -- -- out_poly64x2_t = vextq_p64 (arg0_poly64x2_t, arg1_poly64x2_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg0_poly8x16_t; -- poly8x16_t arg1_poly8x16_t; -- -- out_poly8x16_t = vextq_p8 (arg0_poly8x16_t, arg1_poly8x16_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vextq_s16 (arg0_int16x8_t, arg1_int16x8_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vextq_s32 (arg0_int32x4_t, arg1_int32x4_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vextq_s64 (arg0_int64x2_t, arg1_int64x2_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vextq_s8 (arg0_int8x16_t, arg1_int8x16_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vextq_u16 (arg0_uint16x8_t, arg1_uint16x8_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vextq_u32 (arg0_uint32x4_t, arg1_uint32x4_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vextq_u64 (arg0_uint64x2_t, arg1_uint64x2_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vextq_u8 (arg0_uint8x16_t, arg1_uint8x16_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vext_f32 (arg0_float32x2_t, arg1_float32x2_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextp16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16x4_t arg0_poly16x4_t; -- poly16x4_t arg1_poly16x4_t; -- -- out_poly16x4_t = vext_p16 (arg0_poly16x4_t, arg1_poly16x4_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextp64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vextp64 (void) --{ -- poly64x1_t out_poly64x1_t; -- poly64x1_t arg0_poly64x1_t; -- poly64x1_t arg1_poly64x1_t; -- -- out_poly64x1_t = vext_p64 (arg0_poly64x1_t, arg1_poly64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextp8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x8_t = vext_p8 (arg0_poly8x8_t, arg1_poly8x8_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vexts16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vexts16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vexts16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vext_s16 (arg0_int16x4_t, arg1_int16x4_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vexts32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vexts32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vexts32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vext_s32 (arg0_int32x2_t, arg1_int32x2_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vexts64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vexts64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vexts64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vext_s64 (arg0_int64x1_t, arg1_int64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vexts8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vexts8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vexts8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vext_s8 (arg0_int8x8_t, arg1_int8x8_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vext_u16 (arg0_uint16x4_t, arg1_uint16x4_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vext_u32 (arg0_uint32x2_t, arg1_uint32x2_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextu64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vext_u64 (arg0_uint64x1_t, arg1_uint64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vextu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vextu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vextu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vext_u8 (arg0_uint8x8_t, arg1_uint8x8_t, 0); --} -- --/* { dg-final { scan-assembler "vext\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vfmaQf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vfmaQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neonv2_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neonv2 } */ -- --#include "arm_neon.h" -- --void test_vfmaQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- float32x4_t arg2_float32x4_t; -- -- out_float32x4_t = vfmaq_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x4_t); --} -- --/* { dg-final { scan-assembler "vfma\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vfmaf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vfmaf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neonv2_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neonv2 } */ -- --#include "arm_neon.h" -- --void test_vfmaf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- float32x2_t arg2_float32x2_t; -- -- out_float32x2_t = vfma_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t); --} -- --/* { dg-final { scan-assembler "vfma\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vfmsQf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vfmsQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neonv2_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neonv2 } */ -- --#include "arm_neon.h" -- --void test_vfmsQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- float32x4_t arg2_float32x4_t; -- -- out_float32x4_t = vfmsq_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x4_t); --} -- --/* { dg-final { scan-assembler "vfms\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vfmsf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vfmsf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neonv2_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neonv2 } */ -- --#include "arm_neon.h" -- --void test_vfmsf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- float32x2_t arg2_float32x2_t; -- -- out_float32x2_t = vfms_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t); --} -- --/* { dg-final { scan-assembler "vfms\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vfp-shift-a2t2.c -+++ b/src//dev/null -@@ -1,27 +0,0 @@ --/* Check that NEON vector shifts support immediate values == size. /* -- --/* { dg-do compile } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps" } */ --/* { dg-add-options arm_neon } */ -- --#include <arm_neon.h> -- --uint16x8_t test_vshll_n_u8 (uint8x8_t a) --{ -- return vshll_n_u8(a, 8); --} -- --uint32x4_t test_vshll_n_u16 (uint16x4_t a) --{ -- return vshll_n_u16(a, 16); --} -- --uint64x2_t test_vshll_n_u32 (uint32x2_t a) --{ -- return vshll_n_u32(a, 32); --} -- --/* { dg-final { scan-assembler "vshll\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vshll\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vshll\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_lanef32 (void) --{ -- float32_t out_float32_t; -- float32x4_t arg0_float32x4_t; -- -- out_float32_t = vgetq_lane_f32 (arg0_float32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_lanep16 (void) --{ -- poly16_t out_poly16_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_poly16_t = vgetq_lane_p16 (arg0_poly16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.u16\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_lanep8 (void) --{ -- poly8_t out_poly8_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly8_t = vgetq_lane_p8 (arg0_poly8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.u8\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_lanes16 (void) --{ -- int16_t out_int16_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16_t = vgetq_lane_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.s16\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_lanes32 (void) --{ -- int32_t out_int32_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32_t = vgetq_lane_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_lanes64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_lanes64 (void) --{ -- register int64_t out_int64_t asm ("r0"); -- int64x2_t arg0_int64x2_t; -- -- out_int64_t = vgetq_lane_s64 (arg0_int64x2_t, 0); --} -- --/* { dg-final { scan-assembler "((vmov)|(fmrrd))\[ \]+\[rR\]\[0-9\]+, \[rR\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_lanes8 (void) --{ -- int8_t out_int8_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8_t = vgetq_lane_s8 (arg0_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.s8\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_laneu16 (void) --{ -- uint16_t out_uint16_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16_t = vgetq_lane_u16 (arg0_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.u16\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_laneu32 (void) --{ -- uint32_t out_uint32_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32_t = vgetq_lane_u32 (arg0_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_laneu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_laneu64 (void) --{ -- register uint64_t out_uint64_t asm ("r0"); -- uint64x2_t arg0_uint64x2_t; -- -- out_uint64_t = vgetq_lane_u64 (arg0_uint64x2_t, 0); --} -- --/* { dg-final { scan-assembler "((vmov)|(fmrrd))\[ \]+\[rR\]\[0-9\]+, \[rR\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vgetQ_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vgetQ_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vgetQ_laneu8 (void) --{ -- uint8_t out_uint8_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8_t = vgetq_lane_u8 (arg0_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.u8\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highf32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x4_t arg0_float32x4_t; -- -- out_float32x2_t = vget_high_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highp16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highp16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_poly16x4_t = vget_high_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highp64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vget_highp64 (void) --{ -- poly64x1_t out_poly64x1_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_poly64x1_t = vget_high_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highp8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highp8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly8x8_t = vget_high_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highs16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x4_t = vget_high_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highs32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x2_t = vget_high_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highs64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highs64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x2_t arg0_int64x2_t; -- -- out_int64x1_t = vget_high_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highs8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x8_t = vget_high_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highu16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16x4_t = vget_high_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highu32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32x2_t = vget_high_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint64x1_t = vget_high_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_highu8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_highu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_highu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x8_t = vget_high_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lanef32 (void) --{ -- float32_t out_float32_t; -- float32x2_t arg0_float32x2_t; -- -- out_float32_t = vget_lane_f32 (arg0_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lanep16 (void) --{ -- poly16_t out_poly16_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_poly16_t = vget_lane_p16 (arg0_poly16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.u16\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lanep8 (void) --{ -- poly8_t out_poly8_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_poly8_t = vget_lane_p8 (arg0_poly8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.u8\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lanes16 (void) --{ -- int16_t out_int16_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16_t = vget_lane_s16 (arg0_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.s16\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lanes32 (void) --{ -- int32_t out_int32_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32_t = vget_lane_s32 (arg0_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lanes64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_lanes64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lanes64 (void) --{ -- int64_t out_int64_t; -- int64x1_t arg0_int64x1_t; -- -- out_int64_t = vget_lane_s64 (arg0_int64x1_t, 0); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lanes8 (void) --{ -- int8_t out_int8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8_t = vget_lane_s8 (arg0_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.s8\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_laneu16 (void) --{ -- uint16_t out_uint16_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16_t = vget_lane_u16 (arg0_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.u16\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_laneu32 (void) --{ -- uint32_t out_uint32_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32_t = vget_lane_u32 (arg0_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.32\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_laneu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_laneu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_laneu64 (void) --{ -- uint64_t out_uint64_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_uint64_t = vget_lane_u64 (arg0_uint64x1_t, 0); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_laneu8 (void) --{ -- uint8_t out_uint8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8_t = vget_lane_u8 (arg0_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vmov\.u8\[ \]+\[rR\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lowf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lowf32 (void) --{ -- register float32x2_t out_float32x2_t asm ("d18"); -- float32x4_t arg0_float32x4_t; -- -- out_float32x2_t = vget_low_f32 (arg0_float32x4_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowp16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lowp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lowp16 (void) --{ -- register poly16x4_t out_poly16x4_t asm ("d18"); -- poly16x8_t arg0_poly16x8_t; -- -- out_poly16x4_t = vget_low_p16 (arg0_poly16x8_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowp64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_lowp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vget_lowp64 (void) --{ -- poly64x1_t out_poly64x1_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_poly64x1_t = vget_low_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lowp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lowp8 (void) --{ -- register poly8x8_t out_poly8x8_t asm ("d18"); -- poly8x16_t arg0_poly8x16_t; -- -- out_poly8x8_t = vget_low_p8 (arg0_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lows16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lows16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lows16 (void) --{ -- register int16x4_t out_int16x4_t asm ("d18"); -- int16x8_t arg0_int16x8_t; -- -- out_int16x4_t = vget_low_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lows32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lows32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lows32 (void) --{ -- register int32x2_t out_int32x2_t asm ("d18"); -- int32x4_t arg0_int32x4_t; -- -- out_int32x2_t = vget_low_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lows64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_lows64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lows64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x2_t arg0_int64x2_t; -- -- out_int64x1_t = vget_low_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lows8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lows8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lows8 (void) --{ -- register int8x8_t out_int8x8_t asm ("d18"); -- int8x16_t arg0_int8x16_t; -- -- out_int8x8_t = vget_low_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lowu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lowu16 (void) --{ -- register uint16x4_t out_uint16x4_t asm ("d18"); -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16x4_t = vget_low_u16 (arg0_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lowu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lowu32 (void) --{ -- register uint32x2_t out_uint32x2_t asm ("d18"); -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32x2_t = vget_low_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vget_lowu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lowu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint64x1_t = vget_low_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vget_lowu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vget_lowu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vget_lowu8 (void) --{ -- register uint8x8_t out_uint8x8_t asm ("d18"); -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x8_t = vget_low_u8 (arg0_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vhaddq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vhadd\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vhaddq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vhadd\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vhaddq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vhadd\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vhaddq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vhadd\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vhaddq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vhadd\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vhaddq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vhadd\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhadds16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhadds16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhadds16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vhadd_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vhadd\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhadds32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhadds32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhadds32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vhadd_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vhadd\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhadds8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhadds8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhadds8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vhadd_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vhadd\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vhadd_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vhadd\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vhadd_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vhadd\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhaddu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhaddu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhaddu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vhadd_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vhadd\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vhsubq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vhsub\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vhsubq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vhsub\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vhsubq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vhsub\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vhsubq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vhsub\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubQu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vhsubq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vhsub\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vhsubq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vhsub\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vhsub_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vhsub\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vhsub_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vhsub\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vhsub_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vhsub\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vhsub_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vhsub\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vhsub_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vhsub\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vhsubu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vhsubu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vhsubu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vhsub_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vhsub\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupf32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dupf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dupf32 (void) --{ -- float32x4_t out_float32x4_t; -- -- out_float32x4_t = vld1q_dup_f32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dupp16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dupp16 (void) --{ -- poly16x8_t out_poly16x8_t; -- -- out_poly16x8_t = vld1q_dup_p16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dupp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dupp64 (void) --{ -- poly64x2_t out_poly64x2_t; -- -- out_poly64x2_t = vld1q_dup_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupp8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dupp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dupp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- -- out_poly8x16_t = vld1q_dup_p8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dups16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dups16 (void) --{ -- int16x8_t out_int16x8_t; -- -- out_int16x8_t = vld1q_dup_s16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dups32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dups32 (void) --{ -- int32x4_t out_int32x4_t; -- -- out_int32x4_t = vld1q_dup_s32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dups64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dups64 (void) --{ -- int64x2_t out_int64x2_t; -- -- out_int64x2_t = vld1q_dup_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dups8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dups8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dups8 (void) --{ -- int8x16_t out_int8x16_t; -- -- out_int8x16_t = vld1q_dup_s8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dupu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dupu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- -- out_uint16x8_t = vld1q_dup_u16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dupu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dupu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- -- out_uint32x4_t = vld1q_dup_u32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dupu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dupu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- -- out_uint64x2_t = vld1q_dup_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_dupu8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Q_dupu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_dupu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- -- out_uint8x16_t = vld1q_dup_u8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_lanef32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4_t = vld1q_lane_f32 (0, arg1_float32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_lanep16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16x8_t arg1_poly16x8_t; -- -- out_poly16x8_t = vld1q_lane_p16 (0, arg1_poly16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_lanep64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld1Q_lanep64 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly64x2_t arg1_poly64x2_t; -- -- out_poly64x2_t = vld1q_lane_p64 (0, arg1_poly64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_lanep8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg1_poly8x16_t; -- -- out_poly8x16_t = vld1q_lane_p8 (0, arg1_poly8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_lanes16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vld1q_lane_s16 (0, arg1_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_lanes32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vld1q_lane_s32 (0, arg1_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_lanes64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_lanes64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vld1q_lane_s64 (0, arg1_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_lanes8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vld1q_lane_s8 (0, arg1_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_laneu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vld1q_lane_u16 (0, arg1_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_laneu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vld1q_lane_u32 (0, arg1_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_laneu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_laneu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vld1q_lane_u64 (0, arg1_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Q_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1Q_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Q_laneu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vld1q_lane_u8 (0, arg1_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qf32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qf32 (void) --{ -- float32x4_t out_float32x4_t; -- -- out_float32x4_t = vld1q_f32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qp16 (void) --{ -- poly16x8_t out_poly16x8_t; -- -- out_poly16x8_t = vld1q_p16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld1Qp64 (void) --{ -- poly64x2_t out_poly64x2_t; -- -- out_poly64x2_t = vld1q_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qp8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- -- out_poly8x16_t = vld1q_p8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qs16 (void) --{ -- int16x8_t out_int16x8_t; -- -- out_int16x8_t = vld1q_s16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qs32 (void) --{ -- int32x4_t out_int32x4_t; -- -- out_int32x4_t = vld1q_s32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qs64 (void) --{ -- int64x2_t out_int64x2_t; -- -- out_int64x2_t = vld1q_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qs8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qs8 (void) --{ -- int8x16_t out_int8x16_t; -- -- out_int8x16_t = vld1q_s8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- -- out_uint16x8_t = vld1q_u16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- -- out_uint32x4_t = vld1q_u32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- -- out_uint64x2_t = vld1q_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1Qu8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1Qu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- -- out_uint8x16_t = vld1q_u8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupf32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dupf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dupf32 (void) --{ -- float32x2_t out_float32x2_t; -- -- out_float32x2_t = vld1_dup_f32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dupp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dupp16 (void) --{ -- poly16x4_t out_poly16x4_t; -- -- out_poly16x4_t = vld1_dup_p16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dupp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld1_dupp64 (void) --{ -- poly64x1_t out_poly64x1_t; -- -- out_poly64x1_t = vld1_dup_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupp8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dupp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dupp8 (void) --{ -- poly8x8_t out_poly8x8_t; -- -- out_poly8x8_t = vld1_dup_p8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dups16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dups16 (void) --{ -- int16x4_t out_int16x4_t; -- -- out_int16x4_t = vld1_dup_s16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dups32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dups32 (void) --{ -- int32x2_t out_int32x2_t; -- -- out_int32x2_t = vld1_dup_s32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dups64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dups64 (void) --{ -- int64x1_t out_int64x1_t; -- -- out_int64x1_t = vld1_dup_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dups8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dups8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dups8 (void) --{ -- int8x8_t out_int8x8_t; -- -- out_int8x8_t = vld1_dup_s8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dupu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dupu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- -- out_uint16x4_t = vld1_dup_u16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dupu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dupu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- -- out_uint32x2_t = vld1_dup_u32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dupu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dupu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- -- out_uint64x1_t = vld1_dup_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_dupu8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1_dupu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_dupu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- -- out_uint8x8_t = vld1_dup_u8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\\\]\\\})|(\[dD\]\[0-9\]+\\\[\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_lanef32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vld1_lane_f32 (0, arg1_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_lanep16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16x4_t arg1_poly16x4_t; -- -- out_poly16x4_t = vld1_lane_p16 (0, arg1_poly16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_lanep64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld1_lanep64 (void) --{ -- poly64x1_t out_poly64x1_t; -- poly64x1_t arg1_poly64x1_t; -- -- out_poly64x1_t = vld1_lane_p64 (0, arg1_poly64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_lanep8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x8_t = vld1_lane_p8 (0, arg1_poly8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_lanes16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vld1_lane_s16 (0, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_lanes32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vld1_lane_s32 (0, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_lanes64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_lanes64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vld1_lane_s64 (0, arg1_int64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_lanes8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vld1_lane_s8 (0, arg1_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_laneu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vld1_lane_u16 (0, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_laneu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vld1_lane_u32 (0, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_laneu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_laneu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vld1_lane_u64 (0, arg1_uint64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld1_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1_laneu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vld1_lane_u8 (0, arg1_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1f32 (void) --{ -- float32x2_t out_float32x2_t; -- -- out_float32x2_t = vld1_f32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1p16 (void) --{ -- poly16x4_t out_poly16x4_t; -- -- out_poly16x4_t = vld1_p16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld1p64 (void) --{ -- poly64x1_t out_poly64x1_t; -- -- out_poly64x1_t = vld1_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- -- out_poly8x8_t = vld1_p8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1s16 (void) --{ -- int16x4_t out_int16x4_t; -- -- out_int16x4_t = vld1_s16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1s32 (void) --{ -- int32x2_t out_int32x2_t; -- -- out_int32x2_t = vld1_s32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1s64 (void) --{ -- int64x1_t out_int64x1_t; -- -- out_int64x1_t = vld1_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1s8 (void) --{ -- int8x8_t out_int8x8_t; -- -- out_int8x8_t = vld1_s8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1u16 (void) --{ -- uint16x4_t out_uint16x4_t; -- -- out_uint16x4_t = vld1_u16 (0); --} -- --/* { dg-final { scan-assembler "vld1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1u32 (void) --{ -- uint32x2_t out_uint32x2_t; -- -- out_uint32x2_t = vld1_u32 (0); --} -- --/* { dg-final { scan-assembler "vld1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1u64 (void) --{ -- uint64x1_t out_uint64x1_t; -- -- out_uint64x1_t = vld1_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld1u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld1u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld1u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- -- out_uint8x8_t = vld1_u8 (0); --} -- --/* { dg-final { scan-assembler "vld1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Q_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Q_lanef32 (void) --{ -- float32x4x2_t out_float32x4x2_t; -- float32x4x2_t arg1_float32x4x2_t; -- -- out_float32x4x2_t = vld2q_lane_f32 (0, arg1_float32x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Q_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Q_lanep16 (void) --{ -- poly16x8x2_t out_poly16x8x2_t; -- poly16x8x2_t arg1_poly16x8x2_t; -- -- out_poly16x8x2_t = vld2q_lane_p16 (0, arg1_poly16x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Q_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Q_lanes16 (void) --{ -- int16x8x2_t out_int16x8x2_t; -- int16x8x2_t arg1_int16x8x2_t; -- -- out_int16x8x2_t = vld2q_lane_s16 (0, arg1_int16x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Q_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Q_lanes32 (void) --{ -- int32x4x2_t out_int32x4x2_t; -- int32x4x2_t arg1_int32x4x2_t; -- -- out_int32x4x2_t = vld2q_lane_s32 (0, arg1_int32x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Q_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Q_laneu16 (void) --{ -- uint16x8x2_t out_uint16x8x2_t; -- uint16x8x2_t arg1_uint16x8x2_t; -- -- out_uint16x8x2_t = vld2q_lane_u16 (0, arg1_uint16x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Q_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Q_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Q_laneu32 (void) --{ -- uint32x4x2_t out_uint32x4x2_t; -- uint32x4x2_t arg1_uint32x4x2_t; -- -- out_uint32x4x2_t = vld2q_lane_u32 (0, arg1_uint32x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qf32 (void) --{ -- float32x4x2_t out_float32x4x2_t; -- -- out_float32x4x2_t = vld2q_f32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qp16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qp16 (void) --{ -- poly16x8x2_t out_poly16x8x2_t; -- -- out_poly16x8x2_t = vld2q_p16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qp8 (void) --{ -- poly8x16x2_t out_poly8x16x2_t; -- -- out_poly8x16x2_t = vld2q_p8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qs16 (void) --{ -- int16x8x2_t out_int16x8x2_t; -- -- out_int16x8x2_t = vld2q_s16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qs32 (void) --{ -- int32x4x2_t out_int32x4x2_t; -- -- out_int32x4x2_t = vld2q_s32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qs8 (void) --{ -- int8x16x2_t out_int8x16x2_t; -- -- out_int8x16x2_t = vld2q_s8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qu16 (void) --{ -- uint16x8x2_t out_uint16x8x2_t; -- -- out_uint16x8x2_t = vld2q_u16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qu32 (void) --{ -- uint32x4x2_t out_uint32x4x2_t; -- -- out_uint32x4x2_t = vld2q_u32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2Qu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2Qu8 (void) --{ -- uint8x16x2_t out_uint8x16x2_t; -- -- out_uint8x16x2_t = vld2q_u8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupf32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dupf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dupf32 (void) --{ -- float32x2x2_t out_float32x2x2_t; -- -- out_float32x2x2_t = vld2_dup_f32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dupp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dupp16 (void) --{ -- poly16x4x2_t out_poly16x4x2_t; -- -- out_poly16x4x2_t = vld2_dup_p16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dupp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld2_dupp64 (void) --{ -- poly64x1x2_t out_poly64x1x2_t; -- -- out_poly64x1x2_t = vld2_dup_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupp8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dupp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dupp8 (void) --{ -- poly8x8x2_t out_poly8x8x2_t; -- -- out_poly8x8x2_t = vld2_dup_p8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dups16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dups16 (void) --{ -- int16x4x2_t out_int16x4x2_t; -- -- out_int16x4x2_t = vld2_dup_s16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dups32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dups32 (void) --{ -- int32x2x2_t out_int32x2x2_t; -- -- out_int32x2x2_t = vld2_dup_s32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dups64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dups64 (void) --{ -- int64x1x2_t out_int64x1x2_t; -- -- out_int64x1x2_t = vld2_dup_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dups8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dups8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dups8 (void) --{ -- int8x8x2_t out_int8x8x2_t; -- -- out_int8x8x2_t = vld2_dup_s8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dupu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dupu16 (void) --{ -- uint16x4x2_t out_uint16x4x2_t; -- -- out_uint16x4x2_t = vld2_dup_u16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dupu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dupu32 (void) --{ -- uint32x2x2_t out_uint32x2x2_t; -- -- out_uint32x2x2_t = vld2_dup_u32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dupu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dupu64 (void) --{ -- uint64x1x2_t out_uint64x1x2_t; -- -- out_uint64x1x2_t = vld2_dup_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_dupu8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2_dupu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_dupu8 (void) --{ -- uint8x8x2_t out_uint8x8x2_t; -- -- out_uint8x8x2_t = vld2_dup_u8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_lanef32 (void) --{ -- float32x2x2_t out_float32x2x2_t; -- float32x2x2_t arg1_float32x2x2_t; -- -- out_float32x2x2_t = vld2_lane_f32 (0, arg1_float32x2x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_lanep16 (void) --{ -- poly16x4x2_t out_poly16x4x2_t; -- poly16x4x2_t arg1_poly16x4x2_t; -- -- out_poly16x4x2_t = vld2_lane_p16 (0, arg1_poly16x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_lanep8 (void) --{ -- poly8x8x2_t out_poly8x8x2_t; -- poly8x8x2_t arg1_poly8x8x2_t; -- -- out_poly8x8x2_t = vld2_lane_p8 (0, arg1_poly8x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_lanes16 (void) --{ -- int16x4x2_t out_int16x4x2_t; -- int16x4x2_t arg1_int16x4x2_t; -- -- out_int16x4x2_t = vld2_lane_s16 (0, arg1_int16x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_lanes32 (void) --{ -- int32x2x2_t out_int32x2x2_t; -- int32x2x2_t arg1_int32x2x2_t; -- -- out_int32x2x2_t = vld2_lane_s32 (0, arg1_int32x2x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_lanes8 (void) --{ -- int8x8x2_t out_int8x8x2_t; -- int8x8x2_t arg1_int8x8x2_t; -- -- out_int8x8x2_t = vld2_lane_s8 (0, arg1_int8x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_laneu16 (void) --{ -- uint16x4x2_t out_uint16x4x2_t; -- uint16x4x2_t arg1_uint16x4x2_t; -- -- out_uint16x4x2_t = vld2_lane_u16 (0, arg1_uint16x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_laneu32 (void) --{ -- uint32x2x2_t out_uint32x2x2_t; -- uint32x2x2_t arg1_uint32x2x2_t; -- -- out_uint32x2x2_t = vld2_lane_u32 (0, arg1_uint32x2x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld2_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2_laneu8 (void) --{ -- uint8x8x2_t out_uint8x8x2_t; -- uint8x8x2_t arg1_uint8x8x2_t; -- -- out_uint8x8x2_t = vld2_lane_u8 (0, arg1_uint8x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2f32 (void) --{ -- float32x2x2_t out_float32x2x2_t; -- -- out_float32x2x2_t = vld2_f32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2p16 (void) --{ -- poly16x4x2_t out_poly16x4x2_t; -- -- out_poly16x4x2_t = vld2_p16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld2p64 (void) --{ -- poly64x1x2_t out_poly64x1x2_t; -- -- out_poly64x1x2_t = vld2_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2p8 (void) --{ -- poly8x8x2_t out_poly8x8x2_t; -- -- out_poly8x8x2_t = vld2_p8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2s16 (void) --{ -- int16x4x2_t out_int16x4x2_t; -- -- out_int16x4x2_t = vld2_s16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2s32 (void) --{ -- int32x2x2_t out_int32x2x2_t; -- -- out_int32x2x2_t = vld2_s32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2s64 (void) --{ -- int64x1x2_t out_int64x1x2_t; -- -- out_int64x1x2_t = vld2_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2s8 (void) --{ -- int8x8x2_t out_int8x8x2_t; -- -- out_int8x8x2_t = vld2_s8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2u16 (void) --{ -- uint16x4x2_t out_uint16x4x2_t; -- -- out_uint16x4x2_t = vld2_u16 (0); --} -- --/* { dg-final { scan-assembler "vld2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2u32 (void) --{ -- uint32x2x2_t out_uint32x2x2_t; -- -- out_uint32x2x2_t = vld2_u32 (0); --} -- --/* { dg-final { scan-assembler "vld2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2u64 (void) --{ -- uint64x1x2_t out_uint64x1x2_t; -- -- out_uint64x1x2_t = vld2_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld2u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld2u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld2u8 (void) --{ -- uint8x8x2_t out_uint8x8x2_t; -- -- out_uint8x8x2_t = vld2_u8 (0); --} -- --/* { dg-final { scan-assembler "vld2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Q_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Q_lanef32 (void) --{ -- float32x4x3_t out_float32x4x3_t; -- float32x4x3_t arg1_float32x4x3_t; -- -- out_float32x4x3_t = vld3q_lane_f32 (0, arg1_float32x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Q_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Q_lanep16 (void) --{ -- poly16x8x3_t out_poly16x8x3_t; -- poly16x8x3_t arg1_poly16x8x3_t; -- -- out_poly16x8x3_t = vld3q_lane_p16 (0, arg1_poly16x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Q_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Q_lanes16 (void) --{ -- int16x8x3_t out_int16x8x3_t; -- int16x8x3_t arg1_int16x8x3_t; -- -- out_int16x8x3_t = vld3q_lane_s16 (0, arg1_int16x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Q_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Q_lanes32 (void) --{ -- int32x4x3_t out_int32x4x3_t; -- int32x4x3_t arg1_int32x4x3_t; -- -- out_int32x4x3_t = vld3q_lane_s32 (0, arg1_int32x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Q_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Q_laneu16 (void) --{ -- uint16x8x3_t out_uint16x8x3_t; -- uint16x8x3_t arg1_uint16x8x3_t; -- -- out_uint16x8x3_t = vld3q_lane_u16 (0, arg1_uint16x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Q_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Q_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Q_laneu32 (void) --{ -- uint32x4x3_t out_uint32x4x3_t; -- uint32x4x3_t arg1_uint32x4x3_t; -- -- out_uint32x4x3_t = vld3q_lane_u32 (0, arg1_uint32x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qf32 (void) --{ -- float32x4x3_t out_float32x4x3_t; -- -- out_float32x4x3_t = vld3q_f32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qp16 (void) --{ -- poly16x8x3_t out_poly16x8x3_t; -- -- out_poly16x8x3_t = vld3q_p16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qp8 (void) --{ -- poly8x16x3_t out_poly8x16x3_t; -- -- out_poly8x16x3_t = vld3q_p8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qs16 (void) --{ -- int16x8x3_t out_int16x8x3_t; -- -- out_int16x8x3_t = vld3q_s16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qs32 (void) --{ -- int32x4x3_t out_int32x4x3_t; -- -- out_int32x4x3_t = vld3q_s32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qs8 (void) --{ -- int8x16x3_t out_int8x16x3_t; -- -- out_int8x16x3_t = vld3q_s8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qu16 (void) --{ -- uint16x8x3_t out_uint16x8x3_t; -- -- out_uint16x8x3_t = vld3q_u16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qu32 (void) --{ -- uint32x4x3_t out_uint32x4x3_t; -- -- out_uint32x4x3_t = vld3q_u32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3Qu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3Qu8 (void) --{ -- uint8x16x3_t out_uint8x16x3_t; -- -- out_uint8x16x3_t = vld3q_u8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupf32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dupf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dupf32 (void) --{ -- float32x2x3_t out_float32x2x3_t; -- -- out_float32x2x3_t = vld3_dup_f32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dupp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dupp16 (void) --{ -- poly16x4x3_t out_poly16x4x3_t; -- -- out_poly16x4x3_t = vld3_dup_p16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dupp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld3_dupp64 (void) --{ -- poly64x1x3_t out_poly64x1x3_t; -- -- out_poly64x1x3_t = vld3_dup_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupp8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dupp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dupp8 (void) --{ -- poly8x8x3_t out_poly8x8x3_t; -- -- out_poly8x8x3_t = vld3_dup_p8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dups16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dups16 (void) --{ -- int16x4x3_t out_int16x4x3_t; -- -- out_int16x4x3_t = vld3_dup_s16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dups32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dups32 (void) --{ -- int32x2x3_t out_int32x2x3_t; -- -- out_int32x2x3_t = vld3_dup_s32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dups64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dups64 (void) --{ -- int64x1x3_t out_int64x1x3_t; -- -- out_int64x1x3_t = vld3_dup_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dups8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dups8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dups8 (void) --{ -- int8x8x3_t out_int8x8x3_t; -- -- out_int8x8x3_t = vld3_dup_s8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dupu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dupu16 (void) --{ -- uint16x4x3_t out_uint16x4x3_t; -- -- out_uint16x4x3_t = vld3_dup_u16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dupu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dupu32 (void) --{ -- uint32x2x3_t out_uint32x2x3_t; -- -- out_uint32x2x3_t = vld3_dup_u32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dupu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dupu64 (void) --{ -- uint64x1x3_t out_uint64x1x3_t; -- -- out_uint64x1x3_t = vld3_dup_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_dupu8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3_dupu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_dupu8 (void) --{ -- uint8x8x3_t out_uint8x8x3_t; -- -- out_uint8x8x3_t = vld3_dup_u8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_lanef32 (void) --{ -- float32x2x3_t out_float32x2x3_t; -- float32x2x3_t arg1_float32x2x3_t; -- -- out_float32x2x3_t = vld3_lane_f32 (0, arg1_float32x2x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_lanep16 (void) --{ -- poly16x4x3_t out_poly16x4x3_t; -- poly16x4x3_t arg1_poly16x4x3_t; -- -- out_poly16x4x3_t = vld3_lane_p16 (0, arg1_poly16x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_lanep8 (void) --{ -- poly8x8x3_t out_poly8x8x3_t; -- poly8x8x3_t arg1_poly8x8x3_t; -- -- out_poly8x8x3_t = vld3_lane_p8 (0, arg1_poly8x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_lanes16 (void) --{ -- int16x4x3_t out_int16x4x3_t; -- int16x4x3_t arg1_int16x4x3_t; -- -- out_int16x4x3_t = vld3_lane_s16 (0, arg1_int16x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_lanes32 (void) --{ -- int32x2x3_t out_int32x2x3_t; -- int32x2x3_t arg1_int32x2x3_t; -- -- out_int32x2x3_t = vld3_lane_s32 (0, arg1_int32x2x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_lanes8 (void) --{ -- int8x8x3_t out_int8x8x3_t; -- int8x8x3_t arg1_int8x8x3_t; -- -- out_int8x8x3_t = vld3_lane_s8 (0, arg1_int8x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_laneu16 (void) --{ -- uint16x4x3_t out_uint16x4x3_t; -- uint16x4x3_t arg1_uint16x4x3_t; -- -- out_uint16x4x3_t = vld3_lane_u16 (0, arg1_uint16x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_laneu32 (void) --{ -- uint32x2x3_t out_uint32x2x3_t; -- uint32x2x3_t arg1_uint32x2x3_t; -- -- out_uint32x2x3_t = vld3_lane_u32 (0, arg1_uint32x2x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld3_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3_laneu8 (void) --{ -- uint8x8x3_t out_uint8x8x3_t; -- uint8x8x3_t arg1_uint8x8x3_t; -- -- out_uint8x8x3_t = vld3_lane_u8 (0, arg1_uint8x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3f32 (void) --{ -- float32x2x3_t out_float32x2x3_t; -- -- out_float32x2x3_t = vld3_f32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3p16 (void) --{ -- poly16x4x3_t out_poly16x4x3_t; -- -- out_poly16x4x3_t = vld3_p16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld3p64 (void) --{ -- poly64x1x3_t out_poly64x1x3_t; -- -- out_poly64x1x3_t = vld3_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3p8 (void) --{ -- poly8x8x3_t out_poly8x8x3_t; -- -- out_poly8x8x3_t = vld3_p8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3s16 (void) --{ -- int16x4x3_t out_int16x4x3_t; -- -- out_int16x4x3_t = vld3_s16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3s32 (void) --{ -- int32x2x3_t out_int32x2x3_t; -- -- out_int32x2x3_t = vld3_s32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3s64 (void) --{ -- int64x1x3_t out_int64x1x3_t; -- -- out_int64x1x3_t = vld3_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3s8 (void) --{ -- int8x8x3_t out_int8x8x3_t; -- -- out_int8x8x3_t = vld3_s8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3u16 (void) --{ -- uint16x4x3_t out_uint16x4x3_t; -- -- out_uint16x4x3_t = vld3_u16 (0); --} -- --/* { dg-final { scan-assembler "vld3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3u32 (void) --{ -- uint32x2x3_t out_uint32x2x3_t; -- -- out_uint32x2x3_t = vld3_u32 (0); --} -- --/* { dg-final { scan-assembler "vld3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3u64 (void) --{ -- uint64x1x3_t out_uint64x1x3_t; -- -- out_uint64x1x3_t = vld3_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld3u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld3u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld3u8 (void) --{ -- uint8x8x3_t out_uint8x8x3_t; -- -- out_uint8x8x3_t = vld3_u8 (0); --} -- --/* { dg-final { scan-assembler "vld3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Q_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Q_lanef32 (void) --{ -- float32x4x4_t out_float32x4x4_t; -- float32x4x4_t arg1_float32x4x4_t; -- -- out_float32x4x4_t = vld4q_lane_f32 (0, arg1_float32x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Q_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Q_lanep16 (void) --{ -- poly16x8x4_t out_poly16x8x4_t; -- poly16x8x4_t arg1_poly16x8x4_t; -- -- out_poly16x8x4_t = vld4q_lane_p16 (0, arg1_poly16x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Q_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Q_lanes16 (void) --{ -- int16x8x4_t out_int16x8x4_t; -- int16x8x4_t arg1_int16x8x4_t; -- -- out_int16x8x4_t = vld4q_lane_s16 (0, arg1_int16x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Q_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Q_lanes32 (void) --{ -- int32x4x4_t out_int32x4x4_t; -- int32x4x4_t arg1_int32x4x4_t; -- -- out_int32x4x4_t = vld4q_lane_s32 (0, arg1_int32x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Q_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Q_laneu16 (void) --{ -- uint16x8x4_t out_uint16x8x4_t; -- uint16x8x4_t arg1_uint16x8x4_t; -- -- out_uint16x8x4_t = vld4q_lane_u16 (0, arg1_uint16x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Q_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Q_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Q_laneu32 (void) --{ -- uint32x4x4_t out_uint32x4x4_t; -- uint32x4x4_t arg1_uint32x4x4_t; -- -- out_uint32x4x4_t = vld4q_lane_u32 (0, arg1_uint32x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qf32 (void) --{ -- float32x4x4_t out_float32x4x4_t; -- -- out_float32x4x4_t = vld4q_f32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qp16 (void) --{ -- poly16x8x4_t out_poly16x8x4_t; -- -- out_poly16x8x4_t = vld4q_p16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qp8 (void) --{ -- poly8x16x4_t out_poly8x16x4_t; -- -- out_poly8x16x4_t = vld4q_p8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qs16 (void) --{ -- int16x8x4_t out_int16x8x4_t; -- -- out_int16x8x4_t = vld4q_s16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qs32 (void) --{ -- int32x4x4_t out_int32x4x4_t; -- -- out_int32x4x4_t = vld4q_s32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qs8 (void) --{ -- int8x16x4_t out_int8x16x4_t; -- -- out_int8x16x4_t = vld4q_s8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qu16 (void) --{ -- uint16x8x4_t out_uint16x8x4_t; -- -- out_uint16x8x4_t = vld4q_u16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qu32 (void) --{ -- uint32x4x4_t out_uint32x4x4_t; -- -- out_uint32x4x4_t = vld4q_u32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4Qu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4Qu8 (void) --{ -- uint8x16x4_t out_uint8x16x4_t; -- -- out_uint8x16x4_t = vld4q_u8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupf32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dupf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dupf32 (void) --{ -- float32x2x4_t out_float32x2x4_t; -- -- out_float32x2x4_t = vld4_dup_f32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dupp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dupp16 (void) --{ -- poly16x4x4_t out_poly16x4x4_t; -- -- out_poly16x4x4_t = vld4_dup_p16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dupp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld4_dupp64 (void) --{ -- poly64x1x4_t out_poly64x1x4_t; -- -- out_poly64x1x4_t = vld4_dup_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupp8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dupp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dupp8 (void) --{ -- poly8x8x4_t out_poly8x8x4_t; -- -- out_poly8x8x4_t = vld4_dup_p8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dups16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dups16 (void) --{ -- int16x4x4_t out_int16x4x4_t; -- -- out_int16x4x4_t = vld4_dup_s16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dups32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dups32 (void) --{ -- int32x2x4_t out_int32x2x4_t; -- -- out_int32x2x4_t = vld4_dup_s32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dups64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dups64 (void) --{ -- int64x1x4_t out_int64x1x4_t; -- -- out_int64x1x4_t = vld4_dup_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dups8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dups8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dups8 (void) --{ -- int8x8x4_t out_int8x8x4_t; -- -- out_int8x8x4_t = vld4_dup_s8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dupu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dupu16 (void) --{ -- uint16x4x4_t out_uint16x4x4_t; -- -- out_uint16x4x4_t = vld4_dup_u16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dupu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dupu32 (void) --{ -- uint32x2x4_t out_uint32x2x4_t; -- -- out_uint32x2x4_t = vld4_dup_u32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dupu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dupu64 (void) --{ -- uint64x1x4_t out_uint64x1x4_t; -- -- out_uint64x1x4_t = vld4_dup_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_dupu8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4_dupu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_dupu8 (void) --{ -- uint8x8x4_t out_uint8x8x4_t; -- -- out_uint8x8x4_t = vld4_dup_u8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\\\]-\[dD\]\[0-9\]+\\\[\\\])|(\[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\], \[dD\]\[0-9\]+\\\[\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_lanef32 (void) --{ -- float32x2x4_t out_float32x2x4_t; -- float32x2x4_t arg1_float32x2x4_t; -- -- out_float32x2x4_t = vld4_lane_f32 (0, arg1_float32x2x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_lanep16 (void) --{ -- poly16x4x4_t out_poly16x4x4_t; -- poly16x4x4_t arg1_poly16x4x4_t; -- -- out_poly16x4x4_t = vld4_lane_p16 (0, arg1_poly16x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_lanep8 (void) --{ -- poly8x8x4_t out_poly8x8x4_t; -- poly8x8x4_t arg1_poly8x8x4_t; -- -- out_poly8x8x4_t = vld4_lane_p8 (0, arg1_poly8x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_lanes16 (void) --{ -- int16x4x4_t out_int16x4x4_t; -- int16x4x4_t arg1_int16x4x4_t; -- -- out_int16x4x4_t = vld4_lane_s16 (0, arg1_int16x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_lanes32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_lanes32 (void) --{ -- int32x2x4_t out_int32x2x4_t; -- int32x2x4_t arg1_int32x2x4_t; -- -- out_int32x2x4_t = vld4_lane_s32 (0, arg1_int32x2x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_lanes8 (void) --{ -- int8x8x4_t out_int8x8x4_t; -- int8x8x4_t arg1_int8x8x4_t; -- -- out_int8x8x4_t = vld4_lane_s8 (0, arg1_int8x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_laneu16 (void) --{ -- uint16x4x4_t out_uint16x4x4_t; -- uint16x4x4_t arg1_uint16x4x4_t; -- -- out_uint16x4x4_t = vld4_lane_u16 (0, arg1_uint16x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_laneu32 (void) --{ -- uint32x2x4_t out_uint32x2x4_t; -- uint32x2x4_t arg1_uint32x2x4_t; -- -- out_uint32x2x4_t = vld4_lane_u32 (0, arg1_uint32x2x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vld4_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4_laneu8 (void) --{ -- uint8x8x4_t out_uint8x8x4_t; -- uint8x8x4_t arg1_uint8x8x4_t; -- -- out_uint8x8x4_t = vld4_lane_u8 (0, arg1_uint8x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4f32 (void) --{ -- float32x2x4_t out_float32x2x4_t; -- -- out_float32x2x4_t = vld4_f32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4p16 (void) --{ -- poly16x4x4_t out_poly16x4x4_t; -- -- out_poly16x4x4_t = vld4_p16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vld4p64 (void) --{ -- poly64x1x4_t out_poly64x1x4_t; -- -- out_poly64x1x4_t = vld4_p64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4p8 (void) --{ -- poly8x8x4_t out_poly8x8x4_t; -- -- out_poly8x8x4_t = vld4_p8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4s16 (void) --{ -- int16x4x4_t out_int16x4x4_t; -- -- out_int16x4x4_t = vld4_s16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4s32 (void) --{ -- int32x2x4_t out_int32x2x4_t; -- -- out_int32x2x4_t = vld4_s32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4s64 (void) --{ -- int64x1x4_t out_int64x1x4_t; -- -- out_int64x1x4_t = vld4_s64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4s8 (void) --{ -- int8x8x4_t out_int8x8x4_t; -- -- out_int8x8x4_t = vld4_s8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4u16 (void) --{ -- uint16x4x4_t out_uint16x4x4_t; -- -- out_uint16x4x4_t = vld4_u16 (0); --} -- --/* { dg-final { scan-assembler "vld4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4u32 (void) --{ -- uint32x2x4_t out_uint32x2x4_t; -- -- out_uint32x2x4_t = vld4_u32 (0); --} -- --/* { dg-final { scan-assembler "vld4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4u64 (void) --{ -- uint64x1x4_t out_uint64x1x4_t; -- -- out_uint64x1x4_t = vld4_u64 (0); --} -- --/* { dg-final { scan-assembler "vld1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vld4u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vld4u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vld4u8 (void) --{ -- uint8x8x4_t out_uint8x8x4_t; -- -- out_uint8x8x4_t = vld4_u8 (0); --} -- --/* { dg-final { scan-assembler "vld4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4_t = vmaxq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vmax\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vmaxq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vmax\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vmaxq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vmax\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxQs8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vmaxq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vmax\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vmaxq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vmax\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vmaxq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vmax\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vmaxq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vmax\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vmax_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vmax\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vmax_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmax\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vmax_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmax\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vmax_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmax\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vmax_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmax\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vmax_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmax\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmaxu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmaxu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmaxu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vmax_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmax\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4_t = vminq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vmin\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vminq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vmin\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vminq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vmin\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminQs8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vminq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vmin\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vminq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vmin\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vminq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vmin\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vminq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vmin\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vmin_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vmin\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmins16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmins16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmins16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vmin_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmin\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmins32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmins32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmins32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vmin_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmin\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmins8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmins8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmins8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vmin_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmin\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vmin_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmin\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vmin_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmin\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vminu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vminu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vminu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vmin_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmin\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_lanef32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_lanef32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- float32x2_t arg2_float32x2_t; -- -- out_float32x4_t = vmlaq_lane_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_lanes16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_lanes16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- int16x4_t arg2_int16x4_t; -- -- out_int16x8_t = vmlaq_lane_s16 (arg0_int16x8_t, arg1_int16x8_t, arg2_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_lanes32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_lanes32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- int32x2_t arg2_int32x2_t; -- -- out_int32x4_t = vmlaq_lane_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_laneu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_laneu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint16x8_t = vmlaq_lane_u16 (arg0_uint16x8_t, arg1_uint16x8_t, arg2_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_laneu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_laneu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint32x4_t = vmlaq_lane_u32 (arg0_uint32x4_t, arg1_uint32x4_t, arg2_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_nf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_nf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- float32_t arg2_float32_t; -- -- out_float32x4_t = vmlaq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); --} -- --/* { dg-final { scan-assembler "vmla\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_ns16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- int16_t arg2_int16_t; -- -- out_int16x8_t = vmlaq_n_s16 (arg0_int16x8_t, arg1_int16x8_t, arg2_int16_t); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_ns32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- int32_t arg2_int32_t; -- -- out_int32x4_t = vmlaq_n_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32_t); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_nu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- uint16_t arg2_uint16_t; -- -- out_uint16x8_t = vmlaq_n_u16 (arg0_uint16x8_t, arg1_uint16x8_t, arg2_uint16_t); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQ_nu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- uint32_t arg2_uint32_t; -- -- out_uint32x4_t = vmlaq_n_u32 (arg0_uint32x4_t, arg1_uint32x4_t, arg2_uint32_t); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- float32x4_t arg2_float32x4_t; -- -- out_float32x4_t = vmlaq_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x4_t); --} -- --/* { dg-final { scan-assembler "vmla\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQs16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- int16x8_t arg2_int16x8_t; -- -- out_int16x8_t = vmlaq_s16 (arg0_int16x8_t, arg1_int16x8_t, arg2_int16x8_t); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQs32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- int32x4_t arg2_int32x4_t; -- -- out_int32x4_t = vmlaq_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32x4_t); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQs8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- int8x16_t arg2_int8x16_t; -- -- out_int8x16_t = vmlaq_s8 (arg0_int8x16_t, arg1_int8x16_t, arg2_int8x16_t); --} -- --/* { dg-final { scan-assembler "vmla\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- uint16x8_t arg2_uint16x8_t; -- -- out_uint16x8_t = vmlaq_u16 (arg0_uint16x8_t, arg1_uint16x8_t, arg2_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- uint32x4_t arg2_uint32x4_t; -- -- out_uint32x4_t = vmlaq_u32 (arg0_uint32x4_t, arg1_uint32x4_t, arg2_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaQu8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- uint8x16_t arg2_uint8x16_t; -- -- out_uint8x16_t = vmlaq_u8 (arg0_uint8x16_t, arg1_uint8x16_t, arg2_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vmla\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_lanef32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_lanef32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- float32x2_t arg2_float32x2_t; -- -- out_float32x2_t = vmla_lane_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_lanes16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_lanes16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int16x4_t = vmla_lane_s16 (arg0_int16x4_t, arg1_int16x4_t, arg2_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_lanes32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_lanes32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int32x2_t = vmla_lane_s32 (arg0_int32x2_t, arg1_int32x2_t, arg2_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_laneu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_laneu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint16x4_t = vmla_lane_u16 (arg0_uint16x4_t, arg1_uint16x4_t, arg2_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_laneu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_laneu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint32x2_t = vmla_lane_u32 (arg0_uint32x2_t, arg1_uint32x2_t, arg2_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_nf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_nf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- float32_t arg2_float32_t; -- -- out_float32x2_t = vmla_n_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32_t); --} -- --/* { dg-final { scan-assembler "vmla\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_ns16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- int16_t arg2_int16_t; -- -- out_int16x4_t = vmla_n_s16 (arg0_int16x4_t, arg1_int16x4_t, arg2_int16_t); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_ns32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- int32_t arg2_int32_t; -- -- out_int32x2_t = vmla_n_s32 (arg0_int32x2_t, arg1_int32x2_t, arg2_int32_t); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_nu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16_t arg2_uint16_t; -- -- out_uint16x4_t = vmla_n_u16 (arg0_uint16x4_t, arg1_uint16x4_t, arg2_uint16_t); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmla_nu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmla_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmla_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32_t arg2_uint32_t; -- -- out_uint32x2_t = vmla_n_u32 (arg0_uint32x2_t, arg1_uint32x2_t, arg2_uint32_t); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlaf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlaf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlaf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- float32x2_t arg2_float32x2_t; -- -- out_float32x2_t = vmla_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t); --} -- --/* { dg-final { scan-assembler "vmla\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlal_lanes16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlal_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlal_lanes16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vmlal_lane_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmlal\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlal_lanes32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlal_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlal_lanes32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vmlal_lane_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmlal\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlal_laneu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlal_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlal_laneu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint32x4_t = vmlal_lane_u16 (arg0_uint32x4_t, arg1_uint16x4_t, arg2_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmlal\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlal_laneu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlal_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlal_laneu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint64x2_t = vmlal_lane_u32 (arg0_uint64x2_t, arg1_uint32x2_t, arg2_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmlal\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlal_ns16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlal_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlal_ns16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16_t arg2_int16_t; -- -- out_int32x4_t = vmlal_n_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16_t); --} -- --/* { dg-final { scan-assembler "vmlal\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlal_ns32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlal_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlal_ns32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32_t arg2_int32_t; -- -- out_int64x2_t = vmlal_n_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32_t); --} -- --/* { dg-final { scan-assembler "vmlal\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlal_nu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlal_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlal_nu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16_t arg2_uint16_t; -- -- out_uint32x4_t = vmlal_n_u16 (arg0_uint32x4_t, arg1_uint16x4_t, arg2_uint16_t); --} -- --/* { dg-final { scan-assembler "vmlal\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlal_nu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlal_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlal_nu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32_t arg2_uint32_t; -- -- out_uint64x2_t = vmlal_n_u32 (arg0_uint64x2_t, arg1_uint32x2_t, arg2_uint32_t); --} -- --/* { dg-final { scan-assembler "vmlal\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlals16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlals16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlals16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vmlal_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmlal\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlals32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlals32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlals32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vmlal_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmlal\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlals8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlals8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlals8 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int8x8_t arg1_int8x8_t; -- int8x8_t arg2_int8x8_t; -- -- out_int16x8_t = vmlal_s8 (arg0_int16x8_t, arg1_int8x8_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmlal\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlalu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlalu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlalu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint32x4_t = vmlal_u16 (arg0_uint32x4_t, arg1_uint16x4_t, arg2_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmlal\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlalu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlalu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlalu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint64x2_t = vmlal_u32 (arg0_uint64x2_t, arg1_uint32x2_t, arg2_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmlal\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlalu8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlalu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlalu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint8x8_t arg1_uint8x8_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint16x8_t = vmlal_u8 (arg0_uint16x8_t, arg1_uint8x8_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmlal\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlas16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlas16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlas16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int16x4_t = vmla_s16 (arg0_int16x4_t, arg1_int16x4_t, arg2_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlas32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlas32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlas32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int32x2_t = vmla_s32 (arg0_int32x2_t, arg1_int32x2_t, arg2_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlas8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlas8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlas8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- int8x8_t arg2_int8x8_t; -- -- out_int8x8_t = vmla_s8 (arg0_int8x8_t, arg1_int8x8_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmla\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlau16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlau16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlau16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint16x4_t = vmla_u16 (arg0_uint16x4_t, arg1_uint16x4_t, arg2_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmla\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlau32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlau32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlau32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint32x2_t = vmla_u32 (arg0_uint32x2_t, arg1_uint32x2_t, arg2_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmla\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlau8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlau8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlau8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint8x8_t = vmla_u8 (arg0_uint8x8_t, arg1_uint8x8_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmla\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_lanef32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_lanef32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- float32x2_t arg2_float32x2_t; -- -- out_float32x4_t = vmlsq_lane_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_lanes16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_lanes16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- int16x4_t arg2_int16x4_t; -- -- out_int16x8_t = vmlsq_lane_s16 (arg0_int16x8_t, arg1_int16x8_t, arg2_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_lanes32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_lanes32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- int32x2_t arg2_int32x2_t; -- -- out_int32x4_t = vmlsq_lane_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_laneu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_laneu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint16x8_t = vmlsq_lane_u16 (arg0_uint16x8_t, arg1_uint16x8_t, arg2_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_laneu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_laneu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint32x4_t = vmlsq_lane_u32 (arg0_uint32x4_t, arg1_uint32x4_t, arg2_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_nf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_nf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- float32_t arg2_float32_t; -- -- out_float32x4_t = vmlsq_n_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32_t); --} -- --/* { dg-final { scan-assembler "vmls\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_ns16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- int16_t arg2_int16_t; -- -- out_int16x8_t = vmlsq_n_s16 (arg0_int16x8_t, arg1_int16x8_t, arg2_int16_t); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_ns32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- int32_t arg2_int32_t; -- -- out_int32x4_t = vmlsq_n_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32_t); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_nu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- uint16_t arg2_uint16_t; -- -- out_uint16x8_t = vmlsq_n_u16 (arg0_uint16x8_t, arg1_uint16x8_t, arg2_uint16_t); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQ_nu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- uint32_t arg2_uint32_t; -- -- out_uint32x4_t = vmlsq_n_u32 (arg0_uint32x4_t, arg1_uint32x4_t, arg2_uint32_t); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- float32x4_t arg2_float32x4_t; -- -- out_float32x4_t = vmlsq_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x4_t); --} -- --/* { dg-final { scan-assembler "vmls\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQs16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- int16x8_t arg2_int16x8_t; -- -- out_int16x8_t = vmlsq_s16 (arg0_int16x8_t, arg1_int16x8_t, arg2_int16x8_t); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQs32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- int32x4_t arg2_int32x4_t; -- -- out_int32x4_t = vmlsq_s32 (arg0_int32x4_t, arg1_int32x4_t, arg2_int32x4_t); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQs8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- int8x16_t arg2_int8x16_t; -- -- out_int8x16_t = vmlsq_s8 (arg0_int8x16_t, arg1_int8x16_t, arg2_int8x16_t); --} -- --/* { dg-final { scan-assembler "vmls\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- uint16x8_t arg2_uint16x8_t; -- -- out_uint16x8_t = vmlsq_u16 (arg0_uint16x8_t, arg1_uint16x8_t, arg2_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- uint32x4_t arg2_uint32x4_t; -- -- out_uint32x4_t = vmlsq_u32 (arg0_uint32x4_t, arg1_uint32x4_t, arg2_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsQu8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- uint8x16_t arg2_uint8x16_t; -- -- out_uint8x16_t = vmlsq_u8 (arg0_uint8x16_t, arg1_uint8x16_t, arg2_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vmls\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_lanef32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_lanef32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- float32x2_t arg2_float32x2_t; -- -- out_float32x2_t = vmls_lane_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_lanes16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_lanes16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int16x4_t = vmls_lane_s16 (arg0_int16x4_t, arg1_int16x4_t, arg2_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_lanes32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_lanes32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int32x2_t = vmls_lane_s32 (arg0_int32x2_t, arg1_int32x2_t, arg2_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_laneu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_laneu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint16x4_t = vmls_lane_u16 (arg0_uint16x4_t, arg1_uint16x4_t, arg2_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_laneu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_laneu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint32x2_t = vmls_lane_u32 (arg0_uint32x2_t, arg1_uint32x2_t, arg2_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_nf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_nf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- float32_t arg2_float32_t; -- -- out_float32x2_t = vmls_n_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32_t); --} -- --/* { dg-final { scan-assembler "vmls\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_ns16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- int16_t arg2_int16_t; -- -- out_int16x4_t = vmls_n_s16 (arg0_int16x4_t, arg1_int16x4_t, arg2_int16_t); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_ns32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- int32_t arg2_int32_t; -- -- out_int32x2_t = vmls_n_s32 (arg0_int32x2_t, arg1_int32x2_t, arg2_int32_t); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_nu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16_t arg2_uint16_t; -- -- out_uint16x4_t = vmls_n_u16 (arg0_uint16x4_t, arg1_uint16x4_t, arg2_uint16_t); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmls_nu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmls_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmls_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32_t arg2_uint32_t; -- -- out_uint32x2_t = vmls_n_u32 (arg0_uint32x2_t, arg1_uint32x2_t, arg2_uint32_t); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsf32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- float32x2_t arg2_float32x2_t; -- -- out_float32x2_t = vmls_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t); --} -- --/* { dg-final { scan-assembler "vmls\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsl_lanes16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsl_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsl_lanes16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vmlsl_lane_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmlsl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsl_lanes32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsl_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsl_lanes32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vmlsl_lane_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmlsl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsl_laneu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsl_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsl_laneu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint32x4_t = vmlsl_lane_u16 (arg0_uint32x4_t, arg1_uint16x4_t, arg2_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmlsl\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsl_laneu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsl_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsl_laneu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint64x2_t = vmlsl_lane_u32 (arg0_uint64x2_t, arg1_uint32x2_t, arg2_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmlsl\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsl_ns16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsl_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsl_ns16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16_t arg2_int16_t; -- -- out_int32x4_t = vmlsl_n_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsl_ns32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsl_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsl_ns32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32_t arg2_int32_t; -- -- out_int64x2_t = vmlsl_n_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsl_nu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsl_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsl_nu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16_t arg2_uint16_t; -- -- out_uint32x4_t = vmlsl_n_u16 (arg0_uint32x4_t, arg1_uint16x4_t, arg2_uint16_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsl_nu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsl_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsl_nu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32_t arg2_uint32_t; -- -- out_uint64x2_t = vmlsl_n_u32 (arg0_uint64x2_t, arg1_uint32x2_t, arg2_uint32_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsls16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsls16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vmlsl_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsls32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsls32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vmlsl_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsls8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsls8 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int8x8_t arg1_int8x8_t; -- int8x8_t arg2_int8x8_t; -- -- out_int16x8_t = vmlsl_s8 (arg0_int16x8_t, arg1_int8x8_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlslu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlslu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlslu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint32x4_t = vmlsl_u16 (arg0_uint32x4_t, arg1_uint16x4_t, arg2_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlslu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlslu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlslu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint64x2_t = vmlsl_u32 (arg0_uint64x2_t, arg1_uint32x2_t, arg2_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlslu8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlslu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlslu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint8x8_t arg1_uint8x8_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint16x8_t = vmlsl_u8 (arg0_uint16x8_t, arg1_uint8x8_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmlsl\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlss16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlss16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlss16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int16x4_t = vmls_s16 (arg0_int16x4_t, arg1_int16x4_t, arg2_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlss32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlss32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlss32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int32x2_t = vmls_s32 (arg0_int32x2_t, arg1_int32x2_t, arg2_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlss8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlss8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlss8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- int8x8_t arg2_int8x8_t; -- -- out_int8x8_t = vmls_s8 (arg0_int8x8_t, arg1_int8x8_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmls\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsu16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- uint16x4_t arg2_uint16x4_t; -- -- out_uint16x4_t = vmls_u16 (arg0_uint16x4_t, arg1_uint16x4_t, arg2_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmls\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsu32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- uint32x2_t arg2_uint32x2_t; -- -- out_uint32x2_t = vmls_u32 (arg0_uint32x2_t, arg1_uint32x2_t, arg2_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmls\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmlsu8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vmlsu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmlsu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint8x8_t = vmls_u8 (arg0_uint8x8_t, arg1_uint8x8_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmls\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_nf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_nf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32_t arg0_float32_t; -- -- out_float32x4_t = vmovq_n_f32 (arg0_float32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_np16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_np16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_np16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16_t arg0_poly16_t; -- -- out_poly16x8_t = vmovq_n_p16 (arg0_poly16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_np8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_np8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_np8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8_t arg0_poly8_t; -- -- out_poly8x16_t = vmovq_n_p8 (arg0_poly8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_ns16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16_t arg0_int16_t; -- -- out_int16x8_t = vmovq_n_s16 (arg0_int16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32_t arg0_int32_t; -- -- out_int32x4_t = vmovq_n_s32 (arg0_int32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_ns64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vmovQ_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_ns64 (void) --{ -- int64x2_t out_int64x2_t; -- int64_t arg0_int64_t; -- -- out_int64x2_t = vmovq_n_s64 (arg0_int64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_ns8 (void) --{ -- int8x16_t out_int8x16_t; -- int8_t arg0_int8_t; -- -- out_int8x16_t = vmovq_n_s8 (arg0_int8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16_t arg0_uint16_t; -- -- out_uint16x8_t = vmovq_n_u16 (arg0_uint16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32_t arg0_uint32_t; -- -- out_uint32x4_t = vmovq_n_u32 (arg0_uint32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_nu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vmovQ_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_nu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64_t arg0_uint64_t; -- -- out_uint64x2_t = vmovq_n_u64 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovQ_nu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovQ_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovQ_nu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8_t arg0_uint8_t; -- -- out_uint8x16_t = vmovq_n_u8 (arg0_uint8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[qQ\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_nf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_nf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32_t arg0_float32_t; -- -- out_float32x2_t = vmov_n_f32 (arg0_float32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_np16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_np16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_np16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16_t arg0_poly16_t; -- -- out_poly16x4_t = vmov_n_p16 (arg0_poly16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_np8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_np8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_np8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8_t arg0_poly8_t; -- -- out_poly8x8_t = vmov_n_p8 (arg0_poly8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16_t arg0_int16_t; -- -- out_int16x4_t = vmov_n_s16 (arg0_int16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32_t arg0_int32_t; -- -- out_int32x2_t = vmov_n_s32 (arg0_int32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_ns64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vmov_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_ns64 (void) --{ -- int64x1_t out_int64x1_t; -- int64_t arg0_int64_t; -- -- out_int64x1_t = vmov_n_s64 (arg0_int64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_ns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8_t arg0_int8_t; -- -- out_int8x8_t = vmov_n_s8 (arg0_int8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16_t arg0_uint16_t; -- -- out_uint16x4_t = vmov_n_u16 (arg0_uint16_t); --} -- --/* { dg-final { scan-assembler "vdup\.16\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32_t arg0_uint32_t; -- -- out_uint32x2_t = vmov_n_u32 (arg0_uint32_t); --} -- --/* { dg-final { scan-assembler "vdup\.32\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_nu64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vmov_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_nu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64_t arg0_uint64_t; -- -- out_uint64x1_t = vmov_n_u64 (arg0_uint64_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmov_nu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmov_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmov_nu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8_t arg0_uint8_t; -- -- out_uint8x8_t = vmov_n_u8 (arg0_uint8_t); --} -- --/* { dg-final { scan-assembler "vdup\.8\[ \]+\[dD\]\[0-9\]+, (\[rR\]\[0-9\]+|\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovls16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovls16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int32x4_t = vmovl_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmovl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovls32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovls32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int64x2_t = vmovl_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmovl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovls8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovls8 (void) --{ -- int16x8_t out_int16x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int16x8_t = vmovl_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmovl\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovlu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovlu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovlu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint32x4_t = vmovl_u16 (arg0_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmovl\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovlu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovlu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovlu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint64x2_t = vmovl_u32 (arg0_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmovl\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovlu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovlu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovlu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint16x8_t = vmovl_u8 (arg0_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmovl\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int8x8_t = vmovn_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vmovn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int16x4_t = vmovn_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vmovn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_int32x2_t = vmovn_s64 (arg0_int64x2_t); --} -- --/* { dg-final { scan-assembler "vmovn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovnu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovnu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovnu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint8x8_t = vmovn_u16 (arg0_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vmovn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovnu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovnu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovnu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint16x4_t = vmovn_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vmovn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmovnu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmovnu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmovnu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint32x2_t = vmovn_u64 (arg0_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vmovn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_lanef32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_lanef32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x4_t = vmulq_lane_f32 (arg0_float32x4_t, arg1_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_lanes16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_lanes16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x8_t = vmulq_lane_s16 (arg0_int16x8_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_lanes32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_lanes32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x4_t = vmulq_lane_s32 (arg0_int32x4_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_laneu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_laneu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x8_t = vmulq_lane_u16 (arg0_uint16x8_t, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_laneu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_laneu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x4_t = vmulq_lane_u32 (arg0_uint32x4_t, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_nf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_nf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32_t arg1_float32_t; -- -- out_float32x4_t = vmulq_n_f32 (arg0_float32x4_t, arg1_float32_t); --} -- --/* { dg-final { scan-assembler "vmul\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16_t arg1_int16_t; -- -- out_int16x8_t = vmulq_n_s16 (arg0_int16x8_t, arg1_int16_t); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32_t arg1_int32_t; -- -- out_int32x4_t = vmulq_n_s32 (arg0_int32x4_t, arg1_int32_t); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16_t arg1_uint16_t; -- -- out_uint16x8_t = vmulq_n_u16 (arg0_uint16x8_t, arg1_uint16_t); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQ_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32_t arg1_uint32_t; -- -- out_uint32x4_t = vmulq_n_u32 (arg0_uint32x4_t, arg1_uint32_t); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4_t = vmulq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vmul\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg0_poly8x16_t; -- poly8x16_t arg1_poly8x16_t; -- -- out_poly8x16_t = vmulq_p8 (arg0_poly8x16_t, arg1_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vmul\.p8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vmulq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vmulq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vmulq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vmul\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vmulq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vmulq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vmulq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vmul\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_lanef32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_lanef32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vmul_lane_f32 (arg0_float32x2_t, arg1_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_lanes16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_lanes16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vmul_lane_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_lanes32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_lanes32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vmul_lane_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_laneu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_laneu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vmul_lane_u16 (arg0_uint16x4_t, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_laneu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_laneu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vmul_lane_u32 (arg0_uint32x2_t, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_nf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_nf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_nf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32_t arg1_float32_t; -- -- out_float32x2_t = vmul_n_f32 (arg0_float32x2_t, arg1_float32_t); --} -- --/* { dg-final { scan-assembler "vmul\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16_t arg1_int16_t; -- -- out_int16x4_t = vmul_n_s16 (arg0_int16x4_t, arg1_int16_t); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32_t arg1_int32_t; -- -- out_int32x2_t = vmul_n_s32 (arg0_int32x2_t, arg1_int32_t); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16_t arg1_uint16_t; -- -- out_uint16x4_t = vmul_n_u16 (arg0_uint16x4_t, arg1_uint16_t); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmul_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmul_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmul_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32_t arg1_uint32_t; -- -- out_uint32x2_t = vmul_n_u32 (arg0_uint32x2_t, arg1_uint32_t); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vmul_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vmul\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmull_lanes16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmull_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmull_lanes16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vmull_lane_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmull\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmull_lanes32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmull_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmull_lanes32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vmull_lane_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmull\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmull_laneu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmull_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmull_laneu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint32x4_t = vmull_lane_u16 (arg0_uint16x4_t, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vmull\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmull_laneu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmull_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmull_laneu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint64x2_t = vmull_lane_u32 (arg0_uint32x2_t, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vmull\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmull_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmull_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmull_ns16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16_t arg1_int16_t; -- -- out_int32x4_t = vmull_n_s16 (arg0_int16x4_t, arg1_int16_t); --} -- --/* { dg-final { scan-assembler "vmull\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmull_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmull_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmull_ns32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32_t arg1_int32_t; -- -- out_int64x2_t = vmull_n_s32 (arg0_int32x2_t, arg1_int32_t); --} -- --/* { dg-final { scan-assembler "vmull\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmull_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmull_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmull_nu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16_t arg1_uint16_t; -- -- out_uint32x4_t = vmull_n_u16 (arg0_uint16x4_t, arg1_uint16_t); --} -- --/* { dg-final { scan-assembler "vmull\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmull_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmull_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmull_nu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32_t arg1_uint32_t; -- -- out_uint64x2_t = vmull_n_u32 (arg0_uint32x2_t, arg1_uint32_t); --} -- --/* { dg-final { scan-assembler "vmull\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmullp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmullp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmullp8 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly16x8_t = vmull_p8 (arg0_poly8x8_t, arg1_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vmull\.p8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulls16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vmull_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmull\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulls32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vmull_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmull\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulls8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulls8 (void) --{ -- int16x8_t out_int16x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int16x8_t = vmull_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmull\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmullu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmullu16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmullu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint32x4_t = vmull_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmull\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmullu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmullu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmullu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint64x2_t = vmull_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmull\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmullu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmullu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmullu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint16x8_t = vmull_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmull\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulp8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x8_t = vmul_p8 (arg0_poly8x8_t, arg1_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vmul\.p8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmuls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmuls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmuls16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vmul_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmuls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmuls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmuls32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vmul_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmuls8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmuls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmuls8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vmul_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmul\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vmul_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmul\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vmul_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmul\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmulu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vmulu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmulu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vmul_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmul\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnQp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnQp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnQp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly8x16_t = vmvnq_p8 (arg0_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnQs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vmvnq_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnQs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x4_t = vmvnq_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnQs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vmvnq_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnQu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16x8_t = vmvnq_u16 (arg0_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnQu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32x4_t = vmvnq_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnQu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x16_t = vmvnq_u8 (arg0_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnp8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_poly8x8_t = vmvn_p8 (arg0_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vmvn_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vmvn_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vmvn_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16x4_t = vmvn_u16 (arg0_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32x2_t = vmvn_u32 (arg0_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vmvnu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vmvnu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vmvnu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vmvn_u8 (arg0_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vmvn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vnegQf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vnegQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vnegQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_float32x4_t = vnegq_f32 (arg0_float32x4_t); --} -- --/* { dg-final { scan-assembler "vneg\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vnegQs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vnegQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vnegQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vnegq_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vneg\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vnegQs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vnegQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vnegQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x4_t = vnegq_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vneg\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vnegQs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vnegQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vnegQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vnegq_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vneg\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vnegf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vnegf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vnegf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_float32x2_t = vneg_f32 (arg0_float32x2_t); --} -- --/* { dg-final { scan-assembler "vneg\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vnegs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vnegs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vnegs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vneg_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vneg\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vnegs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vnegs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vnegs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vneg_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vneg\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vnegs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vnegs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vnegs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vneg_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vneg\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int16x8_t out_int16x8_t; --int16x8_t arg0_int16x8_t; --int16x8_t arg1_int16x8_t; --void test_vornQs16 (void) --{ -- -- out_int16x8_t = vornq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int32x4_t out_int32x4_t; --int32x4_t arg0_int32x4_t; --int32x4_t arg1_int32x4_t; --void test_vornQs32 (void) --{ -- -- out_int32x4_t = vornq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int64x2_t out_int64x2_t; --int64x2_t arg0_int64x2_t; --int64x2_t arg1_int64x2_t; --void test_vornQs64 (void) --{ -- -- out_int64x2_t = vornq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int8x16_t out_int8x16_t; --int8x16_t arg0_int8x16_t; --int8x16_t arg1_int8x16_t; --void test_vornQs8 (void) --{ -- -- out_int8x16_t = vornq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint16x8_t out_uint16x8_t; --uint16x8_t arg0_uint16x8_t; --uint16x8_t arg1_uint16x8_t; --void test_vornQu16 (void) --{ -- -- out_uint16x8_t = vornq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint32x4_t out_uint32x4_t; --uint32x4_t arg0_uint32x4_t; --uint32x4_t arg1_uint32x4_t; --void test_vornQu32 (void) --{ -- -- out_uint32x4_t = vornq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint64x2_t out_uint64x2_t; --uint64x2_t arg0_uint64x2_t; --uint64x2_t arg1_uint64x2_t; --void test_vornQu64 (void) --{ -- -- out_uint64x2_t = vornq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint8x16_t out_uint8x16_t; --uint8x16_t arg0_uint8x16_t; --uint8x16_t arg1_uint8x16_t; --void test_vornQu8 (void) --{ -- -- out_uint8x16_t = vornq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int16x4_t out_int16x4_t; --int16x4_t arg0_int16x4_t; --int16x4_t arg1_int16x4_t; --void test_vorns16 (void) --{ -- -- out_int16x4_t = vorn_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int32x2_t out_int32x2_t; --int32x2_t arg0_int32x2_t; --int32x2_t arg1_int32x2_t; --void test_vorns32 (void) --{ -- -- out_int32x2_t = vorn_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vorns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int64x1_t out_int64x1_t; --int64x1_t arg0_int64x1_t; --int64x1_t arg1_int64x1_t; --void test_vorns64 (void) --{ -- -- out_int64x1_t = vorn_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --int8x8_t out_int8x8_t; --int8x8_t arg0_int8x8_t; --int8x8_t arg1_int8x8_t; --void test_vorns8 (void) --{ -- -- out_int8x8_t = vorn_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint16x4_t out_uint16x4_t; --uint16x4_t arg0_uint16x4_t; --uint16x4_t arg1_uint16x4_t; --void test_vornu16 (void) --{ -- -- out_uint16x4_t = vorn_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint32x2_t out_uint32x2_t; --uint32x2_t arg0_uint32x2_t; --uint32x2_t arg1_uint32x2_t; --void test_vornu32 (void) --{ -- -- out_uint32x2_t = vorn_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vornu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint64x1_t out_uint64x1_t; --uint64x1_t arg0_uint64x1_t; --uint64x1_t arg1_uint64x1_t; --void test_vornu64 (void) --{ -- -- out_uint64x1_t = vorn_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vornu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vornu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O2" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --uint8x8_t out_uint8x8_t; --uint8x8_t arg0_uint8x8_t; --uint8x8_t arg1_uint8x8_t; --void test_vornu8 (void) --{ -- -- out_uint8x8_t = vorn_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vorn\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vorrq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vorrq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vorrq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vorrq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vorrq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vorrq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vorrq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vorrq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vorr_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vorr_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrs64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vorrs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrs64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vorr_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorrs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorrs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorrs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vorr_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorru16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorru16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorru16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vorr_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorru32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorru32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorru32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vorr_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorru64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vorru64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorru64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vorr_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vorru8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vorru8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vorru8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vorr_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vorr\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalQs16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x8_t arg1_int16x8_t; -- -- out_int32x4_t = vpadalq_s16 (arg0_int32x4_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vpadal\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalQs32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x4_t arg1_int32x4_t; -- -- out_int64x2_t = vpadalq_s32 (arg0_int64x2_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vpadal\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalQs8 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int8x16_t arg1_int8x16_t; -- -- out_int16x8_t = vpadalq_s8 (arg0_int16x8_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vpadal\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalQu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint32x4_t = vpadalq_u16 (arg0_uint32x4_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vpadal\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalQu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint64x2_t = vpadalq_u32 (arg0_uint64x2_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vpadal\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalQu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint16x8_t = vpadalq_u8 (arg0_uint16x8_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vpadal\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadals16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadals16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadals16 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x2_t = vpadal_s16 (arg0_int32x2_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vpadal\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadals32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadals32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadals32 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x1_t = vpadal_s32 (arg0_int64x1_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vpadal\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadals8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadals8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadals8 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int8x8_t arg1_int8x8_t; -- -- out_int16x4_t = vpadal_s8 (arg0_int16x4_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vpadal\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalu16 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint32x2_t = vpadal_u16 (arg0_uint32x2_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vpadal\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalu32 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint64x1_t = vpadal_u32 (arg0_uint64x1_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vpadal\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadalu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadalu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadalu8 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint16x4_t = vpadal_u8 (arg0_uint16x4_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vpadal\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpaddf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vpadd_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vpadd\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlQs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlQs16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x8_t arg0_int16x8_t; -- -- out_int32x4_t = vpaddlq_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlQs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlQs32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x4_t arg0_int32x4_t; -- -- out_int64x2_t = vpaddlq_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlQs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlQs8 (void) --{ -- int16x8_t out_int16x8_t; -- int8x16_t arg0_int8x16_t; -- -- out_int16x8_t = vpaddlq_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlQu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlQu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint32x4_t = vpaddlq_u16 (arg0_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlQu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlQu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint64x2_t = vpaddlq_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlQu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlQu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint16x8_t = vpaddlq_u8 (arg0_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddls16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddls16 (void) --{ -- int32x2_t out_int32x2_t; -- int16x4_t arg0_int16x4_t; -- -- out_int32x2_t = vpaddl_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddls32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddls32 (void) --{ -- int64x1_t out_int64x1_t; -- int32x2_t arg0_int32x2_t; -- -- out_int64x1_t = vpaddl_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddls8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddls8 (void) --{ -- int16x4_t out_int16x4_t; -- int8x8_t arg0_int8x8_t; -- -- out_int16x4_t = vpaddl_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlu16 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint32x2_t = vpaddl_u16 (arg0_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlu32 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint64x1_t = vpaddl_u32 (arg0_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddlu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vpaddlu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddlu8 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint16x4_t = vpaddl_u8 (arg0_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vpaddl\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadds16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadds16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadds16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vpadd_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vpadd\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadds32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadds32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadds32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vpadd_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vpadd\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpadds8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpadds8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpadds8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vpadd_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vpadd\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpaddu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vpadd_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vpadd\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpaddu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vpadd_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vpadd\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpaddu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpaddu8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpaddu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vpadd_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vpadd\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmaxf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmaxf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmaxf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vpmax_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vpmax\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmaxs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmaxs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmaxs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vpmax_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vpmax\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmaxs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmaxs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmaxs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vpmax_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vpmax\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmaxs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmaxs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmaxs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vpmax_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vpmax\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmaxu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmaxu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmaxu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vpmax_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vpmax\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmaxu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmaxu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmaxu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vpmax_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vpmax\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmaxu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmaxu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmaxu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vpmax_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vpmax\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpminf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpminf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpminf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vpmin_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vpmin\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmins16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmins16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmins16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vpmin_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vpmin\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmins32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmins32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmins32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vpmin_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vpmin\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpmins8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpmins8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpmins8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vpmin_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vpmin\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpminu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpminu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpminu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vpmin_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vpmin\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpminu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpminu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpminu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vpmin_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vpmin\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vpminu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vpminu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vpminu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vpmin_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vpmin\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulhQ_lanes16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulhQ_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulhQ_lanes16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x8_t = vqrdmulhq_lane_s16 (arg0_int16x8_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulhQ_lanes32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulhQ_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulhQ_lanes32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x4_t = vqrdmulhq_lane_s32 (arg0_int32x4_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulhQ_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulhQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulhQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16_t arg1_int16_t; -- -- out_int16x8_t = vqrdmulhq_n_s16 (arg0_int16x8_t, arg1_int16_t); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulhQ_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulhQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulhQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32_t arg1_int32_t; -- -- out_int32x4_t = vqrdmulhq_n_s32 (arg0_int32x4_t, arg1_int32_t); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulhQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulhQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulhQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vqrdmulhq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulhQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulhQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulhQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vqrdmulhq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulh_lanes16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulh_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulh_lanes16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vqrdmulh_lane_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulh_lanes32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulh_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulh_lanes32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vqrdmulh_lane_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulh_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulh_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulh_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16_t arg1_int16_t; -- -- out_int16x4_t = vqrdmulh_n_s16 (arg0_int16x4_t, arg1_int16_t); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulh_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulh_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulh_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32_t arg1_int32_t; -- -- out_int32x2_t = vqrdmulh_n_s32 (arg0_int32x2_t, arg1_int32_t); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulhs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulhs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulhs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vqrdmulh_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRdmulhs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRdmulhs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRdmulhs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vqrdmulh_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqrdmulh\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vqrshlq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vqrshlq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vqrshlq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vqrshlq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_uint16x8_t = vqrshlq_u16 (arg0_uint16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_uint32x4_t = vqrshlq_u32 (arg0_uint32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_uint64x2_t = vqrshlq_u64 (arg0_uint64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_uint8x16_t = vqrshlq_u8 (arg0_uint8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshls16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vqrshl_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshls32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vqrshl_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshls64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshls64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshls64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vqrshl_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshls8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshls8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshls8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vqrshl_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_uint16x4_t = vqrshl_u16 (arg0_uint16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_uint32x2_t = vqrshl_u32 (arg0_uint32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_uint64x1_t = vqrshl_u64 (arg0_uint64x1_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshlu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqRshlu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshlu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_uint8x8_t = vqrshl_u8 (arg0_uint8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vqrshl\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrn_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrn_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrn_ns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int8x8_t = vqrshrn_n_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrn\.s16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrn_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrn_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrn_ns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int16x4_t = vqrshrn_n_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrn\.s32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrn_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrn_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrn_ns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_int32x2_t = vqrshrn_n_s64 (arg0_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrn\.s64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrn_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrn_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrn_nu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint8x8_t = vqrshrn_n_u16 (arg0_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrn\.u16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrn_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrn_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrn_nu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint16x4_t = vqrshrn_n_u32 (arg0_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrn\.u32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrn_nu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrn_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrn_nu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint32x2_t = vqrshrn_n_u64 (arg0_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrn\.u64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrun_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrun_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrun_ns16 (void) --{ -- uint8x8_t out_uint8x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_uint8x8_t = vqrshrun_n_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrun\.s16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrun_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrun_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrun_ns32 (void) --{ -- uint16x4_t out_uint16x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_uint16x4_t = vqrshrun_n_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrun\.s32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqRshrun_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqRshrun_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqRshrun_ns64 (void) --{ -- uint32x2_t out_uint32x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_uint32x2_t = vqrshrun_n_s64 (arg0_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqrshrun\.s64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqabsQs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqabsQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqabsQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vqabsq_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqabs\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqabsQs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqabsQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqabsQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x4_t = vqabsq_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqabs\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqabsQs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqabsQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqabsQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vqabsq_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vqabs\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqabss16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqabss16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqabss16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vqabs_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqabs\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqabss32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqabss32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqabss32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vqabs_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqabs\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqabss8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqabss8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqabss8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vqabs_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vqabs\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vqaddq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqadd\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vqaddq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqadd\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vqaddq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vqadd\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vqaddq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vqadd\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vqaddq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vqadd\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vqaddq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vqadd\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vqaddq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vqadd\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vqaddq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vqadd\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqadds16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqadds16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqadds16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vqadd_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqadd\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqadds32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqadds32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqadds32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vqadd_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqadd\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqadds64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqadds64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqadds64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vqadd_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vqadd\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqadds8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqadds8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqadds8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vqadd_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vqadd\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vqadd_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vqadd\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vqadd_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vqadd\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vqadd_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- --/* { dg-final { scan-assembler "vqadd\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqaddu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqaddu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqaddu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vqadd_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vqadd\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlal_lanes16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlal_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlal_lanes16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vqdmlal_lane_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmlal\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlal_lanes32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlal_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlal_lanes32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vqdmlal_lane_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmlal\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlal_ns16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlal_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlal_ns16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16_t arg2_int16_t; -- -- out_int32x4_t = vqdmlal_n_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16_t); --} -- --/* { dg-final { scan-assembler "vqdmlal\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlal_ns32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlal_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlal_ns32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32_t arg2_int32_t; -- -- out_int64x2_t = vqdmlal_n_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32_t); --} -- --/* { dg-final { scan-assembler "vqdmlal\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlals16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlals16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlals16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vqdmlal_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqdmlal\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlals32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlals32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlals32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vqdmlal_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqdmlal\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlsl_lanes16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlsl_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlsl_lanes16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vqdmlsl_lane_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmlsl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlsl_lanes32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlsl_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlsl_lanes32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vqdmlsl_lane_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmlsl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlsl_ns16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlsl_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlsl_ns16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16_t arg2_int16_t; -- -- out_int32x4_t = vqdmlsl_n_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16_t); --} -- --/* { dg-final { scan-assembler "vqdmlsl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlsl_ns32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlsl_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlsl_ns32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32_t arg2_int32_t; -- -- out_int64x2_t = vqdmlsl_n_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32_t); --} -- --/* { dg-final { scan-assembler "vqdmlsl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlsls16.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlsls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlsls16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- int16x4_t arg2_int16x4_t; -- -- out_int32x4_t = vqdmlsl_s16 (arg0_int32x4_t, arg1_int16x4_t, arg2_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqdmlsl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmlsls32.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vqdmlsls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmlsls32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- int32x2_t arg2_int32x2_t; -- -- out_int64x2_t = vqdmlsl_s32 (arg0_int64x2_t, arg1_int32x2_t, arg2_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqdmlsl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulhQ_lanes16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulhQ_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulhQ_lanes16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x8_t = vqdmulhq_lane_s16 (arg0_int16x8_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulhQ_lanes32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulhQ_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulhQ_lanes32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x4_t = vqdmulhq_lane_s32 (arg0_int32x4_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulhQ_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulhQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulhQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16_t arg1_int16_t; -- -- out_int16x8_t = vqdmulhq_n_s16 (arg0_int16x8_t, arg1_int16_t); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulhQ_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulhQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulhQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32_t arg1_int32_t; -- -- out_int32x4_t = vqdmulhq_n_s32 (arg0_int32x4_t, arg1_int32_t); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulhQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulhQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulhQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vqdmulhq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulhQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulhQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulhQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vqdmulhq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulh_lanes16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulh_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulh_lanes16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vqdmulh_lane_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulh_lanes32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulh_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulh_lanes32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vqdmulh_lane_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulh_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulh_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulh_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16_t arg1_int16_t; -- -- out_int16x4_t = vqdmulh_n_s16 (arg0_int16x4_t, arg1_int16_t); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulh_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulh_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulh_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32_t arg1_int32_t; -- -- out_int32x2_t = vqdmulh_n_s32 (arg0_int32x2_t, arg1_int32_t); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulhs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulhs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulhs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vqdmulh_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulhs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulhs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulhs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vqdmulh_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqdmulh\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmull_lanes16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmull_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmull_lanes16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vqdmull_lane_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmull\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmull_lanes32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmull_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmull_lanes32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vqdmull_lane_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqdmull\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmull_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmull_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmull_ns16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16_t arg1_int16_t; -- -- out_int32x4_t = vqdmull_n_s16 (arg0_int16x4_t, arg1_int16_t); --} -- --/* { dg-final { scan-assembler "vqdmull\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmull_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmull_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmull_ns32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32_t arg1_int32_t; -- -- out_int64x2_t = vqdmull_n_s32 (arg0_int32x2_t, arg1_int32_t); --} -- --/* { dg-final { scan-assembler "vqdmull\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulls16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vqdmull_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqdmull\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqdmulls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqdmulls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqdmulls32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vqdmull_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqdmull\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int8x8_t = vqmovn_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqmovn\.s16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int16x4_t = vqmovn_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqmovn\.s32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_int32x2_t = vqmovn_s64 (arg0_int64x2_t); --} -- --/* { dg-final { scan-assembler "vqmovn\.s64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovnu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovnu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovnu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint8x8_t = vqmovn_u16 (arg0_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vqmovn\.u16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovnu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovnu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovnu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint16x4_t = vqmovn_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vqmovn\.u32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovnu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovnu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovnu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint32x2_t = vqmovn_u64 (arg0_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vqmovn\.u64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovuns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovuns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovuns16 (void) --{ -- uint8x8_t out_uint8x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_uint8x8_t = vqmovun_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqmovun\.s16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovuns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovuns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovuns32 (void) --{ -- uint16x4_t out_uint16x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_uint16x4_t = vqmovun_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqmovun\.s32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqmovuns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqmovuns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqmovuns64 (void) --{ -- uint32x2_t out_uint32x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_uint32x2_t = vqmovun_s64 (arg0_int64x2_t); --} -- --/* { dg-final { scan-assembler "vqmovun\.s64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqnegQs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqnegQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqnegQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vqnegq_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqneg\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqnegQs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqnegQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqnegQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x4_t = vqnegq_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqneg\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqnegQs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqnegQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqnegQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vqnegq_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vqneg\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqnegs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqnegs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqnegs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vqneg_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqneg\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqnegs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqnegs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqnegs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vqneg_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqneg\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqnegs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqnegs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqnegs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vqneg_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vqneg\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQ_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vqshlq_n_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQ_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x4_t = vqshlq_n_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQ_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlQ_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQ_ns64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_int64x2_t = vqshlq_n_s64 (arg0_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQ_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlQ_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQ_ns8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vqshlq_n_s8 (arg0_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQ_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16x8_t = vqshlq_n_u16 (arg0_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQ_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32x4_t = vqshlq_n_u32 (arg0_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQ_nu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlQ_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQ_nu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint64x2_t = vqshlq_n_u64 (arg0_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQ_nu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlQ_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQ_nu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x16_t = vqshlq_n_u8 (arg0_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vqshlq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqshl\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vqshlq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqshl\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vqshlq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vqshl\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vqshlq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vqshl\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_uint16x8_t = vqshlq_u16 (arg0_uint16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqshl\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_uint32x4_t = vqshlq_u32 (arg0_uint32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqshl\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlQu64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_uint64x2_t = vqshlq_u64 (arg0_uint64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vqshl\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_uint8x16_t = vqshlq_u8 (arg0_uint8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vqshl\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshl_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshl_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshl_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vqshl_n_s16 (arg0_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshl_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshl_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshl_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vqshl_n_s32 (arg0_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshl_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshl_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshl_ns64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- -- out_int64x1_t = vqshl_n_s64 (arg0_int64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshl_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshl_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshl_ns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vqshl_n_s8 (arg0_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshl_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshl_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshl_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16x4_t = vqshl_n_u16 (arg0_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshl_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshl_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshl_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32x2_t = vqshl_n_u32 (arg0_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshl_nu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshl_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshl_nu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_uint64x1_t = vqshl_n_u64 (arg0_uint64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshl_nu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshl_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshl_nu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vqshl_n_u8 (arg0_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshl\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshls16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vqshl_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqshl\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshls32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshls32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vqshl_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqshl\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshls64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshls64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshls64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vqshl_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vqshl\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshls8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshls8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vqshl_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vqshl\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_uint16x4_t = vqshl_u16 (arg0_uint16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqshl\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_uint32x2_t = vqshl_u32 (arg0_uint32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqshl\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_uint64x1_t = vqshl_u64 (arg0_uint64x1_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vqshl\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqshlu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_uint8x8_t = vqshl_u8 (arg0_uint8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vqshl\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshluQ_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshluQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshluQ_ns16 (void) --{ -- uint16x8_t out_uint16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_uint16x8_t = vqshluq_n_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshlu\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshluQ_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshluQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshluQ_ns32 (void) --{ -- uint32x4_t out_uint32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_uint32x4_t = vqshluq_n_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshlu\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshluQ_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshluQ_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshluQ_ns64 (void) --{ -- uint64x2_t out_uint64x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_uint64x2_t = vqshluq_n_s64 (arg0_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshlu\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshluQ_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshluQ_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshluQ_ns8 (void) --{ -- uint8x16_t out_uint8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_uint8x16_t = vqshluq_n_s8 (arg0_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vqshlu\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlu_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlu_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlu_ns16 (void) --{ -- uint16x4_t out_uint16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_uint16x4_t = vqshlu_n_s16 (arg0_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshlu\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlu_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlu_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlu_ns32 (void) --{ -- uint32x2_t out_uint32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_uint32x2_t = vqshlu_n_s32 (arg0_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshlu\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlu_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlu_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlu_ns64 (void) --{ -- uint64x1_t out_uint64x1_t; -- int64x1_t arg0_int64x1_t; -- -- out_uint64x1_t = vqshlu_n_s64 (arg0_int64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vqshlu\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshlu_ns8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshlu_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshlu_ns8 (void) --{ -- uint8x8_t out_uint8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_uint8x8_t = vqshlu_n_s8 (arg0_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshlu\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrn_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrn_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrn_ns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int8x8_t = vqshrn_n_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrn\.s16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrn_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrn_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrn_ns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int16x4_t = vqshrn_n_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrn\.s32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrn_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrn_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrn_ns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_int32x2_t = vqshrn_n_s64 (arg0_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrn\.s64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrn_nu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrn_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrn_nu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint8x8_t = vqshrn_n_u16 (arg0_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrn\.u16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrn_nu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrn_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrn_nu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint16x4_t = vqshrn_n_u32 (arg0_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrn\.u32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrn_nu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrn_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrn_nu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint32x2_t = vqshrn_n_u64 (arg0_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrn\.u64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrun_ns16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrun_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrun_ns16 (void) --{ -- uint8x8_t out_uint8x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_uint8x8_t = vqshrun_n_s16 (arg0_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrun\.s16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrun_ns32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrun_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrun_ns32 (void) --{ -- uint16x4_t out_uint16x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_uint16x4_t = vqshrun_n_s32 (arg0_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrun\.s32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqshrun_ns64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vqshrun_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqshrun_ns64 (void) --{ -- uint32x2_t out_uint32x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_uint32x2_t = vqshrun_n_s64 (arg0_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vqshrun\.s64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vqsubq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vqsub\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vqsubq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vqsub\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vqsubq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vqsub\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vqsubq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vqsub\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vqsubq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vqsub\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubQu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vqsubq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vqsub\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vqsubq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vqsub\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vqsubq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vqsub\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vqsub_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vqsub\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vqsub_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vqsub\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubs64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vqsub_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vqsub\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vqsub_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vqsub\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vqsub_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vqsub\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vqsub_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vqsub\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubu64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vqsub_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- --/* { dg-final { scan-assembler "vqsub\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vqsubu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vqsubu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vqsubu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vqsub_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vqsub\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrecpeQf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrecpeQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrecpeQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_float32x4_t = vrecpeq_f32 (arg0_float32x4_t); --} -- --/* { dg-final { scan-assembler "vrecpe\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrecpeQu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrecpeQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrecpeQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32x4_t = vrecpeq_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vrecpe\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrecpef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrecpef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrecpef32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_float32x2_t = vrecpe_f32 (arg0_float32x2_t); --} -- --/* { dg-final { scan-assembler "vrecpe\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrecpeu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrecpeu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrecpeu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32x2_t = vrecpe_u32 (arg0_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vrecpe\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrecpsQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vrecpsQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrecpsQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4_t = vrecpsq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vrecps\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrecpsf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vrecpsf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrecpsf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vrecps_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vrecps\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_p128 (void) --{ -- float32x4_t out_float32x4_t; -- poly128_t arg0_poly128_t; -- -- out_float32x4_t = vreinterpretq_f32_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_p16 (void) --{ -- float32x4_t out_float32x4_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_float32x4_t = vreinterpretq_f32_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_p64 (void) --{ -- float32x4_t out_float32x4_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_float32x4_t = vreinterpretq_f32_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_p8 (void) --{ -- float32x4_t out_float32x4_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_float32x4_t = vreinterpretq_f32_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_s16 (void) --{ -- float32x4_t out_float32x4_t; -- int16x8_t arg0_int16x8_t; -- -- out_float32x4_t = vreinterpretq_f32_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_s32 (void) --{ -- float32x4_t out_float32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_float32x4_t = vreinterpretq_f32_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_s64 (void) --{ -- float32x4_t out_float32x4_t; -- int64x2_t arg0_int64x2_t; -- -- out_float32x4_t = vreinterpretq_f32_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_s8 (void) --{ -- float32x4_t out_float32x4_t; -- int8x16_t arg0_int8x16_t; -- -- out_float32x4_t = vreinterpretq_f32_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_u16 (void) --{ -- float32x4_t out_float32x4_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_float32x4_t = vreinterpretq_f32_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_u32 (void) --{ -- float32x4_t out_float32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_float32x4_t = vreinterpretq_f32_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_u64 (void) --{ -- float32x4_t out_float32x4_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_float32x4_t = vreinterpretq_f32_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQf32_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQf32_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQf32_u8 (void) --{ -- float32x4_t out_float32x4_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_float32x4_t = vreinterpretq_f32_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_f32 (void) --{ -- poly128_t out_poly128_t; -- float32x4_t arg0_float32x4_t; -- -- out_poly128_t = vreinterpretq_p128_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_p16 (void) --{ -- poly128_t out_poly128_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_poly128_t = vreinterpretq_p128_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_p64 (void) --{ -- poly128_t out_poly128_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_poly128_t = vreinterpretq_p128_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_p8 (void) --{ -- poly128_t out_poly128_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly128_t = vreinterpretq_p128_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_s16 (void) --{ -- poly128_t out_poly128_t; -- int16x8_t arg0_int16x8_t; -- -- out_poly128_t = vreinterpretq_p128_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_s32 (void) --{ -- poly128_t out_poly128_t; -- int32x4_t arg0_int32x4_t; -- -- out_poly128_t = vreinterpretq_p128_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_s64 (void) --{ -- poly128_t out_poly128_t; -- int64x2_t arg0_int64x2_t; -- -- out_poly128_t = vreinterpretq_p128_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_s8 (void) --{ -- poly128_t out_poly128_t; -- int8x16_t arg0_int8x16_t; -- -- out_poly128_t = vreinterpretq_p128_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_u16 (void) --{ -- poly128_t out_poly128_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_poly128_t = vreinterpretq_p128_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_u32 (void) --{ -- poly128_t out_poly128_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_poly128_t = vreinterpretq_p128_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_u64 (void) --{ -- poly128_t out_poly128_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_poly128_t = vreinterpretq_p128_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp128_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp128_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp128_u8 (void) --{ -- poly128_t out_poly128_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_poly128_t = vreinterpretq_p128_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_f32 (void) --{ -- poly16x8_t out_poly16x8_t; -- float32x4_t arg0_float32x4_t; -- -- out_poly16x8_t = vreinterpretq_p16_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_p128 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly128_t arg0_poly128_t; -- -- out_poly16x8_t = vreinterpretq_p16_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_p64 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_poly16x8_t = vreinterpretq_p16_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_p8 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly16x8_t = vreinterpretq_p16_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_s16 (void) --{ -- poly16x8_t out_poly16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_poly16x8_t = vreinterpretq_p16_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_s32 (void) --{ -- poly16x8_t out_poly16x8_t; -- int32x4_t arg0_int32x4_t; -- -- out_poly16x8_t = vreinterpretq_p16_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_s64 (void) --{ -- poly16x8_t out_poly16x8_t; -- int64x2_t arg0_int64x2_t; -- -- out_poly16x8_t = vreinterpretq_p16_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_s8 (void) --{ -- poly16x8_t out_poly16x8_t; -- int8x16_t arg0_int8x16_t; -- -- out_poly16x8_t = vreinterpretq_p16_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_u16 (void) --{ -- poly16x8_t out_poly16x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_poly16x8_t = vreinterpretq_p16_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_u32 (void) --{ -- poly16x8_t out_poly16x8_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_poly16x8_t = vreinterpretq_p16_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_u64 (void) --{ -- poly16x8_t out_poly16x8_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_poly16x8_t = vreinterpretq_p16_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp16_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp16_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp16_u8 (void) --{ -- poly16x8_t out_poly16x8_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_poly16x8_t = vreinterpretq_p16_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_f32 (void) --{ -- poly64x2_t out_poly64x2_t; -- float32x4_t arg0_float32x4_t; -- -- out_poly64x2_t = vreinterpretq_p64_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_p128 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly128_t arg0_poly128_t; -- -- out_poly64x2_t = vreinterpretq_p64_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_p16 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_poly64x2_t = vreinterpretq_p64_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_p8 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly64x2_t = vreinterpretq_p64_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_s16 (void) --{ -- poly64x2_t out_poly64x2_t; -- int16x8_t arg0_int16x8_t; -- -- out_poly64x2_t = vreinterpretq_p64_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_s32 (void) --{ -- poly64x2_t out_poly64x2_t; -- int32x4_t arg0_int32x4_t; -- -- out_poly64x2_t = vreinterpretq_p64_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_s64 (void) --{ -- poly64x2_t out_poly64x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_poly64x2_t = vreinterpretq_p64_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_s8 (void) --{ -- poly64x2_t out_poly64x2_t; -- int8x16_t arg0_int8x16_t; -- -- out_poly64x2_t = vreinterpretq_p64_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_u16 (void) --{ -- poly64x2_t out_poly64x2_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_poly64x2_t = vreinterpretq_p64_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_u32 (void) --{ -- poly64x2_t out_poly64x2_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_poly64x2_t = vreinterpretq_p64_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_u64 (void) --{ -- poly64x2_t out_poly64x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_poly64x2_t = vreinterpretq_p64_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp64_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp64_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp64_u8 (void) --{ -- poly64x2_t out_poly64x2_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_poly64x2_t = vreinterpretq_p64_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_f32 (void) --{ -- poly8x16_t out_poly8x16_t; -- float32x4_t arg0_float32x4_t; -- -- out_poly8x16_t = vreinterpretq_p8_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_p128 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly128_t arg0_poly128_t; -- -- out_poly8x16_t = vreinterpretq_p8_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_p16 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_poly8x16_t = vreinterpretq_p8_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_p64 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_poly8x16_t = vreinterpretq_p8_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_s16 (void) --{ -- poly8x16_t out_poly8x16_t; -- int16x8_t arg0_int16x8_t; -- -- out_poly8x16_t = vreinterpretq_p8_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_s32 (void) --{ -- poly8x16_t out_poly8x16_t; -- int32x4_t arg0_int32x4_t; -- -- out_poly8x16_t = vreinterpretq_p8_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_s64 (void) --{ -- poly8x16_t out_poly8x16_t; -- int64x2_t arg0_int64x2_t; -- -- out_poly8x16_t = vreinterpretq_p8_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_s8 (void) --{ -- poly8x16_t out_poly8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_poly8x16_t = vreinterpretq_p8_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_u16 (void) --{ -- poly8x16_t out_poly8x16_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_poly8x16_t = vreinterpretq_p8_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_u32 (void) --{ -- poly8x16_t out_poly8x16_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_poly8x16_t = vreinterpretq_p8_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_u64 (void) --{ -- poly8x16_t out_poly8x16_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_poly8x16_t = vreinterpretq_p8_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQp8_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQp8_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQp8_u8 (void) --{ -- poly8x16_t out_poly8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_poly8x16_t = vreinterpretq_p8_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_f32 (void) --{ -- int16x8_t out_int16x8_t; -- float32x4_t arg0_float32x4_t; -- -- out_int16x8_t = vreinterpretq_s16_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_p128 (void) --{ -- int16x8_t out_int16x8_t; -- poly128_t arg0_poly128_t; -- -- out_int16x8_t = vreinterpretq_s16_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_p16 (void) --{ -- int16x8_t out_int16x8_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_int16x8_t = vreinterpretq_s16_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_p64 (void) --{ -- int16x8_t out_int16x8_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_int16x8_t = vreinterpretq_s16_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_p8 (void) --{ -- int16x8_t out_int16x8_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_int16x8_t = vreinterpretq_s16_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_s32 (void) --{ -- int16x8_t out_int16x8_t; -- int32x4_t arg0_int32x4_t; -- -- out_int16x8_t = vreinterpretq_s16_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_s64 (void) --{ -- int16x8_t out_int16x8_t; -- int64x2_t arg0_int64x2_t; -- -- out_int16x8_t = vreinterpretq_s16_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_s8 (void) --{ -- int16x8_t out_int16x8_t; -- int8x16_t arg0_int8x16_t; -- -- out_int16x8_t = vreinterpretq_s16_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_u16 (void) --{ -- int16x8_t out_int16x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_int16x8_t = vreinterpretq_s16_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_u32 (void) --{ -- int16x8_t out_int16x8_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_int16x8_t = vreinterpretq_s16_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_u64 (void) --{ -- int16x8_t out_int16x8_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_int16x8_t = vreinterpretq_s16_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs16_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs16_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs16_u8 (void) --{ -- int16x8_t out_int16x8_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_int16x8_t = vreinterpretq_s16_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_f32 (void) --{ -- int32x4_t out_int32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_int32x4_t = vreinterpretq_s32_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_p128 (void) --{ -- int32x4_t out_int32x4_t; -- poly128_t arg0_poly128_t; -- -- out_int32x4_t = vreinterpretq_s32_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_p16 (void) --{ -- int32x4_t out_int32x4_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_int32x4_t = vreinterpretq_s32_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_p64 (void) --{ -- int32x4_t out_int32x4_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_int32x4_t = vreinterpretq_s32_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_p8 (void) --{ -- int32x4_t out_int32x4_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_int32x4_t = vreinterpretq_s32_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_s16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x8_t arg0_int16x8_t; -- -- out_int32x4_t = vreinterpretq_s32_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_s64 (void) --{ -- int32x4_t out_int32x4_t; -- int64x2_t arg0_int64x2_t; -- -- out_int32x4_t = vreinterpretq_s32_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_s8 (void) --{ -- int32x4_t out_int32x4_t; -- int8x16_t arg0_int8x16_t; -- -- out_int32x4_t = vreinterpretq_s32_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_u16 (void) --{ -- int32x4_t out_int32x4_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_int32x4_t = vreinterpretq_s32_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_u32 (void) --{ -- int32x4_t out_int32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_int32x4_t = vreinterpretq_s32_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_u64 (void) --{ -- int32x4_t out_int32x4_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_int32x4_t = vreinterpretq_s32_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs32_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs32_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs32_u8 (void) --{ -- int32x4_t out_int32x4_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_int32x4_t = vreinterpretq_s32_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_f32 (void) --{ -- int64x2_t out_int64x2_t; -- float32x4_t arg0_float32x4_t; -- -- out_int64x2_t = vreinterpretq_s64_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_p128 (void) --{ -- int64x2_t out_int64x2_t; -- poly128_t arg0_poly128_t; -- -- out_int64x2_t = vreinterpretq_s64_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_p16 (void) --{ -- int64x2_t out_int64x2_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_int64x2_t = vreinterpretq_s64_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_p64 (void) --{ -- int64x2_t out_int64x2_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_int64x2_t = vreinterpretq_s64_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_p8 (void) --{ -- int64x2_t out_int64x2_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_int64x2_t = vreinterpretq_s64_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_s16 (void) --{ -- int64x2_t out_int64x2_t; -- int16x8_t arg0_int16x8_t; -- -- out_int64x2_t = vreinterpretq_s64_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_s32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x4_t arg0_int32x4_t; -- -- out_int64x2_t = vreinterpretq_s64_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_s8 (void) --{ -- int64x2_t out_int64x2_t; -- int8x16_t arg0_int8x16_t; -- -- out_int64x2_t = vreinterpretq_s64_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_u16 (void) --{ -- int64x2_t out_int64x2_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_int64x2_t = vreinterpretq_s64_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_u32 (void) --{ -- int64x2_t out_int64x2_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_int64x2_t = vreinterpretq_s64_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_u64 (void) --{ -- int64x2_t out_int64x2_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_int64x2_t = vreinterpretq_s64_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs64_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs64_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs64_u8 (void) --{ -- int64x2_t out_int64x2_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_int64x2_t = vreinterpretq_s64_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_f32 (void) --{ -- int8x16_t out_int8x16_t; -- float32x4_t arg0_float32x4_t; -- -- out_int8x16_t = vreinterpretq_s8_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_p128 (void) --{ -- int8x16_t out_int8x16_t; -- poly128_t arg0_poly128_t; -- -- out_int8x16_t = vreinterpretq_s8_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_p16 (void) --{ -- int8x16_t out_int8x16_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_int8x16_t = vreinterpretq_s8_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_p64 (void) --{ -- int8x16_t out_int8x16_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_int8x16_t = vreinterpretq_s8_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_p8 (void) --{ -- int8x16_t out_int8x16_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_int8x16_t = vreinterpretq_s8_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_s16 (void) --{ -- int8x16_t out_int8x16_t; -- int16x8_t arg0_int16x8_t; -- -- out_int8x16_t = vreinterpretq_s8_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_s32 (void) --{ -- int8x16_t out_int8x16_t; -- int32x4_t arg0_int32x4_t; -- -- out_int8x16_t = vreinterpretq_s8_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_s64 (void) --{ -- int8x16_t out_int8x16_t; -- int64x2_t arg0_int64x2_t; -- -- out_int8x16_t = vreinterpretq_s8_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_u16 (void) --{ -- int8x16_t out_int8x16_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_int8x16_t = vreinterpretq_s8_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_u32 (void) --{ -- int8x16_t out_int8x16_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_int8x16_t = vreinterpretq_s8_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_u64 (void) --{ -- int8x16_t out_int8x16_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_int8x16_t = vreinterpretq_s8_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQs8_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQs8_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQs8_u8 (void) --{ -- int8x16_t out_int8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_int8x16_t = vreinterpretq_s8_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_f32 (void) --{ -- uint16x8_t out_uint16x8_t; -- float32x4_t arg0_float32x4_t; -- -- out_uint16x8_t = vreinterpretq_u16_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_p128 (void) --{ -- uint16x8_t out_uint16x8_t; -- poly128_t arg0_poly128_t; -- -- out_uint16x8_t = vreinterpretq_u16_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_p16 (void) --{ -- uint16x8_t out_uint16x8_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_uint16x8_t = vreinterpretq_u16_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_p64 (void) --{ -- uint16x8_t out_uint16x8_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_uint16x8_t = vreinterpretq_u16_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_p8 (void) --{ -- uint16x8_t out_uint16x8_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_uint16x8_t = vreinterpretq_u16_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_s16 (void) --{ -- uint16x8_t out_uint16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_uint16x8_t = vreinterpretq_u16_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_s32 (void) --{ -- uint16x8_t out_uint16x8_t; -- int32x4_t arg0_int32x4_t; -- -- out_uint16x8_t = vreinterpretq_u16_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_s64 (void) --{ -- uint16x8_t out_uint16x8_t; -- int64x2_t arg0_int64x2_t; -- -- out_uint16x8_t = vreinterpretq_u16_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_s8 (void) --{ -- uint16x8_t out_uint16x8_t; -- int8x16_t arg0_int8x16_t; -- -- out_uint16x8_t = vreinterpretq_u16_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_u32 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint16x8_t = vreinterpretq_u16_u32 (arg0_uint32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_u64 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint16x8_t = vreinterpretq_u16_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu16_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu16_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu16_u8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint16x8_t = vreinterpretq_u16_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_f32 (void) --{ -- uint32x4_t out_uint32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_uint32x4_t = vreinterpretq_u32_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_p128 (void) --{ -- uint32x4_t out_uint32x4_t; -- poly128_t arg0_poly128_t; -- -- out_uint32x4_t = vreinterpretq_u32_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_p16 (void) --{ -- uint32x4_t out_uint32x4_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_uint32x4_t = vreinterpretq_u32_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_p64 (void) --{ -- uint32x4_t out_uint32x4_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_uint32x4_t = vreinterpretq_u32_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_p8 (void) --{ -- uint32x4_t out_uint32x4_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_uint32x4_t = vreinterpretq_u32_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_s16 (void) --{ -- uint32x4_t out_uint32x4_t; -- int16x8_t arg0_int16x8_t; -- -- out_uint32x4_t = vreinterpretq_u32_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_s32 (void) --{ -- uint32x4_t out_uint32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_uint32x4_t = vreinterpretq_u32_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_s64 (void) --{ -- uint32x4_t out_uint32x4_t; -- int64x2_t arg0_int64x2_t; -- -- out_uint32x4_t = vreinterpretq_u32_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_s8 (void) --{ -- uint32x4_t out_uint32x4_t; -- int8x16_t arg0_int8x16_t; -- -- out_uint32x4_t = vreinterpretq_u32_s8 (arg0_int8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_u16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint32x4_t = vreinterpretq_u32_u16 (arg0_uint16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_u64 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint64x2_t arg0_uint64x2_t; -- -- out_uint32x4_t = vreinterpretq_u32_u64 (arg0_uint64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu32_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu32_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu32_u8 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint32x4_t = vreinterpretq_u32_u8 (arg0_uint8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu64_f32 (void) --{ -- uint64x2_t out_uint64x2_t; -- float32x4_t arg0_float32x4_t; -- -- out_uint64x2_t = vreinterpretq_u64_f32 (arg0_float32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_p128.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_p128' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu64_p128 (void) --{ -- uint64x2_t out_uint64x2_t; -- poly128_t arg0_poly128_t; -- -- out_uint64x2_t = vreinterpretq_u64_p128 (arg0_poly128_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu64_p16 (void) --{ -- uint64x2_t out_uint64x2_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_uint64x2_t = vreinterpretq_u64_p16 (arg0_poly16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu64_p64 (void) --{ -- uint64x2_t out_uint64x2_t; -- poly64x2_t arg0_poly64x2_t; -- -- out_uint64x2_t = vreinterpretq_u64_p64 (arg0_poly64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu64_p8 (void) --{ -- uint64x2_t out_uint64x2_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_uint64x2_t = vreinterpretq_u64_p8 (arg0_poly8x16_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu64_s16 (void) --{ -- uint64x2_t out_uint64x2_t; -- int16x8_t arg0_int16x8_t; -- -- out_uint64x2_t = vreinterpretq_u64_s16 (arg0_int16x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu64_s32 (void) --{ -- uint64x2_t out_uint64x2_t; -- int32x4_t arg0_int32x4_t; -- -- out_uint64x2_t = vreinterpretq_u64_s32 (arg0_int32x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretQu64_s64 (void) --{ -- uint64x2_t out_uint64x2_t; -- int64x2_t arg0_int64x2_t; -- -- out_uint64x2_t = vreinterpretq_u64_s64 (arg0_int64x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretQu64_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu64_s8 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  int8x16_t arg0_int8x16_t;
--
--  out_uint64x2_t = vreinterpretq_u64_s8 (arg0_int8x16_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu64_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu64_u16 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  uint16x8_t arg0_uint16x8_t;
--
--  out_uint64x2_t = vreinterpretq_u64_u16 (arg0_uint16x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu64_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu64_u32 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  uint32x4_t arg0_uint32x4_t;
--
--  out_uint64x2_t = vreinterpretq_u64_u32 (arg0_uint32x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu64_u8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu64_u8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu64_u8 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  uint8x16_t arg0_uint8x16_t;
--
--  out_uint64x2_t = vreinterpretq_u64_u8 (arg0_uint8x16_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_f32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_f32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_f32 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  float32x4_t arg0_float32x4_t;
--
--  out_uint8x16_t = vreinterpretq_u8_f32 (arg0_float32x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_p128.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_p128' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_p128 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  poly128_t arg0_poly128_t;
--
--  out_uint8x16_t = vreinterpretq_u8_p128 (arg0_poly128_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_p16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_p16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_p16 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  poly16x8_t arg0_poly16x8_t;
--
--  out_uint8x16_t = vreinterpretq_u8_p16 (arg0_poly16x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_p64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_p64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_p64 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  poly64x2_t arg0_poly64x2_t;
--
--  out_uint8x16_t = vreinterpretq_u8_p64 (arg0_poly64x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_p8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_p8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_p8 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  poly8x16_t arg0_poly8x16_t;
--
--  out_uint8x16_t = vreinterpretq_u8_p8 (arg0_poly8x16_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_s16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_s16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_s16 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  int16x8_t arg0_int16x8_t;
--
--  out_uint8x16_t = vreinterpretq_u8_s16 (arg0_int16x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_s32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_s32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_s32 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  int32x4_t arg0_int32x4_t;
--
--  out_uint8x16_t = vreinterpretq_u8_s32 (arg0_int32x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_s64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_s64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_s64 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  int64x2_t arg0_int64x2_t;
--
--  out_uint8x16_t = vreinterpretq_u8_s64 (arg0_int64x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_s8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_s8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_s8 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  int8x16_t arg0_int8x16_t;
--
--  out_uint8x16_t = vreinterpretq_u8_s8 (arg0_int8x16_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_u16 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  uint16x8_t arg0_uint16x8_t;
--
--  out_uint8x16_t = vreinterpretq_u8_u16 (arg0_uint16x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_u32 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  uint32x4_t arg0_uint32x4_t;
--
--  out_uint8x16_t = vreinterpretq_u8_u32 (arg0_uint32x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretQu8_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretQu8_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretQu8_u64 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  uint64x2_t arg0_uint64x2_t;
--
--  out_uint8x16_t = vreinterpretq_u8_u64 (arg0_uint64x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_p16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_p16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_p16 (void)
--{
--  float32x2_t out_float32x2_t;
--  poly16x4_t arg0_poly16x4_t;
--
--  out_float32x2_t = vreinterpret_f32_p16 (arg0_poly16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_p64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_p64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_p64 (void)
--{
--  float32x2_t out_float32x2_t;
--  poly64x1_t arg0_poly64x1_t;
--
--  out_float32x2_t = vreinterpret_f32_p64 (arg0_poly64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_p8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_p8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_p8 (void)
--{
--  float32x2_t out_float32x2_t;
--  poly8x8_t arg0_poly8x8_t;
--
--  out_float32x2_t = vreinterpret_f32_p8 (arg0_poly8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_s16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_s16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_s16 (void)
--{
--  float32x2_t out_float32x2_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_float32x2_t = vreinterpret_f32_s16 (arg0_int16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_s32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_s32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_s32 (void)
--{
--  float32x2_t out_float32x2_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_float32x2_t = vreinterpret_f32_s32 (arg0_int32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_s64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_s64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_s64 (void)
--{
--  float32x2_t out_float32x2_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_float32x2_t = vreinterpret_f32_s64 (arg0_int64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_s8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_s8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_s8 (void)
--{
--  float32x2_t out_float32x2_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_float32x2_t = vreinterpret_f32_s8 (arg0_int8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_u16 (void)
--{
--  float32x2_t out_float32x2_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_float32x2_t = vreinterpret_f32_u16 (arg0_uint16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_u32 (void)
--{
--  float32x2_t out_float32x2_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_float32x2_t = vreinterpret_f32_u32 (arg0_uint32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_u64 (void)
--{
--  float32x2_t out_float32x2_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_float32x2_t = vreinterpret_f32_u64 (arg0_uint64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretf32_u8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretf32_u8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretf32_u8 (void)
--{
--  float32x2_t out_float32x2_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_float32x2_t = vreinterpret_f32_u8 (arg0_uint8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_f32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_f32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_f32 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_poly16x4_t = vreinterpret_p16_f32 (arg0_float32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_p64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_p64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_p64 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  poly64x1_t arg0_poly64x1_t;
--
--  out_poly16x4_t = vreinterpret_p16_p64 (arg0_poly64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_p8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_p8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_p8 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  poly8x8_t arg0_poly8x8_t;
--
--  out_poly16x4_t = vreinterpret_p16_p8 (arg0_poly8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_s16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_s16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_s16 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_poly16x4_t = vreinterpret_p16_s16 (arg0_int16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_s32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_s32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_s32 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_poly16x4_t = vreinterpret_p16_s32 (arg0_int32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_s64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_s64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_s64 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_poly16x4_t = vreinterpret_p16_s64 (arg0_int64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_s8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_s8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_s8 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_poly16x4_t = vreinterpret_p16_s8 (arg0_int8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_u16 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_poly16x4_t = vreinterpret_p16_u16 (arg0_uint16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_u32 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_poly16x4_t = vreinterpret_p16_u32 (arg0_uint32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_u64 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_poly16x4_t = vreinterpret_p16_u64 (arg0_uint64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp16_u8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp16_u8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp16_u8 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_poly16x4_t = vreinterpret_p16_u8 (arg0_uint8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_f32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_f32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_f32 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_poly64x1_t = vreinterpret_p64_f32 (arg0_float32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_p16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_p16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_p16 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  poly16x4_t arg0_poly16x4_t;
--
--  out_poly64x1_t = vreinterpret_p64_p16 (arg0_poly16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_p8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_p8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_p8 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  poly8x8_t arg0_poly8x8_t;
--
--  out_poly64x1_t = vreinterpret_p64_p8 (arg0_poly8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_s16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_s16 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_poly64x1_t = vreinterpret_p64_s16 (arg0_int16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_s32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_s32 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_poly64x1_t = vreinterpret_p64_s32 (arg0_int32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_s64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_s64 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_poly64x1_t = vreinterpret_p64_s64 (arg0_int64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_s8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_s8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_s8 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_poly64x1_t = vreinterpret_p64_s8 (arg0_int8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_u16 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_poly64x1_t = vreinterpret_p64_u16 (arg0_uint16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_u32 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_poly64x1_t = vreinterpret_p64_u32 (arg0_uint32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_u64 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_poly64x1_t = vreinterpret_p64_u64 (arg0_uint64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp64_u8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp64_u8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp64_u8 (void)
--{
--  poly64x1_t out_poly64x1_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_poly64x1_t = vreinterpret_p64_u8 (arg0_uint8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_f32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_f32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_f32 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_poly8x8_t = vreinterpret_p8_f32 (arg0_float32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_p16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_p16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_p16 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  poly16x4_t arg0_poly16x4_t;
--
--  out_poly8x8_t = vreinterpret_p8_p16 (arg0_poly16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_p64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_p64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_p64 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  poly64x1_t arg0_poly64x1_t;
--
--  out_poly8x8_t = vreinterpret_p8_p64 (arg0_poly64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_s16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_s16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_s16 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_poly8x8_t = vreinterpret_p8_s16 (arg0_int16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_s32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_s32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_s32 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_poly8x8_t = vreinterpret_p8_s32 (arg0_int32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_s64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_s64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_s64 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_poly8x8_t = vreinterpret_p8_s64 (arg0_int64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_s8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_s8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_s8 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_poly8x8_t = vreinterpret_p8_s8 (arg0_int8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_u16 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_poly8x8_t = vreinterpret_p8_u16 (arg0_uint16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_u32 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_poly8x8_t = vreinterpret_p8_u32 (arg0_uint32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_u64 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_poly8x8_t = vreinterpret_p8_u64 (arg0_uint64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretp8_u8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterpretp8_u8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterpretp8_u8 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_poly8x8_t = vreinterpret_p8_u8 (arg0_uint8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_f32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_f32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_f32 (void)
--{
--  int16x4_t out_int16x4_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_int16x4_t = vreinterpret_s16_f32 (arg0_float32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_p16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_p16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_p16 (void)
--{
--  int16x4_t out_int16x4_t;
--  poly16x4_t arg0_poly16x4_t;
--
--  out_int16x4_t = vreinterpret_s16_p16 (arg0_poly16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_p64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_p64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_p64 (void)
--{
--  int16x4_t out_int16x4_t;
--  poly64x1_t arg0_poly64x1_t;
--
--  out_int16x4_t = vreinterpret_s16_p64 (arg0_poly64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_p8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_p8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_p8 (void)
--{
--  int16x4_t out_int16x4_t;
--  poly8x8_t arg0_poly8x8_t;
--
--  out_int16x4_t = vreinterpret_s16_p8 (arg0_poly8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_s32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_s32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_s32 (void)
--{
--  int16x4_t out_int16x4_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_int16x4_t = vreinterpret_s16_s32 (arg0_int32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_s64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_s64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_s64 (void)
--{
--  int16x4_t out_int16x4_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_int16x4_t = vreinterpret_s16_s64 (arg0_int64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_s8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_s8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_s8 (void)
--{
--  int16x4_t out_int16x4_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_int16x4_t = vreinterpret_s16_s8 (arg0_int8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_u16 (void)
--{
--  int16x4_t out_int16x4_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_int16x4_t = vreinterpret_s16_u16 (arg0_uint16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_u32 (void)
--{
--  int16x4_t out_int16x4_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_int16x4_t = vreinterpret_s16_u32 (arg0_uint32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_u64 (void)
--{
--  int16x4_t out_int16x4_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_int16x4_t = vreinterpret_s16_u64 (arg0_uint64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets16_u8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets16_u8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets16_u8 (void)
--{
--  int16x4_t out_int16x4_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_int16x4_t = vreinterpret_s16_u8 (arg0_uint8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_f32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_f32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_f32 (void)
--{
--  int32x2_t out_int32x2_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_int32x2_t = vreinterpret_s32_f32 (arg0_float32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_p16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_p16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_p16 (void)
--{
--  int32x2_t out_int32x2_t;
--  poly16x4_t arg0_poly16x4_t;
--
--  out_int32x2_t = vreinterpret_s32_p16 (arg0_poly16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_p64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_p64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_p64 (void)
--{
--  int32x2_t out_int32x2_t;
--  poly64x1_t arg0_poly64x1_t;
--
--  out_int32x2_t = vreinterpret_s32_p64 (arg0_poly64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_p8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_p8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_p8 (void)
--{
--  int32x2_t out_int32x2_t;
--  poly8x8_t arg0_poly8x8_t;
--
--  out_int32x2_t = vreinterpret_s32_p8 (arg0_poly8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_s16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_s16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_s16 (void)
--{
--  int32x2_t out_int32x2_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_int32x2_t = vreinterpret_s32_s16 (arg0_int16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_s64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_s64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_s64 (void)
--{
--  int32x2_t out_int32x2_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_int32x2_t = vreinterpret_s32_s64 (arg0_int64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_s8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_s8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_s8 (void)
--{
--  int32x2_t out_int32x2_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_int32x2_t = vreinterpret_s32_s8 (arg0_int8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_u16 (void)
--{
--  int32x2_t out_int32x2_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_int32x2_t = vreinterpret_s32_u16 (arg0_uint16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_u32 (void)
--{
--  int32x2_t out_int32x2_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_int32x2_t = vreinterpret_s32_u32 (arg0_uint32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_u64 (void)
--{
--  int32x2_t out_int32x2_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_int32x2_t = vreinterpret_s32_u64 (arg0_uint64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets32_u8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets32_u8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets32_u8 (void)
--{
--  int32x2_t out_int32x2_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_int32x2_t = vreinterpret_s32_u8 (arg0_uint8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_f32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_f32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_f32 (void)
--{
--  int64x1_t out_int64x1_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_int64x1_t = vreinterpret_s64_f32 (arg0_float32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_p16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_p16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_p16 (void)
--{
--  int64x1_t out_int64x1_t;
--  poly16x4_t arg0_poly16x4_t;
--
--  out_int64x1_t = vreinterpret_s64_p16 (arg0_poly16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_p64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_p64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_p64 (void)
--{
--  int64x1_t out_int64x1_t;
--  poly64x1_t arg0_poly64x1_t;
--
--  out_int64x1_t = vreinterpret_s64_p64 (arg0_poly64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_p8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_p8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_p8 (void)
--{
--  int64x1_t out_int64x1_t;
--  poly8x8_t arg0_poly8x8_t;
--
--  out_int64x1_t = vreinterpret_s64_p8 (arg0_poly8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_s16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_s16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_s16 (void)
--{
--  int64x1_t out_int64x1_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_int64x1_t = vreinterpret_s64_s16 (arg0_int16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_s32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_s32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_s32 (void)
--{
--  int64x1_t out_int64x1_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_int64x1_t = vreinterpret_s64_s32 (arg0_int32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_s8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_s8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_s8 (void)
--{
--  int64x1_t out_int64x1_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_int64x1_t = vreinterpret_s64_s8 (arg0_int8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_u16 (void)
--{
--  int64x1_t out_int64x1_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_int64x1_t = vreinterpret_s64_u16 (arg0_uint16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_u32 (void)
--{
--  int64x1_t out_int64x1_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_int64x1_t = vreinterpret_s64_u32 (arg0_uint32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_u64 (void)
--{
--  int64x1_t out_int64x1_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_int64x1_t = vreinterpret_s64_u64 (arg0_uint64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets64_u8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets64_u8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets64_u8 (void)
--{
--  int64x1_t out_int64x1_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_int64x1_t = vreinterpret_s64_u8 (arg0_uint8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_f32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_f32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_f32 (void)
--{
--  int8x8_t out_int8x8_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_int8x8_t = vreinterpret_s8_f32 (arg0_float32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_p16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_p16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_p16 (void)
--{
--  int8x8_t out_int8x8_t;
--  poly16x4_t arg0_poly16x4_t;
--
--  out_int8x8_t = vreinterpret_s8_p16 (arg0_poly16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_p64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_p64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_p64 (void)
--{
--  int8x8_t out_int8x8_t;
--  poly64x1_t arg0_poly64x1_t;
--
--  out_int8x8_t = vreinterpret_s8_p64 (arg0_poly64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_p8.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_p8' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_p8 (void)
--{
--  int8x8_t out_int8x8_t;
--  poly8x8_t arg0_poly8x8_t;
--
--  out_int8x8_t = vreinterpret_s8_p8 (arg0_poly8x8_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_s16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_s16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_s16 (void)
--{
--  int8x8_t out_int8x8_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_int8x8_t = vreinterpret_s8_s16 (arg0_int16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_s32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_s32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_s32 (void)
--{
--  int8x8_t out_int8x8_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_int8x8_t = vreinterpret_s8_s32 (arg0_int32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_s64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_s64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_s64 (void)
--{
--  int8x8_t out_int8x8_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_int8x8_t = vreinterpret_s8_s64 (arg0_int64x1_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_u16.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_u16' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_u16 (void)
--{
--  int8x8_t out_int8x8_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_int8x8_t = vreinterpret_s8_u16 (arg0_uint16x4_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_u32.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_u32' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vreinterprets8_u32 (void)
--{
--  int8x8_t out_int8x8_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_int8x8_t = vreinterpret_s8_u32 (arg0_uint32x2_t);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_u64.c
-+++ b/src//dev/null
-@@ -1,18 +0,0 @@
--/* Test the `vreinterprets8_u64' ARM Neon intrinsic.  */
--/* This file was autogenerated by neon-testgen.  */
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterprets8_u64 (void) --{ -- int8x8_t out_int8x8_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_int8x8_t = vreinterpret_s8_u64 (arg0_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterprets8_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterprets8_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterprets8_u8 (void) --{ -- int8x8_t out_int8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_int8x8_t = vreinterpret_s8_u8 (arg0_uint8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_f32 (void) --{ -- uint16x4_t out_uint16x4_t; -- float32x2_t arg0_float32x2_t; -- -- out_uint16x4_t = vreinterpret_u16_f32 (arg0_float32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_p16 (void) --{ -- uint16x4_t out_uint16x4_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_uint16x4_t = vreinterpret_u16_p16 (arg0_poly16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_p64 (void) --{ -- uint16x4_t out_uint16x4_t; -- poly64x1_t arg0_poly64x1_t; -- -- out_uint16x4_t = vreinterpret_u16_p64 (arg0_poly64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_p8 (void) --{ -- uint16x4_t out_uint16x4_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_uint16x4_t = vreinterpret_u16_p8 (arg0_poly8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_s16 (void) --{ -- uint16x4_t out_uint16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_uint16x4_t = vreinterpret_u16_s16 (arg0_int16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_s32 (void) --{ -- uint16x4_t out_uint16x4_t; -- int32x2_t arg0_int32x2_t; -- -- out_uint16x4_t = vreinterpret_u16_s32 (arg0_int32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_s64 (void) --{ -- uint16x4_t out_uint16x4_t; -- int64x1_t arg0_int64x1_t; -- -- out_uint16x4_t = vreinterpret_u16_s64 (arg0_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_s8 (void) --{ -- uint16x4_t out_uint16x4_t; -- int8x8_t arg0_int8x8_t; -- -- out_uint16x4_t = vreinterpret_u16_s8 (arg0_int8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_u32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint16x4_t = vreinterpret_u16_u32 (arg0_uint32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_u64 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_uint16x4_t = vreinterpret_u16_u64 (arg0_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu16_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu16_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu16_u8 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint16x4_t = vreinterpret_u16_u8 (arg0_uint8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_f32 (void) --{ -- uint32x2_t out_uint32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_uint32x2_t = vreinterpret_u32_f32 (arg0_float32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_p16 (void) --{ -- uint32x2_t out_uint32x2_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_uint32x2_t = vreinterpret_u32_p16 (arg0_poly16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_p64 (void) --{ -- uint32x2_t out_uint32x2_t; -- poly64x1_t arg0_poly64x1_t; -- -- out_uint32x2_t = vreinterpret_u32_p64 (arg0_poly64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_p8 (void) --{ -- uint32x2_t out_uint32x2_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_uint32x2_t = vreinterpret_u32_p8 (arg0_poly8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_s16 (void) --{ -- uint32x2_t out_uint32x2_t; -- int16x4_t arg0_int16x4_t; -- -- out_uint32x2_t = vreinterpret_u32_s16 (arg0_int16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_s32 (void) --{ -- uint32x2_t out_uint32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_uint32x2_t = vreinterpret_u32_s32 (arg0_int32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_s64 (void) --{ -- uint32x2_t out_uint32x2_t; -- int64x1_t arg0_int64x1_t; -- -- out_uint32x2_t = vreinterpret_u32_s64 (arg0_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_s8 (void) --{ -- uint32x2_t out_uint32x2_t; -- int8x8_t arg0_int8x8_t; -- -- out_uint32x2_t = vreinterpret_u32_s8 (arg0_int8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_u16 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint32x2_t = vreinterpret_u32_u16 (arg0_uint16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_u64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_uint32x2_t = vreinterpret_u32_u64 (arg0_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu32_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu32_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu32_u8 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint32x2_t = vreinterpret_u32_u8 (arg0_uint8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_f32 (void) --{ -- uint64x1_t out_uint64x1_t; -- float32x2_t arg0_float32x2_t; -- -- out_uint64x1_t = vreinterpret_u64_f32 (arg0_float32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_p16 (void) --{ -- uint64x1_t out_uint64x1_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_uint64x1_t = vreinterpret_u64_p16 (arg0_poly16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_p64 (void) --{ -- uint64x1_t out_uint64x1_t; -- poly64x1_t arg0_poly64x1_t; -- -- out_uint64x1_t = vreinterpret_u64_p64 (arg0_poly64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_p8 (void) --{ -- uint64x1_t out_uint64x1_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_uint64x1_t = vreinterpret_u64_p8 (arg0_poly8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_s16 (void) --{ -- uint64x1_t out_uint64x1_t; -- int16x4_t arg0_int16x4_t; -- -- out_uint64x1_t = vreinterpret_u64_s16 (arg0_int16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_s32 (void) --{ -- uint64x1_t out_uint64x1_t; -- int32x2_t arg0_int32x2_t; -- -- out_uint64x1_t = vreinterpret_u64_s32 (arg0_int32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_s64 (void) --{ -- uint64x1_t out_uint64x1_t; -- int64x1_t arg0_int64x1_t; -- -- out_uint64x1_t = vreinterpret_u64_s64 (arg0_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_s8 (void) --{ -- uint64x1_t out_uint64x1_t; -- int8x8_t arg0_int8x8_t; -- -- out_uint64x1_t = vreinterpret_u64_s8 (arg0_int8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_u16 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint64x1_t = vreinterpret_u64_u16 (arg0_uint16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_u32 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint64x1_t = vreinterpret_u64_u32 (arg0_uint32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu64_u8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu64_u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu64_u8 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint64x1_t = vreinterpret_u64_u8 (arg0_uint8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_f32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_f32 (void) --{ -- uint8x8_t out_uint8x8_t; -- float32x2_t arg0_float32x2_t; -- -- out_uint8x8_t = vreinterpret_u8_f32 (arg0_float32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_p16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_p16 (void) --{ -- uint8x8_t out_uint8x8_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_uint8x8_t = vreinterpret_u8_p16 (arg0_poly16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_p64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_p64 (void) --{ -- uint8x8_t out_uint8x8_t; -- poly64x1_t arg0_poly64x1_t; -- -- out_uint8x8_t = vreinterpret_u8_p64 (arg0_poly64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_p8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_p8 (void) --{ -- uint8x8_t out_uint8x8_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_uint8x8_t = vreinterpret_u8_p8 (arg0_poly8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_s16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_s16 (void) --{ -- uint8x8_t out_uint8x8_t; -- int16x4_t arg0_int16x4_t; -- -- out_uint8x8_t = vreinterpret_u8_s16 (arg0_int16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_s32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_s32 (void) --{ -- uint8x8_t out_uint8x8_t; -- int32x2_t arg0_int32x2_t; -- -- out_uint8x8_t = vreinterpret_u8_s32 (arg0_int32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_s64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_s64 (void) --{ -- uint8x8_t out_uint8x8_t; -- int64x1_t arg0_int64x1_t; -- -- out_uint8x8_t = vreinterpret_u8_s64 (arg0_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_s8.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_s8 (void) --{ -- uint8x8_t out_uint8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_uint8x8_t = vreinterpret_u8_s8 (arg0_int8x8_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_u16.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_u16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint8x8_t = vreinterpret_u8_u16 (arg0_uint16x4_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_u32.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_u32 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint8x8_t = vreinterpret_u8_u32 (arg0_uint32x2_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vreinterpretu8_u64.c -+++ b/src//dev/null -@@ -1,18 +0,0 @@ --/* Test the `vreinterpretu8_u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vreinterpretu8_u64 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint64x1_t arg0_uint64x1_t; -- -- out_uint8x8_t = vreinterpret_u8_u64 (arg0_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev16Qp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev16Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev16Qp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly8x16_t = vrev16q_p8 (arg0_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vrev16\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev16Qs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev16Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev16Qs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vrev16q_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vrev16\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev16Qu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev16Qu8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev16Qu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x16_t = vrev16q_u8 (arg0_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vrev16\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev16p8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev16p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev16p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_poly8x8_t = vrev16_p8 (arg0_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vrev16\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev16s8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev16s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev16s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vrev16_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vrev16\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev16u8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev16u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev16u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vrev16_u8 (arg0_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vrev16\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32Qp16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32Qp16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_poly16x8_t = vrev32q_p16 (arg0_poly16x8_t); --} -- --/* { dg-final { scan-assembler "vrev32\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32Qp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32Qp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly8x16_t = vrev32q_p8 (arg0_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vrev32\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32Qs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32Qs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vrev32q_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vrev32\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32Qs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32Qs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vrev32q_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vrev32\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32Qu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32Qu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16x8_t = vrev32q_u16 (arg0_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vrev32\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32Qu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32Qu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x16_t = vrev32q_u8 (arg0_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vrev32\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32p16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32p16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_poly16x4_t = vrev32_p16 (arg0_poly16x4_t); --} -- --/* { dg-final { scan-assembler "vrev32\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32p8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_poly8x8_t = vrev32_p8 (arg0_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vrev32\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32s16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32s16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vrev32_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vrev32\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32s8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vrev32_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vrev32\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32u16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32u16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16x4_t = vrev32_u16 (arg0_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vrev32\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev32u8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev32u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev32u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vrev32_u8 (arg0_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vrev32\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- -- out_float32x4_t = vrev64q_f32 (arg0_float32x4_t); --} -- --/* { dg-final { scan-assembler "vrev64\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qp16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qp16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16x8_t arg0_poly16x8_t; -- -- out_poly16x8_t = vrev64q_p16 (arg0_poly16x8_t); --} -- --/* { dg-final { scan-assembler "vrev64\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qp8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg0_poly8x16_t; -- -- out_poly8x16_t = vrev64q_p8 (arg0_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vrev64\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- -- out_int16x8_t = vrev64q_s16 (arg0_int16x8_t); --} -- --/* { dg-final { scan-assembler "vrev64\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- -- out_int32x4_t = vrev64q_s32 (arg0_int32x4_t); --} -- --/* { dg-final { scan-assembler "vrev64\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- -- out_int8x16_t = vrev64q_s8 (arg0_int8x16_t); --} -- --/* { dg-final { scan-assembler "vrev64\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- -- out_uint16x8_t = vrev64q_u16 (arg0_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vrev64\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- -- out_uint32x4_t = vrev64q_u32 (arg0_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vrev64\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64Qu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64Qu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- -- out_uint8x16_t = vrev64q_u8 (arg0_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vrev64\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64f32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_float32x2_t = vrev64_f32 (arg0_float32x2_t); --} -- --/* { dg-final { scan-assembler "vrev64\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64p16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64p16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16x4_t arg0_poly16x4_t; -- -- out_poly16x4_t = vrev64_p16 (arg0_poly16x4_t); --} -- --/* { dg-final { scan-assembler "vrev64\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64p8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- -- out_poly8x8_t = vrev64_p8 (arg0_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vrev64\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64s16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64s16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- -- out_int16x4_t = vrev64_s16 (arg0_int16x4_t); --} -- --/* { dg-final { scan-assembler "vrev64\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64s32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- -- out_int32x2_t = vrev64_s32 (arg0_int32x2_t); --} -- --/* { dg-final { scan-assembler "vrev64\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64s8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- -- out_int8x8_t = vrev64_s8 (arg0_int8x8_t); --} -- --/* { dg-final { scan-assembler "vrev64\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64u16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64u16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- -- out_uint16x4_t = vrev64_u16 (arg0_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vrev64\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64u32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- -- out_uint32x2_t = vrev64_u32 (arg0_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vrev64\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrev64u8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrev64u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vrev64u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- -- out_uint8x8_t = vrev64_u8 (arg0_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vrev64\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndaf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrndaf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_v8_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_v8_neon } */ -- --#include "arm_neon.h" -- --void test_vrndaf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- -- out_float32x2_t = vrnda_f32 (arg0_float32x2_t); --} -- --/* { dg-final { scan-assembler "vrinta\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndaqf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vrndaq_f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndaqf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--
--  out_float32x4_t = vrndaq_f32 (arg0_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vrinta\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrndf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndf32 (void)
--{
--  float32x2_t out_float32x2_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_float32x2_t = vrnd_f32 (arg0_float32x2_t);
--}
--
--/* { dg-final { scan-assembler "vrintz\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndmf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrndmf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndmf32 (void)
--{
--  float32x2_t out_float32x2_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_float32x2_t = vrndm_f32 (arg0_float32x2_t);
--}
--
--/* { dg-final { scan-assembler "vrintm\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndmqf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrndmq_f32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndmqf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--
--  out_float32x4_t = vrndmq_f32 (arg0_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vrintm\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndnf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrndnf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndnf32 (void)
--{
--  float32x2_t out_float32x2_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_float32x2_t = vrndn_f32 (arg0_float32x2_t);
--}
--
--/* { dg-final { scan-assembler "vrintn\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndnqf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrndnq_f32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndnqf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--
--  out_float32x4_t = vrndnq_f32 (arg0_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vrintn\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndpf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrndpf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndpf32 (void)
--{
--  float32x2_t out_float32x2_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_float32x2_t = vrndp_f32 (arg0_float32x2_t);
--}
--
--/* { dg-final { scan-assembler "vrintp\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndpqf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrndpq_f32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndpqf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--
--  out_float32x4_t = vrndpq_f32 (arg0_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vrintp\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrndqf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrndqf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_v8_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_v8_neon } */
--
--#include "arm_neon.h"
--
--void test_vrndqf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--
--  out_float32x4_t = vrndq_f32 (arg0_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vrintz\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrsqrteQf32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrsqrteQf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vrsqrteQf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--
--  out_float32x4_t = vrsqrteq_f32 (arg0_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vrsqrte\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrsqrteQu32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrsqrteQu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vrsqrteQu32 (void)
--{
--  uint32x4_t out_uint32x4_t;
--  uint32x4_t arg0_uint32x4_t;
--
--  out_uint32x4_t = vrsqrteq_u32 (arg0_uint32x4_t);
--}
--
--/* { dg-final { scan-assembler "vrsqrte\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrsqrtef32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrsqrtef32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vrsqrtef32 (void)
--{
--  float32x2_t out_float32x2_t;
--  float32x2_t arg0_float32x2_t;
--
--  out_float32x2_t = vrsqrte_f32 (arg0_float32x2_t);
--}
--
--/* { dg-final { scan-assembler "vrsqrte\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrsqrteu32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vrsqrteu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vrsqrteu32 (void)
--{
--  uint32x2_t out_uint32x2_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_uint32x2_t = vrsqrte_u32 (arg0_uint32x2_t);
--}
--
--/* { dg-final { scan-assembler "vrsqrte\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrsqrtsQf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vrsqrtsQf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vrsqrtsQf32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32x4_t arg0_float32x4_t;
--  float32x4_t arg1_float32x4_t;
--
--  out_float32x4_t = vrsqrtsq_f32 (arg0_float32x4_t, arg1_float32x4_t);
--}
--
--/* { dg-final { scan-assembler "vrsqrts\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vrsqrtsf32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vrsqrtsf32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vrsqrtsf32 (void)
--{
--  float32x2_t out_float32x2_t;
--  float32x2_t arg0_float32x2_t;
--  float32x2_t arg1_float32x2_t;
--
--  out_float32x2_t = vrsqrts_f32 (arg0_float32x2_t, arg1_float32x2_t);
--}
--
--/* { dg-final { scan-assembler "vrsqrts\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_lanef32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_lanef32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_lanef32 (void)
--{
--  float32x4_t out_float32x4_t;
--  float32_t arg0_float32_t;
--  float32x4_t arg1_float32x4_t;
--
--  out_float32x4_t = vsetq_lane_f32 (arg0_float32_t, arg1_float32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.32\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_lanep16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_lanep16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_lanep16 (void)
--{
--  poly16x8_t out_poly16x8_t;
--  poly16_t arg0_poly16_t;
--  poly16x8_t arg1_poly16x8_t;
--
--  out_poly16x8_t = vsetq_lane_p16 (arg0_poly16_t, arg1_poly16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.16\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_lanep8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_lanep8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_lanep8 (void)
--{
--  poly8x16_t out_poly8x16_t;
--  poly8_t arg0_poly8_t;
--  poly8x16_t arg1_poly8x16_t;
--
--  out_poly8x16_t = vsetq_lane_p8 (arg0_poly8_t, arg1_poly8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.8\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_lanes16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_lanes16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_lanes16 (void)
--{
--  int16x8_t out_int16x8_t;
--  int16_t arg0_int16_t;
--  int16x8_t arg1_int16x8_t;
--
--  out_int16x8_t = vsetq_lane_s16 (arg0_int16_t, arg1_int16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.16\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_lanes32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_lanes32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_lanes32 (void)
--{
--  int32x4_t out_int32x4_t;
--  int32_t arg0_int32_t;
--  int32x4_t arg1_int32x4_t;
--
--  out_int32x4_t = vsetq_lane_s32 (arg0_int32_t, arg1_int32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.32\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_lanes64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_lanes64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_lanes64 (void)
--{
--  int64x2_t out_int64x2_t;
--  int64_t arg0_int64_t;
--  int64x2_t arg1_int64x2_t;
--
--  out_int64x2_t = vsetq_lane_s64 (arg0_int64_t, arg1_int64x2_t, 0);
--}
--
--/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_lanes8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_lanes8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_lanes8 (void)
--{
--  int8x16_t out_int8x16_t;
--  int8_t arg0_int8_t;
--  int8x16_t arg1_int8x16_t;
--
--  out_int8x16_t = vsetq_lane_s8 (arg0_int8_t, arg1_int8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.8\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_laneu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_laneu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_laneu16 (void)
--{
--  uint16x8_t out_uint16x8_t;
--  uint16_t arg0_uint16_t;
--  uint16x8_t arg1_uint16x8_t;
--
--  out_uint16x8_t = vsetq_lane_u16 (arg0_uint16_t, arg1_uint16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.16\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_laneu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_laneu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_laneu32 (void)
--{
--  uint32x4_t out_uint32x4_t;
--  uint32_t arg0_uint32_t;
--  uint32x4_t arg1_uint32x4_t;
--
--  out_uint32x4_t = vsetq_lane_u32 (arg0_uint32_t, arg1_uint32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.32\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_laneu64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_laneu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_laneu64 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  uint64_t arg0_uint64_t;
--  uint64x2_t arg1_uint64x2_t;
--
--  out_uint64x2_t = vsetq_lane_u64 (arg0_uint64_t, arg1_uint64x2_t, 0);
--}
--
--/* { dg-final { scan-assembler "vmov\[ \]+\[dD\]\[0-9\]+, \[rR\]\[0-9\]+, \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsetQ_laneu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsetQ_laneu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsetQ_laneu8 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  uint8_t arg0_uint8_t;
--  uint8x16_t arg1_uint8x16_t;
--
--  out_uint8x16_t = vsetq_lane_u8 (arg0_uint8_t, arg1_uint8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.8\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_lanef32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_lanef32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_lanef32 (void)
--{
--  float32x2_t out_float32x2_t;
--  float32_t arg0_float32_t;
--  float32x2_t arg1_float32x2_t;
--
--  out_float32x2_t = vset_lane_f32 (arg0_float32_t, arg1_float32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.32\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_lanep16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_lanep16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_lanep16 (void)
--{
--  poly16x4_t out_poly16x4_t;
--  poly16_t arg0_poly16_t;
--  poly16x4_t arg1_poly16x4_t;
--
--  out_poly16x4_t = vset_lane_p16 (arg0_poly16_t, arg1_poly16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.16\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_lanep8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_lanep8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_lanep8 (void)
--{
--  poly8x8_t out_poly8x8_t;
--  poly8_t arg0_poly8_t;
--  poly8x8_t arg1_poly8x8_t;
--
--  out_poly8x8_t = vset_lane_p8 (arg0_poly8_t, arg1_poly8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.8\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_lanes16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_lanes16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_lanes16 (void)
--{
--  int16x4_t out_int16x4_t;
--  int16_t arg0_int16_t;
--  int16x4_t arg1_int16x4_t;
--
--  out_int16x4_t = vset_lane_s16 (arg0_int16_t, arg1_int16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.16\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_lanes32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_lanes32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_lanes32 (void)
--{
--  int32x2_t out_int32x2_t;
--  int32_t arg0_int32_t;
--  int32x2_t arg1_int32x2_t;
--
--  out_int32x2_t = vset_lane_s32 (arg0_int32_t, arg1_int32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.32\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_lanes64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vset_lanes64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_lanes64 (void)
--{
--  int64x1_t out_int64x1_t;
--  int64_t arg0_int64_t;
--  int64x1_t arg1_int64x1_t;
--
--  out_int64x1_t = vset_lane_s64 (arg0_int64_t, arg1_int64x1_t, 0);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_lanes8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_lanes8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_lanes8 (void)
--{
--  int8x8_t out_int8x8_t;
--  int8_t arg0_int8_t;
--  int8x8_t arg1_int8x8_t;
--
--  out_int8x8_t = vset_lane_s8 (arg0_int8_t, arg1_int8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.8\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_laneu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_laneu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_laneu16 (void)
--{
--  uint16x4_t out_uint16x4_t;
--  uint16_t arg0_uint16_t;
--  uint16x4_t arg1_uint16x4_t;
--
--  out_uint16x4_t = vset_lane_u16 (arg0_uint16_t, arg1_uint16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.16\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_laneu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_laneu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_laneu32 (void)
--{
--  uint32x2_t out_uint32x2_t;
--  uint32_t arg0_uint32_t;
--  uint32x2_t arg1_uint32x2_t;
--
--  out_uint32x2_t = vset_lane_u32 (arg0_uint32_t, arg1_uint32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.32\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_laneu64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vset_laneu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_laneu64 (void)
--{
--  uint64x1_t out_uint64x1_t;
--  uint64_t arg0_uint64_t;
--  uint64x1_t arg1_uint64x1_t;
--
--  out_uint64x1_t = vset_lane_u64 (arg0_uint64_t, arg1_uint64x1_t, 0);
--}
--
---- a/src/gcc/testsuite/gcc.target/arm/neon/vset_laneu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vset_laneu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vset_laneu8 (void)
--{
--  uint8x8_t out_uint8x8_t;
--  uint8_t arg0_uint8_t;
--  uint8x8_t arg1_uint8x8_t;
--
--  out_uint8x8_t = vset_lane_u8 (arg0_uint8_t, arg1_uint8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vmov\.8\[ \]+\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[rR\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQ_ns16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshlQ_ns16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQ_ns16 (void)
--{
--  int16x8_t out_int16x8_t;
--  int16x8_t arg0_int16x8_t;
--
--  out_int16x8_t = vshlq_n_s16 (arg0_int16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQ_ns32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshlQ_ns32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQ_ns32 (void)
--{
--  int32x4_t out_int32x4_t;
--  int32x4_t arg0_int32x4_t;
--
--  out_int32x4_t = vshlq_n_s32 (arg0_int32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQ_ns64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshlQ_ns64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQ_ns64 (void)
--{
--  int64x2_t out_int64x2_t;
--  int64x2_t arg0_int64x2_t;
--
--  out_int64x2_t = vshlq_n_s64 (arg0_int64x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQ_ns8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshlQ_ns8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQ_ns8 (void)
--{
--  int8x16_t out_int8x16_t;
--  int8x16_t arg0_int8x16_t;
--
--  out_int8x16_t = vshlq_n_s8 (arg0_int8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQ_nu16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshlQ_nu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQ_nu16 (void)
--{
--  uint16x8_t out_uint16x8_t;
--  uint16x8_t arg0_uint16x8_t;
--
--  out_uint16x8_t = vshlq_n_u16 (arg0_uint16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQ_nu32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshlQ_nu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQ_nu32 (void)
--{
--  uint32x4_t out_uint32x4_t;
--  uint32x4_t arg0_uint32x4_t;
--
--  out_uint32x4_t = vshlq_n_u32 (arg0_uint32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQ_nu64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshlQ_nu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQ_nu64 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  uint64x2_t arg0_uint64x2_t;
--
--  out_uint64x2_t = vshlq_n_u64 (arg0_uint64x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQ_nu8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshlQ_nu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQ_nu8 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  uint8x16_t arg0_uint8x16_t;
--
--  out_uint8x16_t = vshlq_n_u8 (arg0_uint8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQs16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlQs16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQs16 (void)
--{
--  int16x8_t out_int16x8_t;
--  int16x8_t arg0_int16x8_t;
--  int16x8_t arg1_int16x8_t;
--
--  out_int16x8_t = vshlq_s16 (arg0_int16x8_t, arg1_int16x8_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQs32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlQs32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQs32 (void)
--{
--  int32x4_t out_int32x4_t;
--  int32x4_t arg0_int32x4_t;
--  int32x4_t arg1_int32x4_t;
--
--  out_int32x4_t = vshlq_s32 (arg0_int32x4_t, arg1_int32x4_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQs64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlQs64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQs64 (void)
--{
--  int64x2_t out_int64x2_t;
--  int64x2_t arg0_int64x2_t;
--  int64x2_t arg1_int64x2_t;
--
--  out_int64x2_t = vshlq_s64 (arg0_int64x2_t, arg1_int64x2_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQs8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlQs8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQs8 (void)
--{
--  int8x16_t out_int8x16_t;
--  int8x16_t arg0_int8x16_t;
--  int8x16_t arg1_int8x16_t;
--
--  out_int8x16_t = vshlq_s8 (arg0_int8x16_t, arg1_int8x16_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlQu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQu16 (void)
--{
--  uint16x8_t out_uint16x8_t;
--  uint16x8_t arg0_uint16x8_t;
--  int16x8_t arg1_int16x8_t;
--
--  out_uint16x8_t = vshlq_u16 (arg0_uint16x8_t, arg1_int16x8_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlQu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQu32 (void)
--{
--  uint32x4_t out_uint32x4_t;
--  uint32x4_t arg0_uint32x4_t;
--  int32x4_t arg1_int32x4_t;
--
--  out_uint32x4_t = vshlq_u32 (arg0_uint32x4_t, arg1_int32x4_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQu64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlQu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQu64 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  uint64x2_t arg0_uint64x2_t;
--  int64x2_t arg1_int64x2_t;
--
--  out_uint64x2_t = vshlq_u64 (arg0_uint64x2_t, arg1_int64x2_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlQu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlQu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlQu8 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  uint8x16_t arg0_uint8x16_t;
--  int8x16_t arg1_int8x16_t;
--
--  out_uint8x16_t = vshlq_u8 (arg0_uint8x16_t, arg1_int8x16_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshl_ns16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshl_ns16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshl_ns16 (void)
--{
--  int16x4_t out_int16x4_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_int16x4_t = vshl_n_s16 (arg0_int16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshl_ns32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshl_ns32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshl_ns32 (void)
--{
--  int32x2_t out_int32x2_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_int32x2_t = vshl_n_s32 (arg0_int32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshl_ns64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshl_ns64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshl_ns64 (void)
--{
--  int64x1_t out_int64x1_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_int64x1_t = vshl_n_s64 (arg0_int64x1_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshl_ns8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshl_ns8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshl_ns8 (void)
--{
--  int8x8_t out_int8x8_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_int8x8_t = vshl_n_s8 (arg0_int8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshl_nu16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshl_nu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshl_nu16 (void)
--{
--  uint16x4_t out_uint16x4_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_uint16x4_t = vshl_n_u16 (arg0_uint16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshl_nu32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshl_nu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshl_nu32 (void)
--{
--  uint32x2_t out_uint32x2_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_uint32x2_t = vshl_n_u32 (arg0_uint32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshl_nu64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshl_nu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshl_nu64 (void)
--{
--  uint64x1_t out_uint64x1_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_uint64x1_t = vshl_n_u64 (arg0_uint64x1_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshl_nu8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshl_nu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshl_nu8 (void)
--{
--  uint8x8_t out_uint8x8_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_uint8x8_t = vshl_n_u8 (arg0_uint8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshl\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshll_ns16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshll_ns16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshll_ns16 (void)
--{
--  int32x4_t out_int32x4_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_int32x4_t = vshll_n_s16 (arg0_int16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshll\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshll_ns32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshll_ns32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshll_ns32 (void)
--{
--  int64x2_t out_int64x2_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_int64x2_t = vshll_n_s32 (arg0_int32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshll\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshll_ns8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshll_ns8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshll_ns8 (void)
--{
--  int16x8_t out_int16x8_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_int16x8_t = vshll_n_s8 (arg0_int8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshll\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshll_nu16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshll_nu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshll_nu16 (void)
--{
--  uint32x4_t out_uint32x4_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_uint32x4_t = vshll_n_u16 (arg0_uint16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshll\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshll_nu32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshll_nu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshll_nu32 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_uint64x2_t = vshll_n_u32 (arg0_uint32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshll\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshll_nu8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshll_nu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshll_nu8 (void)
--{
--  uint16x8_t out_uint16x8_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_uint16x8_t = vshll_n_u8 (arg0_uint8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshll\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshls16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshls16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshls16 (void)
--{
--  int16x4_t out_int16x4_t;
--  int16x4_t arg0_int16x4_t;
--  int16x4_t arg1_int16x4_t;
--
--  out_int16x4_t = vshl_s16 (arg0_int16x4_t, arg1_int16x4_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshls32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshls32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshls32 (void)
--{
--  int32x2_t out_int32x2_t;
--  int32x2_t arg0_int32x2_t;
--  int32x2_t arg1_int32x2_t;
--
--  out_int32x2_t = vshl_s32 (arg0_int32x2_t, arg1_int32x2_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshls64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshls64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshls64 (void)
--{
--  int64x1_t out_int64x1_t;
--  int64x1_t arg0_int64x1_t;
--  int64x1_t arg1_int64x1_t;
--
--  out_int64x1_t = vshl_s64 (arg0_int64x1_t, arg1_int64x1_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshls8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshls8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshls8 (void)
--{
--  int8x8_t out_int8x8_t;
--  int8x8_t arg0_int8x8_t;
--  int8x8_t arg1_int8x8_t;
--
--  out_int8x8_t = vshl_s8 (arg0_int8x8_t, arg1_int8x8_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlu16 (void)
--{
--  uint16x4_t out_uint16x4_t;
--  uint16x4_t arg0_uint16x4_t;
--  int16x4_t arg1_int16x4_t;
--
--  out_uint16x4_t = vshl_u16 (arg0_uint16x4_t, arg1_int16x4_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlu32 (void)
--{
--  uint32x2_t out_uint32x2_t;
--  uint32x2_t arg0_uint32x2_t;
--  int32x2_t arg1_int32x2_t;
--
--  out_uint32x2_t = vshl_u32 (arg0_uint32x2_t, arg1_int32x2_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlu64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlu64 (void)
--{
--  uint64x1_t out_uint64x1_t;
--  uint64x1_t arg0_uint64x1_t;
--  int64x1_t arg1_int64x1_t;
--
--  out_uint64x1_t = vshl_u64 (arg0_uint64x1_t, arg1_int64x1_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshlu8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vshlu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshlu8 (void)
--{
--  uint8x8_t out_uint8x8_t;
--  uint8x8_t arg0_uint8x8_t;
--  int8x8_t arg1_int8x8_t;
--
--  out_uint8x8_t = vshl_u8 (arg0_uint8x8_t, arg1_int8x8_t);
--}
--
--/* { dg-final { scan-assembler "vshl\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrQ_ns16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrQ_ns16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrQ_ns16 (void)
--{
--  int16x8_t out_int16x8_t;
--  int16x8_t arg0_int16x8_t;
--
--  out_int16x8_t = vshrq_n_s16 (arg0_int16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrQ_ns32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrQ_ns32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrQ_ns32 (void)
--{
--  int32x4_t out_int32x4_t;
--  int32x4_t arg0_int32x4_t;
--
--  out_int32x4_t = vshrq_n_s32 (arg0_int32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrQ_ns64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrQ_ns64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrQ_ns64 (void)
--{
--  int64x2_t out_int64x2_t;
--  int64x2_t arg0_int64x2_t;
--
--  out_int64x2_t = vshrq_n_s64 (arg0_int64x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrQ_ns8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrQ_ns8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrQ_ns8 (void)
--{
--  int8x16_t out_int8x16_t;
--  int8x16_t arg0_int8x16_t;
--
--  out_int8x16_t = vshrq_n_s8 (arg0_int8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrQ_nu16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrQ_nu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrQ_nu16 (void)
--{
--  uint16x8_t out_uint16x8_t;
--  uint16x8_t arg0_uint16x8_t;
--
--  out_uint16x8_t = vshrq_n_u16 (arg0_uint16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrQ_nu32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrQ_nu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrQ_nu32 (void)
--{
--  uint32x4_t out_uint32x4_t;
--  uint32x4_t arg0_uint32x4_t;
--
--  out_uint32x4_t = vshrq_n_u32 (arg0_uint32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrQ_nu64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrQ_nu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrQ_nu64 (void)
--{
--  uint64x2_t out_uint64x2_t;
--  uint64x2_t arg0_uint64x2_t;
--
--  out_uint64x2_t = vshrq_n_u64 (arg0_uint64x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrQ_nu8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrQ_nu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrQ_nu8 (void)
--{
--  uint8x16_t out_uint8x16_t;
--  uint8x16_t arg0_uint8x16_t;
--
--  out_uint8x16_t = vshrq_n_u8 (arg0_uint8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshr_ns16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshr_ns16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshr_ns16 (void)
--{
--  int16x4_t out_int16x4_t;
--  int16x4_t arg0_int16x4_t;
--
--  out_int16x4_t = vshr_n_s16 (arg0_int16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshr_ns32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshr_ns32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshr_ns32 (void)
--{
--  int32x2_t out_int32x2_t;
--  int32x2_t arg0_int32x2_t;
--
--  out_int32x2_t = vshr_n_s32 (arg0_int32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshr_ns64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshr_ns64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshr_ns64 (void)
--{
--  int64x1_t out_int64x1_t;
--  int64x1_t arg0_int64x1_t;
--
--  out_int64x1_t = vshr_n_s64 (arg0_int64x1_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshr_ns8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshr_ns8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshr_ns8 (void)
--{
--  int8x8_t out_int8x8_t;
--  int8x8_t arg0_int8x8_t;
--
--  out_int8x8_t = vshr_n_s8 (arg0_int8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshr_nu16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshr_nu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshr_nu16 (void)
--{
--  uint16x4_t out_uint16x4_t;
--  uint16x4_t arg0_uint16x4_t;
--
--  out_uint16x4_t = vshr_n_u16 (arg0_uint16x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshr_nu32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshr_nu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshr_nu32 (void)
--{
--  uint32x2_t out_uint32x2_t;
--  uint32x2_t arg0_uint32x2_t;
--
--  out_uint32x2_t = vshr_n_u32 (arg0_uint32x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshr_nu64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshr_nu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshr_nu64 (void)
--{
--  uint64x1_t out_uint64x1_t;
--  uint64x1_t arg0_uint64x1_t;
--
--  out_uint64x1_t = vshr_n_u64 (arg0_uint64x1_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshr_nu8.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshr_nu8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshr_nu8 (void)
--{
--  uint8x8_t out_uint8x8_t;
--  uint8x8_t arg0_uint8x8_t;
--
--  out_uint8x8_t = vshr_n_u8 (arg0_uint8x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshr\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrn_ns16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrn_ns16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrn_ns16 (void)
--{
--  int8x8_t out_int8x8_t;
--  int16x8_t arg0_int16x8_t;
--
--  out_int8x8_t = vshrn_n_s16 (arg0_int16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshrn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrn_ns32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrn_ns32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrn_ns32 (void)
--{
--  int16x4_t out_int16x4_t;
--  int32x4_t arg0_int32x4_t;
--
--  out_int16x4_t = vshrn_n_s32 (arg0_int32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshrn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrn_ns64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrn_ns64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrn_ns64 (void)
--{
--  int32x2_t out_int32x2_t;
--  int64x2_t arg0_int64x2_t;
--
--  out_int32x2_t = vshrn_n_s64 (arg0_int64x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshrn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrn_nu16.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrn_nu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrn_nu16 (void)
--{
--  uint8x8_t out_uint8x8_t;
--  uint16x8_t arg0_uint16x8_t;
--
--  out_uint8x8_t = vshrn_n_u16 (arg0_uint16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshrn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrn_nu32.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrn_nu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrn_nu32 (void)
--{
--  uint16x4_t out_uint16x4_t;
--  uint32x4_t arg0_uint32x4_t;
--
--  out_uint16x4_t = vshrn_n_u32 (arg0_uint32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshrn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vshrn_nu64.c
-+++ b/src//dev/null
-@@ -1,19 +0,0 @@
--/* Test the `vshrn_nu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vshrn_nu64 (void)
--{
--  uint32x2_t out_uint32x2_t;
--  uint64x2_t arg0_uint64x2_t;
--
--  out_uint32x2_t = vshrn_n_u64 (arg0_uint64x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vshrn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_np16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_np16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_np16 (void)
--{
--  poly16x8_t out_poly16x8_t;
--  poly16x8_t arg0_poly16x8_t;
--  poly16x8_t arg1_poly16x8_t;
--
--  out_poly16x8_t = vsliq_n_p16 (arg0_poly16x8_t, arg1_poly16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_np64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_np64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_crypto_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_crypto } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_np64 (void)
--{
--  poly64x2_t out_poly64x2_t;
--  poly64x2_t arg0_poly64x2_t;
--  poly64x2_t arg1_poly64x2_t;
--
--  out_poly64x2_t = vsliq_n_p64 (arg0_poly64x2_t, arg1_poly64x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_np8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_np8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_np8 (void)
--{
--  poly8x16_t out_poly8x16_t;
--  poly8x16_t arg0_poly8x16_t;
--  poly8x16_t arg1_poly8x16_t;
--
--  out_poly8x16_t = vsliq_n_p8 (arg0_poly8x16_t, arg1_poly8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_ns16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_ns16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_ns16 (void)
--{
--  int16x8_t out_int16x8_t;
--  int16x8_t arg0_int16x8_t;
--  int16x8_t arg1_int16x8_t;
--
--  out_int16x8_t = vsliq_n_s16 (arg0_int16x8_t, arg1_int16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_ns32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_ns32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_ns32 (void)
--{
--  int32x4_t out_int32x4_t;
--  int32x4_t arg0_int32x4_t;
--  int32x4_t arg1_int32x4_t;
--
--  out_int32x4_t = vsliq_n_s32 (arg0_int32x4_t, arg1_int32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_ns64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_ns64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_ns64 (void)
--{
--  int64x2_t out_int64x2_t;
--  int64x2_t arg0_int64x2_t;
--  int64x2_t arg1_int64x2_t;
--
--  out_int64x2_t = vsliq_n_s64 (arg0_int64x2_t, arg1_int64x2_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_ns8.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_ns8' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_ns8 (void)
--{
--  int8x16_t out_int8x16_t;
--  int8x16_t arg0_int8x16_t;
--  int8x16_t arg1_int8x16_t;
--
--  out_int8x16_t = vsliq_n_s8 (arg0_int8x16_t, arg1_int8x16_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_nu16.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_nu16' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_nu16 (void)
--{
--  uint16x8_t out_uint16x8_t;
--  uint16x8_t arg0_uint16x8_t;
--  uint16x8_t arg1_uint16x8_t;
--
--  out_uint16x8_t = vsliq_n_u16 (arg0_uint16x8_t, arg1_uint16x8_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_nu32.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_nu32' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. */
--
--/* { dg-do assemble } */
--/* { dg-require-effective-target arm_neon_ok } */
--/* { dg-options "-save-temps -O0" } */
--/* { dg-add-options arm_neon } */
--
--#include "arm_neon.h"
--
--void test_vsliQ_nu32 (void)
--{
--  uint32x4_t out_uint32x4_t;
--  uint32x4_t arg0_uint32x4_t;
--  uint32x4_t arg1_uint32x4_t;
--
--  out_uint32x4_t = vsliq_n_u32 (arg0_uint32x4_t, arg1_uint32x4_t, 1);
--}
--
--/* { dg-final { scan-assembler "vsli\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_nu64.c
-+++ b/src//dev/null
-@@ -1,20 +0,0 @@
--/* Test the `vsliQ_nu64' ARM Neon intrinsic. */
--/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsliQ_nu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vsliq_n_u64 (arg0_uint64x2_t, arg1_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsliQ_nu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsliQ_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsliQ_nu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vsliq_n_u8 (arg0_uint8x16_t, arg1_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_np16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_np16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_np16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16x4_t arg0_poly16x4_t; -- poly16x4_t arg1_poly16x4_t; -- -- out_poly16x4_t = vsli_n_p16 (arg0_poly16x4_t, arg1_poly16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_np64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_np64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vsli_np64 (void) --{ -- poly64x1_t out_poly64x1_t; -- poly64x1_t arg0_poly64x1_t; -- poly64x1_t arg1_poly64x1_t; -- -- out_poly64x1_t = vsli_n_p64 (arg0_poly64x1_t, arg1_poly64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_np8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_np8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_np8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x8_t = vsli_n_p8 (arg0_poly8x8_t, arg1_poly8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_ns16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vsli_n_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vsli_n_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_ns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_ns64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vsli_n_s64 (arg0_int64x1_t, arg1_int64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_ns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_ns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vsli_n_s8 (arg0_int8x8_t, arg1_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vsli_n_u16 (arg0_uint16x4_t, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_nu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vsli_n_u32 (arg0_uint32x2_t, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_nu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_nu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vsli_n_u64 (arg0_uint64x1_t, arg1_uint64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsli_nu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsli_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsli_nu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vsli_n_u8 (arg0_uint8x8_t, arg1_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsli\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsraQ_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsraQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsraQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vsraq_n_s16 (arg0_int16x8_t, arg1_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsraQ_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsraQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsraQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vsraq_n_s32 (arg0_int32x4_t, arg1_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsraQ_ns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsraQ_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsraQ_ns64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vsraq_n_s64 (arg0_int64x2_t, arg1_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.s64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsraQ_ns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsraQ_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsraQ_ns8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vsraq_n_s8 (arg0_int8x16_t, arg1_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsraQ_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsraQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsraQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vsraq_n_u16 (arg0_uint16x8_t, arg1_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsraQ_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsraQ_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsraQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vsraq_n_u32 (arg0_uint32x4_t, arg1_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsraQ_nu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsraQ_nu64' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsraQ_nu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vsraq_n_u64 (arg0_uint64x2_t, arg1_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.u64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsraQ_nu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsraQ_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsraQ_nu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vsraq_n_u8 (arg0_uint8x16_t, arg1_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsra_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsra_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsra_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vsra_n_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsra_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsra_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsra_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vsra_n_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsra_ns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsra_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsra_ns64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vsra_n_s64 (arg0_int64x1_t, arg1_int64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.s64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsra_ns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsra_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsra_ns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vsra_n_s8 (arg0_int8x8_t, arg1_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsra_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsra_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsra_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vsra_n_u16 (arg0_uint16x4_t, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsra_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsra_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsra_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vsra_n_u32 (arg0_uint32x2_t, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsra_nu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsra_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsra_nu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vsra_n_u64 (arg0_uint64x1_t, arg1_uint64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.u64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsra_nu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsra_nu8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsra_nu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vsra_n_u8 (arg0_uint8x8_t, arg1_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsra\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_np16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_np16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_np16 (void) --{ -- poly16x8_t out_poly16x8_t; -- poly16x8_t arg0_poly16x8_t; -- poly16x8_t arg1_poly16x8_t; -- -- out_poly16x8_t = vsriq_n_p16 (arg0_poly16x8_t, arg1_poly16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_np64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_np64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vsriQ_np64 (void) --{ -- poly64x2_t out_poly64x2_t; -- poly64x2_t arg0_poly64x2_t; -- poly64x2_t arg1_poly64x2_t; -- -- out_poly64x2_t = vsriq_n_p64 (arg0_poly64x2_t, arg1_poly64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_np8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_np8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_np8 (void) --{ -- poly8x16_t out_poly8x16_t; -- poly8x16_t arg0_poly8x16_t; -- poly8x16_t arg1_poly8x16_t; -- -- out_poly8x16_t = vsriq_n_p8 (arg0_poly8x16_t, arg1_poly8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_ns16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vsriq_n_s16 (arg0_int16x8_t, arg1_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_ns32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vsriq_n_s32 (arg0_int32x4_t, arg1_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_ns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_ns64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vsriq_n_s64 (arg0_int64x2_t, arg1_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_ns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_ns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_ns8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vsriq_n_s8 (arg0_int8x16_t, arg1_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_nu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vsriq_n_u16 (arg0_uint16x8_t, arg1_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_nu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_nu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vsriq_n_u32 (arg0_uint32x4_t, arg1_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_nu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_nu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vsriq_n_u64 (arg0_uint64x2_t, arg1_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsriQ_nu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsriQ_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsriQ_nu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vsriq_n_u8 (arg0_uint8x16_t, arg1_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_np16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_np16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_np16 (void) --{ -- poly16x4_t out_poly16x4_t; -- poly16x4_t arg0_poly16x4_t; -- poly16x4_t arg1_poly16x4_t; -- -- out_poly16x4_t = vsri_n_p16 (arg0_poly16x4_t, arg1_poly16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_np64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_np64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vsri_np64 (void) --{ -- poly64x1_t out_poly64x1_t; -- poly64x1_t arg0_poly64x1_t; -- poly64x1_t arg1_poly64x1_t; -- -- out_poly64x1_t = vsri_n_p64 (arg0_poly64x1_t, arg1_poly64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_np8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_np8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_np8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x8_t = vsri_n_p8 (arg0_poly8x8_t, arg1_poly8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_ns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_ns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_ns16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vsri_n_s16 (arg0_int16x4_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_ns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_ns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_ns32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vsri_n_s32 (arg0_int32x2_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_ns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_ns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_ns64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vsri_n_s64 (arg0_int64x1_t, arg1_int64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_ns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_ns8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_ns8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vsri_n_s8 (arg0_int8x8_t, arg1_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_nu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_nu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_nu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vsri_n_u16 (arg0_uint16x4_t, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_nu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_nu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_nu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vsri_n_u32 (arg0_uint32x2_t, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_nu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_nu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_nu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vsri_n_u64 (arg0_uint64x1_t, arg1_uint64x1_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.64\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsri_nu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsri_nu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsri_nu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vsri_n_u8 (arg0_uint8x8_t, arg1_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vsri\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_lanef32 (void) --{ -- float32_t *arg0_float32_t; -- float32x4_t arg1_float32x4_t; -- -- vst1q_lane_f32 (arg0_float32_t, arg1_float32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_lanep16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x8_t arg1_poly16x8_t; -- -- vst1q_lane_p16 (arg0_poly16_t, arg1_poly16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_lanep64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vst1Q_lanep64 (void) --{ -- poly64_t *arg0_poly64_t; -- poly64x2_t arg1_poly64x2_t; -- -- vst1q_lane_p64 (arg0_poly64_t, arg1_poly64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_lanep8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x16_t arg1_poly8x16_t; -- -- vst1q_lane_p8 (arg0_poly8_t, arg1_poly8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_lanes16 (void) --{ -- int16_t *arg0_int16_t; -- int16x8_t arg1_int16x8_t; -- -- vst1q_lane_s16 (arg0_int16_t, arg1_int16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_lanes32 (void) --{ -- int32_t *arg0_int32_t; -- int32x4_t arg1_int32x4_t; -- -- vst1q_lane_s32 (arg0_int32_t, arg1_int32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_lanes64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_lanes64 (void) --{ -- int64_t *arg0_int64_t; -- int64x2_t arg1_int64x2_t; -- -- vst1q_lane_s64 (arg0_int64_t, arg1_int64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_lanes8 (void) --{ -- int8_t *arg0_int8_t; -- int8x16_t arg1_int8x16_t; -- -- vst1q_lane_s8 (arg0_int8_t, arg1_int8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_laneu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x8_t arg1_uint16x8_t; -- -- vst1q_lane_u16 (arg0_uint16_t, arg1_uint16x8_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_laneu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x4_t arg1_uint32x4_t; -- -- vst1q_lane_u32 (arg0_uint32_t, arg1_uint32x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64-1.c -+++ b/src//dev/null -@@ -1,25 +0,0 @@ --/* Test the `vst1Q_laneu64' ARM Neon intrinsic. */ -- --/* Detect ICE in the case of unaligned memory address. */ -- --/* { dg-do compile } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --unsigned char dummy_store[1000]; -- --void --foo (char* addr) --{ -- uint8x16_t vdata = vld1q_u8 (addr); -- vst1q_lane_u64 ((uint64_t*) &dummy_store, vreinterpretq_u64_u8 (vdata), 0); --} -- --uint64_t --bar (uint64x2_t vdata) --{ -- vdata = vld1q_lane_u64 ((uint64_t*) &dummy_store, vdata, 0); -- return vgetq_lane_u64 (vdata, 0); --} ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_laneu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_laneu64 (void) --{ -- uint64_t *arg0_uint64_t; -- uint64x2_t arg1_uint64x2_t; -- -- vst1q_lane_u64 (arg0_uint64_t, arg1_uint64x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Q_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Q_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Q_laneu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x16_t arg1_uint8x16_t; -- -- vst1q_lane_u8 (arg0_uint8_t, arg1_uint8x16_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qf32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qf32 (void) --{ -- float32_t *arg0_float32_t; -- float32x4_t arg1_float32x4_t; -- -- vst1q_f32 (arg0_float32_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qp16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x8_t arg1_poly16x8_t; -- -- vst1q_p16 (arg0_poly16_t, arg1_poly16x8_t); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qp64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vst1Qp64 (void) --{ -- poly64_t *arg0_poly64_t; -- poly64x2_t arg1_poly64x2_t; -- -- vst1q_p64 (arg0_poly64_t, arg1_poly64x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qp8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qp8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x16_t arg1_poly8x16_t; -- -- vst1q_p8 (arg0_poly8_t, arg1_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qs16 (void) --{ -- int16_t *arg0_int16_t; -- int16x8_t arg1_int16x8_t; -- -- vst1q_s16 (arg0_int16_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qs32 (void) --{ -- int32_t *arg0_int32_t; -- int32x4_t arg1_int32x4_t; -- -- vst1q_s32 (arg0_int32_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qs64 (void) --{ -- int64_t *arg0_int64_t; -- int64x2_t arg1_int64x2_t; -- -- vst1q_s64 (arg0_int64_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qs8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qs8 (void) --{ -- int8_t *arg0_int8_t; -- int8x16_t arg1_int8x16_t; -- -- vst1q_s8 (arg0_int8_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x8_t arg1_uint16x8_t; -- -- vst1q_u16 (arg0_uint16_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x4_t arg1_uint32x4_t; -- -- vst1q_u32 (arg0_uint32_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qu64 (void) --{ -- uint64_t *arg0_uint64_t; -- uint64x2_t arg1_uint64x2_t; -- -- vst1q_u64 (arg0_uint64_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1Qu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1Qu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x16_t arg1_uint8x16_t; -- -- vst1q_u8 (arg0_uint8_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_lanef32 (void) --{ -- float32_t *arg0_float32_t; -- float32x2_t arg1_float32x2_t; -- -- vst1_lane_f32 (arg0_float32_t, arg1_float32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_lanep16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x4_t arg1_poly16x4_t; -- -- vst1_lane_p16 (arg0_poly16_t, arg1_poly16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_lanep64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vst1_lanep64 (void) --{ -- poly64_t *arg0_poly64_t; -- poly64x1_t arg1_poly64x1_t; -- -- vst1_lane_p64 (arg0_poly64_t, arg1_poly64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_lanep8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x8_t arg1_poly8x8_t; -- -- vst1_lane_p8 (arg0_poly8_t, arg1_poly8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_lanes16 (void) --{ -- int16_t *arg0_int16_t; -- int16x4_t arg1_int16x4_t; -- -- vst1_lane_s16 (arg0_int16_t, arg1_int16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_lanes32 (void) --{ -- int32_t *arg0_int32_t; -- int32x2_t arg1_int32x2_t; -- -- vst1_lane_s32 (arg0_int32_t, arg1_int32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_lanes64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_lanes64 (void) --{ -- int64_t *arg0_int64_t; -- int64x1_t arg1_int64x1_t; -- -- vst1_lane_s64 (arg0_int64_t, arg1_int64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_lanes8 (void) --{ -- int8_t *arg0_int8_t; -- int8x8_t arg1_int8x8_t; -- -- vst1_lane_s8 (arg0_int8_t, arg1_int8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_laneu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x4_t arg1_uint16x4_t; -- -- vst1_lane_u16 (arg0_uint16_t, arg1_uint16x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_laneu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x2_t arg1_uint32x2_t; -- -- vst1_lane_u32 (arg0_uint32_t, arg1_uint32x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_laneu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_laneu64 (void) --{ -- uint64_t *arg0_uint64_t; -- uint64x1_t arg1_uint64x1_t; -- -- vst1_lane_u64 (arg0_uint64_t, arg1_uint64x1_t, 0); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1_laneu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x8_t arg1_uint8x8_t; -- -- vst1_lane_u8 (arg0_uint8_t, arg1_uint8x8_t, 1); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]\\\})|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1f32 (void) --{ -- float32_t *arg0_float32_t; -- float32x2_t arg1_float32x2_t; -- -- vst1_f32 (arg0_float32_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1p16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x4_t arg1_poly16x4_t; -- -- vst1_p16 (arg0_poly16_t, arg1_poly16x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vst1p64 (void) --{ -- poly64_t *arg0_poly64_t; -- poly64x1_t arg1_poly64x1_t; -- -- vst1_p64 (arg0_poly64_t, arg1_poly64x1_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1p8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1p8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x8_t arg1_poly8x8_t; -- -- vst1_p8 (arg0_poly8_t, arg1_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1s16 (void) --{ -- int16_t *arg0_int16_t; -- int16x4_t arg1_int16x4_t; -- -- vst1_s16 (arg0_int16_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1s32 (void) --{ -- int32_t *arg0_int32_t; -- int32x2_t arg1_int32x2_t; -- -- vst1_s32 (arg0_int32_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1s64 (void) --{ -- int64_t *arg0_int64_t; -- int64x1_t arg1_int64x1_t; -- -- vst1_s64 (arg0_int64_t, arg1_int64x1_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1s8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1s8 (void) --{ -- int8_t *arg0_int8_t; -- int8x8_t arg1_int8x8_t; -- -- vst1_s8 (arg0_int8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1u16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1u16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x4_t arg1_uint16x4_t; -- -- vst1_u16 (arg0_uint16_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.16\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1u32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x2_t arg1_uint32x2_t; -- -- vst1_u32 (arg0_uint32_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.32\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1u64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1u64 (void) --{ -- uint64_t *arg0_uint64_t; -- uint64x1_t arg1_uint64x1_t; -- -- vst1_u64 (arg0_uint64_t, arg1_uint64x1_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst1u8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst1u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst1u8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x8_t arg1_uint8x8_t; -- -- vst1_u8 (arg0_uint8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vst1\.8\[ \]+((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2Q_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Q_lanef32 (void) --{ -- float32_t *arg0_float32_t; -- float32x4x2_t arg1_float32x4x2_t; -- -- vst2q_lane_f32 (arg0_float32_t, arg1_float32x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2Q_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Q_lanep16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x8x2_t arg1_poly16x8x2_t; -- -- vst2q_lane_p16 (arg0_poly16_t, arg1_poly16x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2Q_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Q_lanes16 (void) --{ -- int16_t *arg0_int16_t; -- int16x8x2_t arg1_int16x8x2_t; -- -- vst2q_lane_s16 (arg0_int16_t, arg1_int16x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2Q_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Q_lanes32 (void) --{ -- int32_t *arg0_int32_t; -- int32x4x2_t arg1_int32x4x2_t; -- -- vst2q_lane_s32 (arg0_int32_t, arg1_int32x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2Q_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Q_laneu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x8x2_t arg1_uint16x8x2_t; -- -- vst2q_lane_u16 (arg0_uint16_t, arg1_uint16x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Q_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2Q_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Q_laneu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x4x2_t arg1_uint32x4x2_t; -- -- vst2q_lane_u32 (arg0_uint32_t, arg1_uint32x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qf32 (void) --{ -- float32_t *arg0_float32_t; -- float32x4x2_t arg1_float32x4x2_t; -- -- vst2q_f32 (arg0_float32_t, arg1_float32x4x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qp16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x8x2_t arg1_poly16x8x2_t; -- -- vst2q_p16 (arg0_poly16_t, arg1_poly16x8x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qp8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x16x2_t arg1_poly8x16x2_t; -- -- vst2q_p8 (arg0_poly8_t, arg1_poly8x16x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qs16 (void) --{ -- int16_t *arg0_int16_t; -- int16x8x2_t arg1_int16x8x2_t; -- -- vst2q_s16 (arg0_int16_t, arg1_int16x8x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qs32 (void) --{ -- int32_t *arg0_int32_t; -- int32x4x2_t arg1_int32x4x2_t; -- -- vst2q_s32 (arg0_int32_t, arg1_int32x4x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qs8 (void) --{ -- int8_t *arg0_int8_t; -- int8x16x2_t arg1_int8x16x2_t; -- -- vst2q_s8 (arg0_int8_t, arg1_int8x16x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x8x2_t arg1_uint16x8x2_t; -- -- vst2q_u16 (arg0_uint16_t, arg1_uint16x8x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x4x2_t arg1_uint32x4x2_t; -- -- vst2q_u32 (arg0_uint32_t, arg1_uint32x4x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2Qu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst2Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2Qu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x16x2_t arg1_uint8x16x2_t; -- -- vst2q_u8 (arg0_uint8_t, arg1_uint8x16x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_lanef32 (void) --{ -- float32_t *arg0_float32_t; -- float32x2x2_t arg1_float32x2x2_t; -- -- vst2_lane_f32 (arg0_float32_t, arg1_float32x2x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_lanep16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x4x2_t arg1_poly16x4x2_t; -- -- vst2_lane_p16 (arg0_poly16_t, arg1_poly16x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_lanep8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x8x2_t arg1_poly8x8x2_t; -- -- vst2_lane_p8 (arg0_poly8_t, arg1_poly8x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_lanes16 (void) --{ -- int16_t *arg0_int16_t; -- int16x4x2_t arg1_int16x4x2_t; -- -- vst2_lane_s16 (arg0_int16_t, arg1_int16x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_lanes32 (void) --{ -- int32_t *arg0_int32_t; -- int32x2x2_t arg1_int32x2x2_t; -- -- vst2_lane_s32 (arg0_int32_t, arg1_int32x2x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_lanes8 (void) --{ -- int8_t *arg0_int8_t; -- int8x8x2_t arg1_int8x8x2_t; -- -- vst2_lane_s8 (arg0_int8_t, arg1_int8x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_laneu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x4x2_t arg1_uint16x4x2_t; -- -- vst2_lane_u16 (arg0_uint16_t, arg1_uint16x4x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_laneu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x2x2_t arg1_uint32x2x2_t; -- -- vst2_lane_u32 (arg0_uint32_t, arg1_uint32x2x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2_laneu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x8x2_t arg1_uint8x8x2_t; -- -- vst2_lane_u8 (arg0_uint8_t, arg1_uint8x8x2_t, 1); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2f32 (void) --{ -- float32_t *arg0_float32_t; -- float32x2x2_t arg1_float32x2x2_t; -- -- vst2_f32 (arg0_float32_t, arg1_float32x2x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2p16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2p16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x4x2_t arg1_poly16x4x2_t; -- -- vst2_p16 (arg0_poly16_t, arg1_poly16x4x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2p64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vst2p64 (void) --{ -- poly64_t *arg0_poly64_t; -- poly64x1x2_t arg1_poly64x1x2_t; -- -- vst2_p64 (arg0_poly64_t, arg1_poly64x1x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2p8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2p8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x8x2_t arg1_poly8x8x2_t; -- -- vst2_p8 (arg0_poly8_t, arg1_poly8x8x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2s16 (void) --{ -- int16_t *arg0_int16_t; -- int16x4x2_t arg1_int16x4x2_t; -- -- vst2_s16 (arg0_int16_t, arg1_int16x4x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2s32 (void) --{ -- int32_t *arg0_int32_t; -- int32x2x2_t arg1_int32x2x2_t; -- -- vst2_s32 (arg0_int32_t, arg1_int32x2x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2s64 (void) --{ -- int64_t *arg0_int64_t; -- int64x1x2_t arg1_int64x1x2_t; -- -- vst2_s64 (arg0_int64_t, arg1_int64x1x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2s8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2s8 (void) --{ -- int8_t *arg0_int8_t; -- int8x8x2_t arg1_int8x8x2_t; -- -- vst2_s8 (arg0_int8_t, arg1_int8x8x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2u16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x4x2_t arg1_uint16x4x2_t; -- -- vst2_u16 (arg0_uint16_t, arg1_uint16x4x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2u32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x2x2_t arg1_uint32x2x2_t; -- -- vst2_u32 (arg0_uint32_t, arg1_uint32x2x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2u64 (void) --{ -- uint64_t *arg0_uint64_t; -- uint64x1x2_t arg1_uint64x1x2_t; -- -- vst2_u64 (arg0_uint64_t, arg1_uint64x1x2_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst2u8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst2u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst2u8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x8x2_t arg1_uint8x8x2_t; -- -- vst2_u8 (arg0_uint8_t, arg1_uint8x8x2_t); --} -- --/* { dg-final { scan-assembler "vst2\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3Q_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Q_lanef32 (void) --{ -- float32_t *arg0_float32_t; -- float32x4x3_t arg1_float32x4x3_t; -- -- vst3q_lane_f32 (arg0_float32_t, arg1_float32x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3Q_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Q_lanep16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x8x3_t arg1_poly16x8x3_t; -- -- vst3q_lane_p16 (arg0_poly16_t, arg1_poly16x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3Q_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Q_lanes16 (void) --{ -- int16_t *arg0_int16_t; -- int16x8x3_t arg1_int16x8x3_t; -- -- vst3q_lane_s16 (arg0_int16_t, arg1_int16x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3Q_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Q_lanes32 (void) --{ -- int32_t *arg0_int32_t; -- int32x4x3_t arg1_int32x4x3_t; -- -- vst3q_lane_s32 (arg0_int32_t, arg1_int32x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3Q_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Q_laneu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x8x3_t arg1_uint16x8x3_t; -- -- vst3q_lane_u16 (arg0_uint16_t, arg1_uint16x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Q_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3Q_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Q_laneu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x4x3_t arg1_uint32x4x3_t; -- -- vst3q_lane_u32 (arg0_uint32_t, arg1_uint32x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qf32 (void) --{ -- float32_t *arg0_float32_t; -- float32x4x3_t arg1_float32x4x3_t; -- -- vst3q_f32 (arg0_float32_t, arg1_float32x4x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qp16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x8x3_t arg1_poly16x8x3_t; -- -- vst3q_p16 (arg0_poly16_t, arg1_poly16x8x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qp8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x16x3_t arg1_poly8x16x3_t; -- -- vst3q_p8 (arg0_poly8_t, arg1_poly8x16x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qs16 (void) --{ -- int16_t *arg0_int16_t; -- int16x8x3_t arg1_int16x8x3_t; -- -- vst3q_s16 (arg0_int16_t, arg1_int16x8x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qs32 (void) --{ -- int32_t *arg0_int32_t; -- int32x4x3_t arg1_int32x4x3_t; -- -- vst3q_s32 (arg0_int32_t, arg1_int32x4x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qs8 (void) --{ -- int8_t *arg0_int8_t; -- int8x16x3_t arg1_int8x16x3_t; -- -- vst3q_s8 (arg0_int8_t, arg1_int8x16x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x8x3_t arg1_uint16x8x3_t; -- -- vst3q_u16 (arg0_uint16_t, arg1_uint16x8x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x4x3_t arg1_uint32x4x3_t; -- -- vst3q_u32 (arg0_uint32_t, arg1_uint32x4x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3Qu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst3Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3Qu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x16x3_t arg1_uint8x16x3_t; -- -- vst3q_u8 (arg0_uint8_t, arg1_uint8x16x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_lanef32 (void) --{ -- float32_t *arg0_float32_t; -- float32x2x3_t arg1_float32x2x3_t; -- -- vst3_lane_f32 (arg0_float32_t, arg1_float32x2x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_lanep16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x4x3_t arg1_poly16x4x3_t; -- -- vst3_lane_p16 (arg0_poly16_t, arg1_poly16x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_lanep8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x8x3_t arg1_poly8x8x3_t; -- -- vst3_lane_p8 (arg0_poly8_t, arg1_poly8x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_lanes16 (void) --{ -- int16_t *arg0_int16_t; -- int16x4x3_t arg1_int16x4x3_t; -- -- vst3_lane_s16 (arg0_int16_t, arg1_int16x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_lanes32 (void) --{ -- int32_t *arg0_int32_t; -- int32x2x3_t arg1_int32x2x3_t; -- -- vst3_lane_s32 (arg0_int32_t, arg1_int32x2x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_lanes8 (void) --{ -- int8_t *arg0_int8_t; -- int8x8x3_t arg1_int8x8x3_t; -- -- vst3_lane_s8 (arg0_int8_t, arg1_int8x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_laneu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x4x3_t arg1_uint16x4x3_t; -- -- vst3_lane_u16 (arg0_uint16_t, arg1_uint16x4x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_laneu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x2x3_t arg1_uint32x2x3_t; -- -- vst3_lane_u32 (arg0_uint32_t, arg1_uint32x2x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3_laneu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x8x3_t arg1_uint8x8x3_t; -- -- vst3_lane_u8 (arg0_uint8_t, arg1_uint8x8x3_t, 1); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3f32 (void) --{ -- float32_t *arg0_float32_t; -- float32x2x3_t arg1_float32x2x3_t; -- -- vst3_f32 (arg0_float32_t, arg1_float32x2x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3p16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x4x3_t arg1_poly16x4x3_t; -- -- vst3_p16 (arg0_poly16_t, arg1_poly16x4x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vst3p64 (void) --{ -- poly64_t *arg0_poly64_t; -- poly64x1x3_t arg1_poly64x1x3_t; -- -- vst3_p64 (arg0_poly64_t, arg1_poly64x1x3_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3p8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3p8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x8x3_t arg1_poly8x8x3_t; -- -- vst3_p8 (arg0_poly8_t, arg1_poly8x8x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3s16 (void) --{ -- int16_t *arg0_int16_t; -- int16x4x3_t arg1_int16x4x3_t; -- -- vst3_s16 (arg0_int16_t, arg1_int16x4x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3s32 (void) --{ -- int32_t *arg0_int32_t; -- int32x2x3_t arg1_int32x2x3_t; -- -- vst3_s32 (arg0_int32_t, arg1_int32x2x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3s64 (void) --{ -- int64_t *arg0_int64_t; -- int64x1x3_t arg1_int64x1x3_t; -- -- vst3_s64 (arg0_int64_t, arg1_int64x1x3_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3s8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3s8 (void) --{ -- int8_t *arg0_int8_t; -- int8x8x3_t arg1_int8x8x3_t; -- -- vst3_s8 (arg0_int8_t, arg1_int8x8x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3u16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x4x3_t arg1_uint16x4x3_t; -- -- vst3_u16 (arg0_uint16_t, arg1_uint16x4x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3u32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x2x3_t arg1_uint32x2x3_t; -- -- vst3_u32 (arg0_uint32_t, arg1_uint32x2x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3u64 (void) --{ -- uint64_t *arg0_uint64_t; -- uint64x1x3_t arg1_uint64x1x3_t; -- -- vst3_u64 (arg0_uint64_t, arg1_uint64x1x3_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst3u8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst3u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst3u8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x8x3_t arg1_uint8x8x3_t; -- -- vst3_u8 (arg0_uint8_t, arg1_uint8x8x3_t); --} -- --/* { dg-final { scan-assembler "vst3\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4Q_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Q_lanef32 (void) --{ -- float32_t *arg0_float32_t; -- float32x4x4_t arg1_float32x4x4_t; -- -- vst4q_lane_f32 (arg0_float32_t, arg1_float32x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4Q_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Q_lanep16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x8x4_t arg1_poly16x8x4_t; -- -- vst4q_lane_p16 (arg0_poly16_t, arg1_poly16x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4Q_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Q_lanes16 (void) --{ -- int16_t *arg0_int16_t; -- int16x8x4_t arg1_int16x8x4_t; -- -- vst4q_lane_s16 (arg0_int16_t, arg1_int16x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4Q_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Q_lanes32 (void) --{ -- int32_t *arg0_int32_t; -- int32x4x4_t arg1_int32x4x4_t; -- -- vst4q_lane_s32 (arg0_int32_t, arg1_int32x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4Q_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Q_laneu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x8x4_t arg1_uint16x8x4_t; -- -- vst4q_lane_u16 (arg0_uint16_t, arg1_uint16x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Q_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4Q_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Q_laneu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x4x4_t arg1_uint32x4x4_t; -- -- vst4q_lane_u32 (arg0_uint32_t, arg1_uint32x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qf32 (void) --{ -- float32_t *arg0_float32_t; -- float32x4x4_t arg1_float32x4x4_t; -- -- vst4q_f32 (arg0_float32_t, arg1_float32x4x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qp16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x8x4_t arg1_poly16x8x4_t; -- -- vst4q_p16 (arg0_poly16_t, arg1_poly16x8x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qp8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x16x4_t arg1_poly8x16x4_t; -- -- vst4q_p8 (arg0_poly8_t, arg1_poly8x16x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qs16 (void) --{ -- int16_t *arg0_int16_t; -- int16x8x4_t arg1_int16x8x4_t; -- -- vst4q_s16 (arg0_int16_t, arg1_int16x8x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qs32 (void) --{ -- int32_t *arg0_int32_t; -- int32x4x4_t arg1_int32x4x4_t; -- -- vst4q_s32 (arg0_int32_t, arg1_int32x4x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qs8 (void) --{ -- int8_t *arg0_int8_t; -- int8x16x4_t arg1_int8x16x4_t; -- -- vst4q_s8 (arg0_int8_t, arg1_int8x16x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x8x4_t arg1_uint16x8x4_t; -- -- vst4q_u16 (arg0_uint16_t, arg1_uint16x8x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x4x4_t arg1_uint32x4x4_t; -- -- vst4q_u32 (arg0_uint32_t, arg1_uint32x4x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4Qu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vst4Qu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4Qu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x16x4_t arg1_uint8x16x4_t; -- -- vst4q_u8 (arg0_uint8_t, arg1_uint8x16x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanef32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_lanef32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_lanef32 (void) --{ -- float32_t *arg0_float32_t; -- float32x2x4_t arg1_float32x2x4_t; -- -- vst4_lane_f32 (arg0_float32_t, arg1_float32x2x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_lanep16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_lanep16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x4x4_t arg1_poly16x4x4_t; -- -- vst4_lane_p16 (arg0_poly16_t, arg1_poly16x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanep8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_lanep8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_lanep8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x8x4_t arg1_poly8x8x4_t; -- -- vst4_lane_p8 (arg0_poly8_t, arg1_poly8x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_lanes16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_lanes16 (void) --{ -- int16_t *arg0_int16_t; -- int16x4x4_t arg1_int16x4x4_t; -- -- vst4_lane_s16 (arg0_int16_t, arg1_int16x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_lanes32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_lanes32 (void) --{ -- int32_t *arg0_int32_t; -- int32x2x4_t arg1_int32x2x4_t; -- -- vst4_lane_s32 (arg0_int32_t, arg1_int32x2x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_lanes8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_lanes8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_lanes8 (void) --{ -- int8_t *arg0_int8_t; -- int8x8x4_t arg1_int8x8x4_t; -- -- vst4_lane_s8 (arg0_int8_t, arg1_int8x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_laneu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_laneu16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x4x4_t arg1_uint16x4x4_t; -- -- vst4_lane_u16 (arg0_uint16_t, arg1_uint16x4x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_laneu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_laneu32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x2x4_t arg1_uint32x2x4_t; -- -- vst4_lane_u32 (arg0_uint32_t, arg1_uint32x2x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4_laneu8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4_laneu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4_laneu8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x8x4_t arg1_uint8x8x4_t; -- -- vst4_lane_u8 (arg0_uint8_t, arg1_uint8x8x4_t, 1); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+\\\[\[0-9\]+\\\]-\[dD\]\[0-9\]+\\\[\[0-9\]+\\\])|(\[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\], \[dD\]\[0-9\]+\\\[\[0-9\]+\\\]))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4f32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4f32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4f32 (void) --{ -- float32_t *arg0_float32_t; -- float32x2x4_t arg1_float32x2x4_t; -- -- vst4_f32 (arg0_float32_t, arg1_float32x2x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4p16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4p16 (void) --{ -- poly16_t *arg0_poly16_t; -- poly16x4x4_t arg1_poly16x4x4_t; -- -- vst4_p16 (arg0_poly16_t, arg1_poly16x4x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4p64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_crypto_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_crypto } */ -- --#include "arm_neon.h" -- --void test_vst4p64 (void) --{ -- poly64_t *arg0_poly64_t; -- poly64x1x4_t arg1_poly64x1x4_t; -- -- vst4_p64 (arg0_poly64_t, arg1_poly64x1x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4p8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4p8 (void) --{ -- poly8_t *arg0_poly8_t; -- poly8x8x4_t arg1_poly8x8x4_t; -- -- vst4_p8 (arg0_poly8_t, arg1_poly8x8x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4s16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4s16 (void) --{ -- int16_t *arg0_int16_t; -- int16x4x4_t arg1_int16x4x4_t; -- -- vst4_s16 (arg0_int16_t, arg1_int16x4x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4s32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4s32 (void) --{ -- int32_t *arg0_int32_t; -- int32x2x4_t arg1_int32x2x4_t; -- -- vst4_s32 (arg0_int32_t, arg1_int32x2x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4s64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4s64 (void) --{ -- int64_t *arg0_int64_t; -- int64x1x4_t arg1_int64x1x4_t; -- -- vst4_s64 (arg0_int64_t, arg1_int64x1x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4s8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4s8 (void) --{ -- int8_t *arg0_int8_t; -- int8x8x4_t arg1_int8x8x4_t; -- -- vst4_s8 (arg0_int8_t, arg1_int8x8x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u16.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4u16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4u16 (void) --{ -- uint16_t *arg0_uint16_t; -- uint16x4x4_t arg1_uint16x4x4_t; -- -- vst4_u16 (arg0_uint16_t, arg1_uint16x4x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.16\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u32.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4u32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4u32 (void) --{ -- uint32_t *arg0_uint32_t; -- uint32x2x4_t arg1_uint32x2x4_t; -- -- vst4_u32 (arg0_uint32_t, arg1_uint32x2x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.32\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4u64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4u64 (void) --{ -- uint64_t *arg0_uint64_t; -- uint64x1x4_t arg1_uint64x1x4_t; -- -- vst4_u64 (arg0_uint64_t, arg1_uint64x1x4_t); --} -- --/* { dg-final { scan-assembler "vst1\.64\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vst4u8.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vst4u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vst4u8 (void) --{ -- uint8_t *arg0_uint8_t; -- uint8x8x4_t arg1_uint8x8x4_t; -- -- vst4_u8 (arg0_uint8_t, arg1_uint8x8x4_t); --} -- --/* { dg-final { scan-assembler "vst4\.8\[ \]+\\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \\\[\[rR\]\[0-9\]+\(:\[0-9\]+\)?\\\]!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQf32 (void) --{ -- float32x4_t out_float32x4_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4_t = vsubq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vsub\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQs16 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8_t = vsubq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vsub\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQs32 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4_t = vsubq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vsub\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQs64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQs64 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int64x2_t = vsubq_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vsub\.i64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQs8 (void) --{ -- int8x16_t out_int8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16_t = vsubq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vsub\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vsubq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vsub\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vsubq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vsub\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQu64 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint64x2_t = vsubq_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vsub\.i64\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubQu8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vsubq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vsub\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubf32 (void) --{ -- float32x2_t out_float32x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2_t = vsub_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vsub\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubhns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubhns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubhns16 (void) --{ -- int8x8_t out_int8x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int8x8_t = vsubhn_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vsubhn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubhns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubhns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubhns32 (void) --{ -- int16x4_t out_int16x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int16x4_t = vsubhn_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vsubhn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubhns64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubhns64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubhns64 (void) --{ -- int32x2_t out_int32x2_t; -- int64x2_t arg0_int64x2_t; -- int64x2_t arg1_int64x2_t; -- -- out_int32x2_t = vsubhn_s64 (arg0_int64x2_t, arg1_int64x2_t); --} -- --/* { dg-final { scan-assembler "vsubhn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubhnu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubhnu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubhnu16 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint8x8_t = vsubhn_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vsubhn\.i16\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubhnu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubhnu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubhnu32 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint16x4_t = vsubhn_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vsubhn\.i32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubhnu64.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubhnu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubhnu64 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint64x2_t arg1_uint64x2_t; -- -- out_uint32x2_t = vsubhn_u64 (arg0_uint64x2_t, arg1_uint64x2_t); --} -- --/* { dg-final { scan-assembler "vsubhn\.i64\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubls16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubls16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubls16 (void) --{ -- int32x4_t out_int32x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vsubl_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vsubl\.s16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubls32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubls32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubls32 (void) --{ -- int64x2_t out_int64x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vsubl_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vsubl\.s32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubls8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubls8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubls8 (void) --{ -- int16x8_t out_int16x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int16x8_t = vsubl_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vsubl\.s8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsublu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsublu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsublu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint32x4_t = vsubl_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vsubl\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsublu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsublu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsublu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint64x2_t = vsubl_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vsubl\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsublu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsublu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsublu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint16x8_t = vsubl_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vsubl\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubs16 (void) --{ -- int16x4_t out_int16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4_t = vsub_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vsub\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubs32 (void) --{ -- int32x2_t out_int32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2_t = vsub_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vsub\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubs64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vsubs64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubs64 (void) --{ -- int64x1_t out_int64x1_t; -- int64x1_t arg0_int64x1_t; -- int64x1_t arg1_int64x1_t; -- -- out_int64x1_t = vsub_s64 (arg0_int64x1_t, arg1_int64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubs8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vsub_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vsub\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vsub_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vsub\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vsub_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vsub\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubu64.c -+++ b/src//dev/null -@@ -1,19 +0,0 @@ --/* Test the `vsubu64' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubu64 (void) --{ -- uint64x1_t out_uint64x1_t; -- uint64x1_t arg0_uint64x1_t; -- uint64x1_t arg1_uint64x1_t; -- -- out_uint64x1_t = vsub_u64 (arg0_uint64x1_t, arg1_uint64x1_t); --} -- ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vsub_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vsub\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubws16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubws16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubws16 (void) --{ -- int32x4_t out_int32x4_t; -- int32x4_t arg0_int32x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int32x4_t = vsubw_s16 (arg0_int32x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vsubw\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubws32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubws32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubws32 (void) --{ -- int64x2_t out_int64x2_t; -- int64x2_t arg0_int64x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int64x2_t = vsubw_s32 (arg0_int64x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vsubw\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubws8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubws8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubws8 (void) --{ -- int16x8_t out_int16x8_t; -- int16x8_t arg0_int16x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int16x8_t = vsubw_s8 (arg0_int16x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vsubw\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubwu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubwu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubwu16 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint32x4_t = vsubw_u16 (arg0_uint32x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vsubw\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubwu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubwu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubwu32 (void) --{ -- uint64x2_t out_uint64x2_t; -- uint64x2_t arg0_uint64x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint64x2_t = vsubw_u32 (arg0_uint64x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vsubw\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vsubwu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vsubwu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vsubwu8 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint16x8_t = vsubw_u8 (arg0_uint16x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vsubw\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl1p8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl1p8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl1p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_poly8x8_t = vtbl1_p8 (arg0_poly8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, ((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl1s8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl1s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl1s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vtbl1_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, ((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl1u8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl1u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl1u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vtbl1_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, ((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl2p8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl2p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl2p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8x2_t arg0_poly8x8x2_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_poly8x8_t = vtbl2_p8 (arg0_poly8x8x2_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl2s8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl2s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl2s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8x2_t arg0_int8x8x2_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vtbl2_s8 (arg0_int8x8x2_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl2u8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl2u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl2u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8x2_t arg0_uint8x8x2_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vtbl2_u8 (arg0_uint8x8x2_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl3p8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl3p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl3p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8x3_t arg0_poly8x8x3_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_poly8x8_t = vtbl3_p8 (arg0_poly8x8x3_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl3s8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl3s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl3s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8x3_t arg0_int8x8x3_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vtbl3_s8 (arg0_int8x8x3_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl3u8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl3u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl3u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8x3_t arg0_uint8x8x3_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vtbl3_u8 (arg0_uint8x8x3_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl4p8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl4p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl4p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8x4_t arg0_poly8x8x4_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_poly8x8_t = vtbl4_p8 (arg0_poly8x8x4_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl4s8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl4s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl4s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8x4_t arg0_int8x8x4_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8_t = vtbl4_s8 (arg0_int8x8x4_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbl4u8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtbl4u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbl4u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8x4_t arg0_uint8x8x4_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vtbl4_u8 (arg0_uint8x8x4_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbl\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx1p8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx1p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx1p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_poly8x8_t = vtbx1_p8 (arg0_poly8x8_t, arg1_poly8x8_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, ((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx1s8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx1s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx1s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- int8x8_t arg2_int8x8_t; -- -- out_int8x8_t = vtbx1_s8 (arg0_int8x8_t, arg1_int8x8_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, ((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx1u8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx1u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx1u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint8x8_t = vtbx1_u8 (arg0_uint8x8_t, arg1_uint8x8_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, ((\\\{\[dD\]\[0-9\]+\\\})|(\[dD\]\[0-9\]+)), \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx2p8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx2p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx2p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8x2_t arg1_poly8x8x2_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_poly8x8_t = vtbx2_p8 (arg0_poly8x8_t, arg1_poly8x8x2_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx2s8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx2s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx2s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8x2_t arg1_int8x8x2_t; -- int8x8_t arg2_int8x8_t; -- -- out_int8x8_t = vtbx2_s8 (arg0_int8x8_t, arg1_int8x8x2_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx2u8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx2u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx2u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8x2_t arg1_uint8x8x2_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint8x8_t = vtbx2_u8 (arg0_uint8x8_t, arg1_uint8x8x2_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx3p8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx3p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx3p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8x3_t arg1_poly8x8x3_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_poly8x8_t = vtbx3_p8 (arg0_poly8x8_t, arg1_poly8x8x3_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx3s8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx3s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx3s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8x3_t arg1_int8x8x3_t; -- int8x8_t arg2_int8x8_t; -- -- out_int8x8_t = vtbx3_s8 (arg0_int8x8_t, arg1_int8x8x3_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx3u8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx3u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx3u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8x3_t arg1_uint8x8x3_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint8x8_t = vtbx3_u8 (arg0_uint8x8_t, arg1_uint8x8x3_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx4p8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx4p8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx4p8 (void) --{ -- poly8x8_t out_poly8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8x4_t arg1_poly8x8x4_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_poly8x8_t = vtbx4_p8 (arg0_poly8x8_t, arg1_poly8x8x4_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx4s8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx4s8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx4s8 (void) --{ -- int8x8_t out_int8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8x4_t arg1_int8x8x4_t; -- int8x8_t arg2_int8x8_t; -- -- out_int8x8_t = vtbx4_s8 (arg0_int8x8_t, arg1_int8x8x4_t, arg2_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtbx4u8.c -+++ b/src//dev/null -@@ -1,21 +0,0 @@ --/* Test the `vtbx4u8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtbx4u8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8x4_t arg1_uint8x8x4_t; -- uint8x8_t arg2_uint8x8_t; -- -- out_uint8x8_t = vtbx4_u8 (arg0_uint8x8_t, arg1_uint8x8x4_t, arg2_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtbx\.8\[ \]+\[dD\]\[0-9\]+, \\\{((\[dD\]\[0-9\]+-\[dD\]\[0-9\]+)|(\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+))\\\}, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQf32 (void) --{ -- float32x4x2_t out_float32x4x2_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4x2_t = vtrnq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vtrn\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQp16 (void) --{ -- poly16x8x2_t out_poly16x8x2_t; -- poly16x8_t arg0_poly16x8_t; -- poly16x8_t arg1_poly16x8_t; -- -- out_poly16x8x2_t = vtrnq_p16 (arg0_poly16x8_t, arg1_poly16x8_t); --} -- --/* { dg-final { scan-assembler "vtrn\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQp8 (void) --{ -- poly8x16x2_t out_poly8x16x2_t; -- poly8x16_t arg0_poly8x16_t; -- poly8x16_t arg1_poly8x16_t; -- -- out_poly8x16x2_t = vtrnq_p8 (arg0_poly8x16_t, arg1_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vtrn\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQs16 (void) --{ -- int16x8x2_t out_int16x8x2_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8x2_t = vtrnq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vtrn\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQs32 (void) --{ -- int32x4x2_t out_int32x4x2_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4x2_t = vtrnq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vtrn\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQs8 (void) --{ -- int8x16x2_t out_int8x16x2_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16x2_t = vtrnq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vtrn\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQu16 (void) --{ -- uint16x8x2_t out_uint16x8x2_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8x2_t = vtrnq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vtrn\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQu32 (void) --{ -- uint32x4x2_t out_uint32x4x2_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4x2_t = vtrnq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vtrn\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnQu8 (void) --{ -- uint8x16x2_t out_uint8x16x2_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16x2_t = vtrnq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vtrn\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnf32 (void) --{ -- float32x2x2_t out_float32x2x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2x2_t = vtrn_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnp16 (void) --{ -- poly16x4x2_t out_poly16x4x2_t; -- poly16x4_t arg0_poly16x4_t; -- poly16x4_t arg1_poly16x4_t; -- -- out_poly16x4x2_t = vtrn_p16 (arg0_poly16x4_t, arg1_poly16x4_t); --} -- --/* { dg-final { scan-assembler "vtrn\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnp8 (void) --{ -- poly8x8x2_t out_poly8x8x2_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x8x2_t = vtrn_p8 (arg0_poly8x8_t, arg1_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vtrn\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrns16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrns16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrns16 (void) --{ -- int16x4x2_t out_int16x4x2_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4x2_t = vtrn_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vtrn\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrns32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrns32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrns32 (void) --{ -- int32x2x2_t out_int32x2x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2x2_t = vtrn_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrns8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrns8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrns8 (void) --{ -- int8x8x2_t out_int8x8x2_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8x2_t = vtrn_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtrn\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnu16 (void) --{ -- uint16x4x2_t out_uint16x4x2_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4x2_t = vtrn_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vtrn\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnu32 (void) --{ -- uint32x2x2_t out_uint32x2x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2x2_t = vtrn_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtrnu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtrnu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtrnu8 (void) --{ -- uint8x8x2_t out_uint8x8x2_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8x2_t = vtrn_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtrn\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstQp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstQp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstQp8 (void) --{ -- uint8x16_t out_uint8x16_t; -- poly8x16_t arg0_poly8x16_t; -- poly8x16_t arg1_poly8x16_t; -- -- out_uint8x16_t = vtstq_p8 (arg0_poly8x16_t, arg1_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vtst\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstQs16 (void) --{ -- uint16x8_t out_uint16x8_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_uint16x8_t = vtstq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vtst\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstQs32 (void) --{ -- uint32x4_t out_uint32x4_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_uint32x4_t = vtstq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vtst\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstQs8 (void) --{ -- uint8x16_t out_uint8x16_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_uint8x16_t = vtstq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vtst\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstQu16 (void) --{ -- uint16x8_t out_uint16x8_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8_t = vtstq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vtst\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstQu32 (void) --{ -- uint32x4_t out_uint32x4_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4_t = vtstq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vtst\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstQu8 (void) --{ -- uint8x16_t out_uint8x16_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16_t = vtstq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vtst\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstp8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstp8 (void) --{ -- uint8x8_t out_uint8x8_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_uint8x8_t = vtst_p8 (arg0_poly8x8_t, arg1_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vtst\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtsts16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtsts16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtsts16 (void) --{ -- uint16x4_t out_uint16x4_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_uint16x4_t = vtst_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vtst\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtsts32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtsts32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtsts32 (void) --{ -- uint32x2_t out_uint32x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_uint32x2_t = vtst_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vtst\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtsts8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtsts8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtsts8 (void) --{ -- uint8x8_t out_uint8x8_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_uint8x8_t = vtst_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vtst\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstu16 (void) --{ -- uint16x4_t out_uint16x4_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4_t = vtst_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vtst\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstu32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstu32 (void) --{ -- uint32x2_t out_uint32x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2_t = vtst_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vtst\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vtstu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vtstu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vtstu8 (void) --{ -- uint8x8_t out_uint8x8_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8_t = vtst_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vtst\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQf32 (void) --{ -- float32x4x2_t out_float32x4x2_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4x2_t = vuzpq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQp16 (void) --{ -- poly16x8x2_t out_poly16x8x2_t; -- poly16x8_t arg0_poly16x8_t; -- poly16x8_t arg1_poly16x8_t; -- -- out_poly16x8x2_t = vuzpq_p16 (arg0_poly16x8_t, arg1_poly16x8_t); --} -- --/* { dg-final { scan-assembler "vuzp\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQp8 (void) --{ -- poly8x16x2_t out_poly8x16x2_t; -- poly8x16_t arg0_poly8x16_t; -- poly8x16_t arg1_poly8x16_t; -- -- out_poly8x16x2_t = vuzpq_p8 (arg0_poly8x16_t, arg1_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vuzp\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQs16' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQs16 (void) --{ -- int16x8x2_t out_int16x8x2_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8x2_t = vuzpq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vuzp\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQs32 (void) --{ -- int32x4x2_t out_int32x4x2_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4x2_t = vuzpq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQs8 (void) --{ -- int8x16x2_t out_int8x16x2_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16x2_t = vuzpq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vuzp\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQu16 (void) --{ -- uint16x8x2_t out_uint16x8x2_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8x2_t = vuzpq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vuzp\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQu32 (void) --{ -- uint32x4x2_t out_uint32x4x2_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4x2_t = vuzpq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpQu8' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpQu8 (void) --{ -- uint8x16x2_t out_uint8x16x2_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16x2_t = vuzpq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vuzp\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpf32 (void) --{ -- float32x2x2_t out_float32x2x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2x2_t = vuzp_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpp16 (void) --{ -- poly16x4x2_t out_poly16x4x2_t; -- poly16x4_t arg0_poly16x4_t; -- poly16x4_t arg1_poly16x4_t; -- -- out_poly16x4x2_t = vuzp_p16 (arg0_poly16x4_t, arg1_poly16x4_t); --} -- --/* { dg-final { scan-assembler "vuzp\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpp8 (void) --{ -- poly8x8x2_t out_poly8x8x2_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x8x2_t = vuzp_p8 (arg0_poly8x8_t, arg1_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vuzp\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzps16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzps16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzps16 (void) --{ -- int16x4x2_t out_int16x4x2_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4x2_t = vuzp_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vuzp\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzps32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzps32' ARM Neon intrinsic. 
*/ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzps32 (void) --{ -- int32x2x2_t out_int32x2x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2x2_t = vuzp_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzps8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzps8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzps8 (void) --{ -- int8x8x2_t out_int8x8x2_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8x2_t = vuzp_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vuzp\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpu16 (void) --{ -- uint16x4x2_t out_uint16x4x2_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4x2_t = vuzp_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vuzp\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpu32 (void) --{ -- uint32x2x2_t out_uint32x2x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2x2_t = vuzp_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vuzpu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vuzpu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vuzpu8 (void) --{ -- uint8x8x2_t out_uint8x8x2_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8x2_t = vuzp_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vuzp\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQf32 (void) --{ -- float32x4x2_t out_float32x4x2_t; -- float32x4_t arg0_float32x4_t; -- float32x4_t arg1_float32x4_t; -- -- out_float32x4x2_t = vzipq_f32 (arg0_float32x4_t, arg1_float32x4_t); --} -- --/* { dg-final { scan-assembler "vzip\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQp16 (void) --{ -- poly16x8x2_t out_poly16x8x2_t; -- poly16x8_t arg0_poly16x8_t; -- poly16x8_t arg1_poly16x8_t; -- -- out_poly16x8x2_t = vzipq_p16 (arg0_poly16x8_t, arg1_poly16x8_t); --} -- --/* { dg-final { scan-assembler "vzip\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQp8 (void) --{ -- poly8x16x2_t out_poly8x16x2_t; -- poly8x16_t arg0_poly8x16_t; -- poly8x16_t arg1_poly8x16_t; -- -- out_poly8x16x2_t = vzipq_p8 (arg0_poly8x16_t, arg1_poly8x16_t); --} -- --/* { dg-final { scan-assembler "vzip\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQs16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQs16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQs16 (void) --{ -- int16x8x2_t out_int16x8x2_t; -- int16x8_t arg0_int16x8_t; -- int16x8_t arg1_int16x8_t; -- -- out_int16x8x2_t = vzipq_s16 (arg0_int16x8_t, arg1_int16x8_t); --} -- --/* { dg-final { scan-assembler "vzip\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQs32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQs32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQs32 (void) --{ -- int32x4x2_t out_int32x4x2_t; -- int32x4_t arg0_int32x4_t; -- int32x4_t arg1_int32x4_t; -- -- out_int32x4x2_t = vzipq_s32 (arg0_int32x4_t, arg1_int32x4_t); --} -- --/* { dg-final { scan-assembler "vzip\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQs8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQs8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQs8 (void) --{ -- int8x16x2_t out_int8x16x2_t; -- int8x16_t arg0_int8x16_t; -- int8x16_t arg1_int8x16_t; -- -- out_int8x16x2_t = vzipq_s8 (arg0_int8x16_t, arg1_int8x16_t); --} -- --/* { dg-final { scan-assembler "vzip\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQu16 (void) --{ -- uint16x8x2_t out_uint16x8x2_t; -- uint16x8_t arg0_uint16x8_t; -- uint16x8_t arg1_uint16x8_t; -- -- out_uint16x8x2_t = vzipq_u16 (arg0_uint16x8_t, arg1_uint16x8_t); --} -- --/* { dg-final { scan-assembler "vzip\.16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQu32 (void) --{ -- uint32x4x2_t out_uint32x4x2_t; -- uint32x4_t arg0_uint32x4_t; -- uint32x4_t arg1_uint32x4_t; -- -- out_uint32x4x2_t = vzipq_u32 (arg0_uint32x4_t, arg1_uint32x4_t); --} -- --/* { dg-final { scan-assembler "vzip\.32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipQu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipQu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipQu8 (void) --{ -- uint8x16x2_t out_uint8x16x2_t; -- uint8x16_t arg0_uint8x16_t; -- uint8x16_t arg1_uint8x16_t; -- -- out_uint8x16x2_t = vzipq_u8 (arg0_uint8x16_t, arg1_uint8x16_t); --} -- --/* { dg-final { scan-assembler "vzip\.8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipf32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipf32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipf32 (void) --{ -- float32x2x2_t out_float32x2x2_t; -- float32x2_t arg0_float32x2_t; -- float32x2_t arg1_float32x2_t; -- -- out_float32x2x2_t = vzip_f32 (arg0_float32x2_t, arg1_float32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipp16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipp16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipp16 (void) --{ -- poly16x4x2_t out_poly16x4x2_t; -- poly16x4_t arg0_poly16x4_t; -- poly16x4_t arg1_poly16x4_t; -- -- out_poly16x4x2_t = vzip_p16 (arg0_poly16x4_t, arg1_poly16x4_t); --} -- --/* { dg-final { scan-assembler "vzip\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipp8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipp8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipp8 (void) --{ -- poly8x8x2_t out_poly8x8x2_t; -- poly8x8_t arg0_poly8x8_t; -- poly8x8_t arg1_poly8x8_t; -- -- out_poly8x8x2_t = vzip_p8 (arg0_poly8x8_t, arg1_poly8x8_t); --} -- --/* { dg-final { scan-assembler "vzip\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzips16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzips16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzips16 (void) --{ -- int16x4x2_t out_int16x4x2_t; -- int16x4_t arg0_int16x4_t; -- int16x4_t arg1_int16x4_t; -- -- out_int16x4x2_t = vzip_s16 (arg0_int16x4_t, arg1_int16x4_t); --} -- --/* { dg-final { scan-assembler "vzip\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzips32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzips32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzips32 (void) --{ -- int32x2x2_t out_int32x2x2_t; -- int32x2_t arg0_int32x2_t; -- int32x2_t arg1_int32x2_t; -- -- out_int32x2x2_t = vzip_s32 (arg0_int32x2_t, arg1_int32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzips8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzips8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzips8 (void) --{ -- int8x8x2_t out_int8x8x2_t; -- int8x8_t arg0_int8x8_t; -- int8x8_t arg1_int8x8_t; -- -- out_int8x8x2_t = vzip_s8 (arg0_int8x8_t, arg1_int8x8_t); --} -- --/* { dg-final { scan-assembler "vzip\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipu16.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipu16' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. 
*/ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipu16 (void) --{ -- uint16x4x2_t out_uint16x4x2_t; -- uint16x4_t arg0_uint16x4_t; -- uint16x4_t arg1_uint16x4_t; -- -- out_uint16x4x2_t = vzip_u16 (arg0_uint16x4_t, arg1_uint16x4_t); --} -- --/* { dg-final { scan-assembler "vzip\.16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipu32.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipu32' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipu32 (void) --{ -- uint32x2x2_t out_uint32x2x2_t; -- uint32x2_t arg0_uint32x2_t; -- uint32x2_t arg1_uint32x2_t; -- -- out_uint32x2x2_t = vzip_u32 (arg0_uint32x2_t, arg1_uint32x2_t); --} -- --/* { dg-final { scan-assembler "vuzp\.32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/neon/vzipu8.c -+++ b/src//dev/null -@@ -1,20 +0,0 @@ --/* Test the `vzipu8' ARM Neon intrinsic. */ --/* This file was autogenerated by neon-testgen. */ -- --/* { dg-do assemble } */ --/* { dg-require-effective-target arm_neon_ok } */ --/* { dg-options "-save-temps -O0" } */ --/* { dg-add-options arm_neon } */ -- --#include "arm_neon.h" -- --void test_vzipu8 (void) --{ -- uint8x8x2_t out_uint8x8x2_t; -- uint8x8_t arg0_uint8x8_t; -- uint8x8_t arg1_uint8x8_t; -- -- out_uint8x8x2_t = vzip_u8 (arg0_uint8x8_t, arg1_uint8x8_t); --} -- --/* { dg-final { scan-assembler "vzip\.8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/optional_thumb-1.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile { target { ! default_mode } } } */ -+/* { dg-skip-if "-marm/-mthumb/-march/-mcpu given" { *-*-* } { "-marm" "-mthumb" "-march=*" "-mcpu=*" } } */ -+/* { dg-options "-march=armv6-m" } */ -+ -+/* Check that -mthumb is not needed when compiling for a Thumb-only target. */ -+ -+int foo; ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/optional_thumb-2.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile { target { ! default_mode } } } */ -+/* { dg-skip-if "-marm/-mthumb/-march/-mcpu given" { *-*-* } { "-marm" "-mthumb" "-march=*" "-mcpu=*" } } */ -+/* { dg-options "-mcpu=cortex-m4" } */ -+ -+/* Check that -mthumb is not needed when compiling for a Thumb-only target. */ -+ -+int foo; ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/optional_thumb-3.c -@@ -0,0 +1,9 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_cortex_m } */ -+/* { dg-skip-if "-mthumb given" { *-*-* } { "-mthumb" } } */ -+/* { dg-options "-marm" } */ -+/* { dg-error "target CPU does not support ARM mode" "missing error with -marm on Thumb-only targets" { target *-*-* } 0 } */ -+ -+/* Check that -marm gives an error when compiling for a Thumb-only target. */ -+ -+int foo; ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/polytypes.c -@@ -0,0 +1,48 @@ -+/* Check that NEON polynomial vector types are suitably incompatible with -+ integer vector types of the same layout. 
*/ -+ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-add-options arm_neon } */ -+ -+#include <arm_neon.h> -+ -+void s64_8 (int8x8_t a) {} -+void u64_8 (uint8x8_t a) {} -+void p64_8 (poly8x8_t a) {} -+void s64_16 (int16x4_t a) {} -+void u64_16 (uint16x4_t a) {} -+void p64_16 (poly16x4_t a) {} -+ -+void s128_8 (int8x16_t a) {} -+void u128_8 (uint8x16_t a) {} -+void p128_8 (poly8x16_t a) {} -+void s128_16 (int16x8_t a) {} -+void u128_16 (uint16x8_t a) {} -+void p128_16 (poly16x8_t a) {} -+ -+void foo () -+{ -+ poly8x8_t v64_8; -+ poly16x4_t v64_16; -+ poly8x16_t v128_8; -+ poly16x8_t v128_16; -+ -+ s64_8 (v64_8); /* { dg-message "use -flax-vector-conversions" } */ -+ /* { dg-error "incompatible type for argument 1 of 's64_8'" "" { target *-*-* } 31 } */ -+ u64_8 (v64_8); /* { dg-error "incompatible type for argument 1 of 'u64_8'" } */ -+ p64_8 (v64_8); -+ -+ s64_16 (v64_16); /* { dg-error "incompatible type for argument 1 of 's64_16'" } */ -+ u64_16 (v64_16); /* { dg-error "incompatible type for argument 1 of 'u64_16'" } */ -+ p64_16 (v64_16); -+ -+ s128_8 (v128_8); /* { dg-error "incompatible type for argument 1 of 's128_8'" } */ -+ u128_8 (v128_8); /* { dg-error "incompatible type for argument 1 of 'u128_8'" } */ -+ p128_8 (v128_8); -+ -+ s128_16 (v128_16); /* { dg-error "incompatible type for argument 1 of 's128_16'" } */ -+ u128_16 (v128_16); /* { dg-error "incompatible type for argument 1 of 'u128_16'" } */ -+ p128_16 (v128_16); -+} -+/* { dg-message "note: expected '\[^'\n\]*' but argument is of type '\[^'\n\]*'" "note: expected" { target *-*-* } 0 } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/pr37780_1.c -@@ -0,0 +1,48 @@ -+/* Test that we can remove the conditional move due to CLZ -+ being defined at zero. */ -+ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_arch_v6t2_ok } */ -+/* { dg-options "-O2" } */ -+/* { dg-add-options arm_arch_v6t2 } */ -+ -+int -+fooctz (int i) -+{ -+ return (i == 0) ? 32 : __builtin_ctz (i); -+} -+ -+int -+fooctz2 (int i) -+{ -+ return (i != 0) ? __builtin_ctz (i) : 32; -+} -+ -+unsigned int -+fooctz3 (unsigned int i) -+{ -+ return (i > 0) ? __builtin_ctz (i) : 32; -+} -+ -+/* { dg-final { scan-assembler-times "rbit\t*" 3 } } */ -+ -+int -+fooclz (int i) -+{ -+ return (i == 0) ? 32 : __builtin_clz (i); -+} -+ -+int -+fooclz2 (int i) -+{ -+ return (i != 0) ? __builtin_clz (i) : 32; -+} -+ -+unsigned int -+fooclz3 (unsigned int i) -+{ -+ return (i > 0) ? __builtin_clz (i) : 32; -+} -+ -+/* { dg-final { scan-assembler-times "clz\t" 6 } } */ -+/* { dg-final { scan-assembler-not "cmp\t.*0" } } */ ---- a/src/gcc/testsuite/gcc.target/arm/pr42574.c -+++ b/src/gcc/testsuite/gcc.target/arm/pr42574.c -@@ -1,5 +1,5 @@ -+/* { dg-do compile { target { arm_thumb1_ok && { ! arm_thumb1_movt_ok } } } } */ - /* { dg-options "-mthumb -Os -fpic" } */ --/* { dg-require-effective-target arm_thumb1_ok } */ - /* { dg-require-effective-target fpic } */ - /* Make sure the address of glob.c is calculated only once and using - a logical shift for the offset (200<<1). */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/pr51534.c -@@ -0,0 +1,83 @@ -+/* Test the vector comparison intrinsics when comparing to immediate zero. 
-+ */ -+ -+/* { dg-do assemble } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps -mfloat-abi=hard -O3" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include <arm_neon.h> -+ -+#define GEN_TEST(T, D, C, R) \ -+ R test_##C##_##T (T a) { return C (a, D (0)); } -+ -+#define GEN_DOUBLE_TESTS(S, T, C) \ -+ GEN_TEST (T, vdup_n_s##S, C##_s##S, u##T) \ -+ GEN_TEST (u##T, vdup_n_u##S, C##_u##S, u##T) -+ -+#define GEN_QUAD_TESTS(S, T, C) \ -+ GEN_TEST (T, vdupq_n_s##S, C##q_s##S, u##T) \ -+ GEN_TEST (u##T, vdupq_n_u##S, C##q_u##S, u##T) -+ -+#define GEN_COND_TESTS(C) \ -+ GEN_DOUBLE_TESTS (8, int8x8_t, C) \ -+ GEN_DOUBLE_TESTS (16, int16x4_t, C) \ -+ GEN_DOUBLE_TESTS (32, int32x2_t, C) \ -+ GEN_QUAD_TESTS (8, int8x16_t, C) \ -+ GEN_QUAD_TESTS (16, int16x8_t, C) \ -+ GEN_QUAD_TESTS (32, int32x4_t, C) -+ -+GEN_COND_TESTS(vcgt) -+GEN_COND_TESTS(vcge) -+GEN_COND_TESTS(vclt) -+GEN_COND_TESTS(vcle) -+GEN_COND_TESTS(vceq) -+ -+/* Scan for expected outputs. */ -+/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcgt\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcgt\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcgt\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcgt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcgt\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcgt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcgt\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcgt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcgt\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcge\.u8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcge\.u16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcge\.u32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcge\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcge\.u8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcge\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcge\.u16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vcge\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vcge\.u32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+" 2 } } */ -+/* { dg-final { scan-assembler "vclt\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { 
scan-assembler "vclt\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vclt\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vclt\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vclt\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vclt\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vcle\.s8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vcle\.s16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vcle\.s32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vcle\.s8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vcle\.s16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler "vcle\.s32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" } } */ -+/* { dg-final { scan-assembler-times "vceq\.i8\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ -+/* { dg-final { scan-assembler-times "vceq\.i16\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ -+/* { dg-final { scan-assembler-times "vceq\.i32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #0" 2 } } */ -+/* { dg-final { scan-assembler-times "vceq\.i8\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */ -+/* { dg-final { scan-assembler-times "vceq\.i16\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */ -+/* { dg-final { scan-assembler-times "vceq\.i32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #0" 2 } } */ -+ -+/* And ensure we don't have unexpected output too. */ -+/* { dg-final { scan-assembler-not "vc\[gl\]\[te\]\.u\[0-9\]+\[ \]+\[qQdD\]\[0-9\]+, \[qQdD\]\[0-9\]+, #0" } } */ -+ -+/* Tidy up. 
*/ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/pr79145.c -@@ -0,0 +1,16 @@ -+/* { dg-do compile } */ -+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-mcpu=*" } { "-mcpu=iwmmxt" } } */ -+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-mabi=*" } { "-mabi=iwmmxt" } } */ -+/* { dg-skip-if "Test is specific to the iWMMXt" { arm*-*-* } { "-march=*" } { "-march=iwmmxt" } } */ -+/* { dg-skip-if "Test is specific to ARM mode" { arm*-*-* } { "-mthumb" } { "" } } */ -+/* { dg-require-effective-target arm32 } */ -+/* { dg-require-effective-target arm_iwmmxt_ok } */ -+/* { dg-options "-mcpu=iwmmxt" } */ -+ -+int -+main (void) -+{ -+ volatile long long t1; -+ t1 ^= 0x55; -+ return 0; -+} ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/short-vfp-1.c -@@ -0,0 +1,45 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_vfp_ok } */ -+/* { dg-options "-mfpu=vfp" } */ -+ -+int -+test_sisf (float x) -+{ -+ return (int)x; -+} -+ -+short -+test_hisf (float x) -+{ -+ return (short)x; -+} -+ -+float -+test_sfsi (int x) -+{ -+ return (float)x; -+} -+ -+float -+test_sfhi (short x) -+{ -+ return (float)x; -+} -+ -+short -+test_hisi (int x) -+{ -+ return (short)x; -+} -+ -+int -+test_sihi (short x) -+{ -+ return (int)x; -+} -+ -+/* {dg-final { scan-assembler-times {vcvt\.s32\.f32\ts[0-9]+,s[0-9]+} 2 }} */ -+/* {dg-final { scan-assembler-times {vcvt\.f32\.s32\ts[0-9]+,s[0-9]+} 2 }} */ -+/* {dg-final { scan-assembler-times {vmov\tr[0-9]+,s[0-9]+} 2 }} */ -+/* {dg-final { scan-assembler-times {vmov\ts[0-9]+,r[0-9]+} 2 }} */ -+/* {dg-final { scan-assembler-times {sxth\tr[0-9]+,r[0-9]+} 2 }} */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vmaxnm_f32_1.c -@@ -0,0 +1,159 @@ -+/* Test the `vmaxnm_f32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_neon_hw } */ -+/* { dg-options "-save-temps -O3 -march=armv8-a" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (); -+ -+void __attribute__ ((noinline)) -+test_vmaxnm_f32__regular_input1 () -+{ -+ float32_t a1[] = {1,2}; -+ float32_t b1[] = {3,4}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vmaxnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual[i] != b1[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnm_f32__regular_input2 () -+{ -+ float32_t a1[] = {3,2}; -+ float32_t b1[] = {1,4}; -+ float32_t e[] = {3,4}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vmaxnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnm_f32__quiet_NaN_one_arg () -+{ -+ /* When given a quiet NaN, vmaxnm returns the other operand. -+ In this test case we have NaNs in only one operand. */ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {1,2}; -+ float32_t b1[] = {n,n}; -+ float32_t e[] = {1,2}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vmaxnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnm_f32__quiet_NaN_both_args () -+{ -+ /* When given a quiet NaN, vmaxnm returns the other operand. -+ In this test case we have NaNs in both operands. 
*/ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {n,2}; -+ float32_t b1[] = {1,n}; -+ float32_t e[] = {1,2}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vmaxnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnm_f32__zero_both_args () -+{ -+ /* For 0 and -0, vmaxnm returns 0. Since 0 == -0, check sign bit. */ -+ float32_t a1[] = {0.0, 0.0}; -+ float32_t b1[] = {-0.0, -0.0}; -+ float32_t e[] = {0.0, 0.0}; -+ -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vmaxnm_f32 (a, b); -+ -+ float32_t actual1[2]; -+ vst1_f32 (actual1, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual1[i] != e[i] || __builtin_signbit (actual1[i]) != 0) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnm_f32__inf_both_args () -+{ -+ /* The max of inf and inf is inf. The max of -inf and -inf is -inf. */ -+ float32_t inf = __builtin_huge_valf (); -+ float32_t a1[] = {inf, -inf}; -+ float32_t b1[] = {inf, -inf}; -+ float32_t e[] = {inf, -inf}; -+ -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vmaxnm_f32 (a, b); -+ -+ float32_t actual1[2]; -+ vst1_f32 (actual1, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual1[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnm_f32__two_quiet_NaNs_both_args () -+{ -+ /* When given 2 NaNs, return a NaN. Since a NaN is not equal to anything, -+ not even another NaN, use __builtin_isnan () to check. */ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {n,n}; -+ float32_t b1[] = {n,n}; -+ float32_t e[] = {n,n}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vmaxnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (!__builtin_isnan (actual[i])) -+ abort (); -+} -+ -+int -+main () -+{ -+ test_vmaxnm_f32__regular_input1 (); -+ test_vmaxnm_f32__regular_input2 (); -+ test_vmaxnm_f32__quiet_NaN_one_arg (); -+ test_vmaxnm_f32__quiet_NaN_both_args (); -+ test_vmaxnm_f32__zero_both_args (); -+ test_vmaxnm_f32__inf_both_args (); -+ test_vmaxnm_f32__two_quiet_NaNs_both_args (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "vmaxnm\.f32\t\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 7 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vmaxnmq_f32_1.c -@@ -0,0 +1,160 @@ -+/* Test the `vmaxnmqf32' ARM Neon intrinsic. 
*/ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_neon_hw } */ -+/* { dg-options "-save-temps -O3 -march=armv8-a" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (); -+ -+void __attribute__ ((noinline)) -+test_vmaxnmq_f32__regular_input1 () -+{ -+ float32_t a1[] = {1,2,5,6}; -+ float32_t b1[] = {3,4,7,8}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vmaxnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual[i] != b1[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnmq_f32__regular_input2 () -+{ -+ float32_t a1[] = {3,2,7,6}; -+ float32_t b1[] = {1,4,5,8}; -+ float32_t e[] = {3,4,7,8}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vmaxnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+ -+void __attribute__ ((noinline)) -+test_vmaxnmq_f32__quiet_NaN_one_arg () -+{ -+ /* When given a quiet NaN, vmaxnmq returns the other operand. -+ In this test case we have NaNs in only one operand. */ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {1,2,3,4}; -+ float32_t b1[] = {n,n,n,n}; -+ float32_t e[] = {1,2,3,4}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vmaxnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnmq_f32__quiet_NaN_both_args () -+{ -+ /* When given a quiet NaN, vmaxnmq returns the other operand. -+ In this test case we have NaNs in both operands. */ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {n,2,n,4}; -+ float32_t b1[] = {1,n,3,n}; -+ float32_t e[] = {1,2,3,4}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vmaxnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnmq_f32__zero_both_args () -+{ -+ /* For 0 and -0, vmaxnmq returns 0. Since 0 == -0, check sign bit. */ -+ float32_t a1[] = {0.0, 0.0, -0.0, -0.0}; -+ float32_t b1[] = {-0.0, -0.0, 0.0, 0.0}; -+ float32_t e[] = {0.0, 0.0, 0.0, 0.0}; -+ -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vmaxnmq_f32 (a, b); -+ -+ float32_t actual1[4]; -+ vst1q_f32 (actual1, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual1[i] != e[i] || __builtin_signbit (actual1[i]) != 0) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnmq_f32__inf_both_args () -+{ -+ /* The max of inf and inf is inf. The max of -inf and -inf is -inf. */ -+ float32_t inf = __builtin_huge_valf (); -+ float32_t a1[] = {inf, -inf, inf, inf}; -+ float32_t b1[] = {inf, -inf, -inf, -inf}; -+ float32_t e[] = {inf, -inf, inf, inf}; -+ -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vmaxnmq_f32 (a, b); -+ -+ float32_t actual1[4]; -+ vst1q_f32 (actual1, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual1[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vmaxnmq_f32__two_quiet_NaNs_both_args () -+{ -+ /* When given 2 NaNs, return a NaN. Since a NaN is not equal to anything, -+ not even another NaN, use __builtin_isnan () to check. 
*/ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {n,n,n,n}; -+ float32_t b1[] = {n,n,n,n}; -+ float32_t e[] = {n,n}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vmaxnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (!__builtin_isnan (actual[i])) -+ abort (); -+} -+ -+int -+main () -+{ -+ test_vmaxnmq_f32__regular_input1 (); -+ test_vmaxnmq_f32__regular_input2 (); -+ test_vmaxnmq_f32__quiet_NaN_one_arg (); -+ test_vmaxnmq_f32__quiet_NaN_both_args (); -+ test_vmaxnmq_f32__zero_both_args (); -+ test_vmaxnmq_f32__inf_both_args (); -+ test_vmaxnmq_f32__two_quiet_NaNs_both_args (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "vmaxnm\.f32\t\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+\n" 7 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vminnm_f32_1.c -@@ -0,0 +1,159 @@ -+/* Test the `vminnmf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_neon_hw } */ -+/* { dg-options "-save-temps -O3 -march=armv8-a" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (); -+ -+void __attribute__ ((noinline)) -+test_vminnm_f32__regular_input1 () -+{ -+ float32_t a1[] = {1,2}; -+ float32_t b1[] = {3,4}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vminnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual[i] != a1[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnm_f32__regular_input2 () -+{ -+ float32_t a1[] = {3,2}; -+ float32_t b1[] = {1,4}; -+ float32_t e[] = {1,2}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vminnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnm_f32__quiet_NaN_one_arg () -+{ -+ /* When given a quiet NaN, vminnm returns the other operand. -+ In this test case we have NaNs in only one operand. */ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {1,2}; -+ float32_t b1[] = {n,n}; -+ float32_t e[] = {1,2}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vminnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnm_f32__quiet_NaN_both_args () -+{ -+ /* When given a quiet NaN, vminnm returns the other operand. -+ In this test case we have NaNs in both operands. */ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {n,2}; -+ float32_t b1[] = {1,n}; -+ float32_t e[] = {1,2}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vminnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnm_f32__zero_both_args () -+{ -+ /* For 0 and -0, vminnm returns -0. Since 0 == -0, check sign bit. 
*/ -+ float32_t a1[] = {0.0,0.0}; -+ float32_t b1[] = {-0.0, -0.0}; -+ float32_t e[] = {-0.0, -0.0}; -+ -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vminnm_f32 (a, b); -+ -+ float32_t actual1[2]; -+ vst1_f32 (actual1, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual1[i] != e[i] || __builtin_signbit (actual1[i]) == 0) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnm_f32__inf_both_args () -+{ -+ /* The min of inf and inf is inf. The min of -inf and -inf is -inf. */ -+ float32_t inf = __builtin_huge_valf (); -+ float32_t a1[] = {inf, -inf}; -+ float32_t b1[] = {inf, -inf}; -+ float32_t e[] = {inf, -inf}; -+ -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vminnm_f32 (a, b); -+ -+ float32_t actual1[2]; -+ vst1_f32 (actual1, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (actual1[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnm_f32__two_quiet_NaNs_both_args () -+{ -+ /* When given 2 NaNs, return a NaN. Since a NaN is not equal to anything, -+ not even another NaN, use __builtin_isnan () to check. */ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {n,n}; -+ float32_t b1[] = {n,n}; -+ float32_t e[] = {n,n}; -+ float32x2_t a = vld1_f32 (a1); -+ float32x2_t b = vld1_f32 (b1); -+ float32x2_t c = vminnm_f32 (a, b); -+ float32_t actual[2]; -+ vst1_f32 (actual, c); -+ -+ for (int i = 0; i < 2; ++i) -+ if (!__builtin_isnan (actual[i])) -+ abort (); -+} -+ -+int -+main () -+{ -+ test_vminnm_f32__regular_input1 (); -+ test_vminnm_f32__regular_input2 (); -+ test_vminnm_f32__quiet_NaN_one_arg (); -+ test_vminnm_f32__quiet_NaN_both_args (); -+ test_vminnm_f32__zero_both_args (); -+ test_vminnm_f32__inf_both_args (); -+ test_vminnm_f32__two_quiet_NaNs_both_args (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "vminnm\.f32\t\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 7 } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/simd/vminnmq_f32_1.c -@@ -0,0 +1,159 @@ -+/* Test the `vminnmqf32' ARM Neon intrinsic. */ -+ -+/* { dg-do run } */ -+/* { dg-require-effective-target arm_v8_neon_hw } */ -+/* { dg-options "-save-temps -O3 -march=armv8-a" } */ -+/* { dg-add-options arm_v8_neon } */ -+ -+#include "arm_neon.h" -+ -+extern void abort (); -+ -+void __attribute__ ((noinline)) -+test_vminnmq_f32__regular_input1 () -+{ -+ float32_t a1[] = {1,2,5,6}; -+ float32_t b1[] = {3,4,7,8}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vminnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual[i] != a1[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnmq_f32__regular_input2 () -+{ -+ float32_t a1[] = {3,2,7,6}; -+ float32_t b1[] = {1,4,5,8}; -+ float32_t e[] = {1,2,5,6}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vminnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnmq_f32__quiet_NaN_one_arg () -+{ -+ /* When given a quiet NaN, vminnmq returns the other operand. -+ In this test case we have NaNs in only one operand. 
*/ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {1,2,3,4}; -+ float32_t b1[] = {n,n,n,n}; -+ float32_t e[] = {1,2,3,4}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vminnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnmq_f32__quiet_NaN_both_args () -+{ -+ /* When given a quiet NaN, vminnmq returns the other operand. -+ In this test case we have NaNs in both operands. */ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {n,2,n,4}; -+ float32_t b1[] = {1,n,3,n}; -+ float32_t e[] = {1,2,3,4}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vminnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnmq_f32__zero_both_args () -+{ -+ /* For 0 and -0, vminnmq returns -0. Since 0 == -0, check sign bit. */ -+ float32_t a1[] = {0.0, 0.0, -0.0, -0.0}; -+ float32_t b1[] = {-0.0, -0.0, 0.0, 0.0}; -+ float32_t e[] = {-0.0, -0.0, -0.0, -0.0}; -+ -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vminnmq_f32 (a, b); -+ -+ float32_t actual1[4]; -+ vst1q_f32 (actual1, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual1[i] != e[i] || __builtin_signbit (actual1[i]) == 0) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnmq_f32__inf_both_args () -+{ -+ /* The min of inf and inf is inf. The min of -inf and -inf is -inf. */ -+ float32_t inf = __builtin_huge_valf (); -+ float32_t a1[] = {inf, -inf, inf, inf}; -+ float32_t b1[] = {inf, -inf, -inf, -inf}; -+ float32_t e[] = {inf, -inf, -inf, -inf}; -+ -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vminnmq_f32 (a, b); -+ -+ float32_t actual1[4]; -+ vst1q_f32 (actual1, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (actual1[i] != e[i]) -+ abort (); -+} -+ -+void __attribute__ ((noinline)) -+test_vminnmq_f32__two_quiet_NaNs_both_args () -+{ -+ /* When given 2 NaNs, return a NaN. Since a NaN is not equal to anything, -+ not even another NaN, use __builtin_isnan () to check. 
*/ -+ float32_t n = __builtin_nanf (""); -+ float32_t a1[] = {n,n,n,n}; -+ float32_t b1[] = {n,n,n,n}; -+ float32_t e[] = {n,n}; -+ float32x4_t a = vld1q_f32 (a1); -+ float32x4_t b = vld1q_f32 (b1); -+ float32x4_t c = vminnmq_f32 (a, b); -+ float32_t actual[4]; -+ vst1q_f32 (actual, c); -+ -+ for (int i = 0; i < 4; ++i) -+ if (!__builtin_isnan (actual[i])) -+ abort (); -+} -+ -+int -+main () -+{ -+ test_vminnmq_f32__regular_input1 (); -+ test_vminnmq_f32__regular_input2 (); -+ test_vminnmq_f32__quiet_NaN_one_arg (); -+ test_vminnmq_f32__quiet_NaN_both_args (); -+ test_vminnmq_f32__zero_both_args (); -+ test_vminnmq_f32__inf_both_args (); -+ test_vminnmq_f32__two_quiet_NaNs_both_args (); -+ return 0; -+} -+ -+/* { dg-final { scan-assembler-times "vminnm\.f32\t\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+\n" 7 } } */ ---- a/src/gcc/testsuite/gcc.target/arm/unsigned-extend-2.c -+++ b/src/gcc/testsuite/gcc.target/arm/unsigned-extend-2.c -@@ -2,13 +2,13 @@ - /* { dg-require-effective-target arm_thumb2_ok } */ - /* { dg-options "-O" } */ - --unsigned short foo (unsigned short x) -+unsigned short foo (unsigned short x, unsigned short c) - { - unsigned char i = 0; - for (i = 0; i < 8; i++) - { - x >>= 1; -- x &= 0x7fff; -+ x &= c; - } - return x; - } ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/vect-vcvt.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -mvectorize-with-neon-double" } */ -+/* { dg-add-options arm_neon } */ -+ -+#define N 32 -+ -+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; -+float fa[N]; -+int ia[N]; -+ -+int convert() -+{ -+ int i; -+ -+ /* int -> float */ -+ for (i = 0; i < N; i++) -+ fa[i] = (float) ib[i]; -+ -+ /* float -> int */ -+ for (i = 0; i < N; i++) -+ ia[i] = (int) fa[i]; -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/vect-vcvtq.c -@@ -0,0 +1,27 @@ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */ -+/* { dg-add-options arm_neon } */ -+ -+#define N 32 -+ -+int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45}; -+float fa[N]; -+int ia[N]; -+ -+int convert() -+{ -+ int i; -+ -+ /* int -> float */ -+ for (i = 0; i < N; i++) -+ fa[i] = (float) ib[i]; -+ -+ /* float -> int */ -+ for (i = 0; i < N; i++) -+ ia[i] = (int) fa[i]; -+ -+ return 0; -+} -+ -+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/vfp-shift-a2t2.c -@@ -0,0 +1,27 @@ -+/* Check that NEON vector shifts support immediate values == size. 
*/ -+ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-options "-save-temps" } */ -+/* { dg-add-options arm_neon } */ -+ -+#include <arm_neon.h> -+ -+uint16x8_t test_vshll_n_u8 (uint8x8_t a) -+{ -+ return vshll_n_u8(a, 8); -+} -+ -+uint32x4_t test_vshll_n_u16 (uint16x4_t a) -+{ -+ return vshll_n_u16(a, 16); -+} -+ -+uint64x2_t test_vshll_n_u32 (uint32x2_t a) -+{ -+ return vshll_n_u32(a, 32); -+} -+ -+/* { dg-final { scan-assembler "vshll\.u16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vshll\.u32\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ -+/* { dg-final { scan-assembler "vshll\.u8\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */ ---- /dev/null -+++ b/src/gcc/testsuite/gcc.target/arm/vst1Q_laneu64-1.c -@@ -0,0 +1,25 @@ -+/* Test the `vst1Q_laneu64' ARM Neon intrinsic. */ -+ -+/* Detect ICE in the case of unaligned memory address. */ -+ -+/* { dg-do compile } */ -+/* { dg-require-effective-target arm_neon_ok } */ -+/* { dg-add-options arm_neon } */ -+ -+#include "arm_neon.h" -+ -+unsigned char dummy_store[1000]; -+ -+void -+foo (unsigned char* addr) -+{ -+ uint8x16_t vdata = vld1q_u8 (addr); -+ vst1q_lane_u64 ((uint64_t*) &dummy_store, vreinterpretq_u64_u8 (vdata), 0); -+} -+ -+uint64_t -+bar (uint64x2_t vdata) -+{ -+ vdata = vld1q_lane_u64 ((uint64_t*) &dummy_store, vdata, 0); -+ return vgetq_lane_u64 (vdata, 0); -+} ---- a/src/gcc/testsuite/lib/gcc-dg.exp -+++ b/src/gcc/testsuite/lib/gcc-dg.exp -@@ -403,6 +403,7 @@ if { [info procs ${tool}_load] != [list] \ - switch [lindex $result 0] { - "pass" { set status "fail" } - "fail" { set status "pass" } -+ default { set status [lindex $result 0] } - } - set result [list $status [lindex $result 1]] - } ---- a/src/gcc/testsuite/lib/target-supports.exp -+++ b/src/gcc/testsuite/lib/target-supports.exp -@@ -252,6 +252,20 @@ proc check_runtime {prop args} { - }] - } - -+# Return 1 if GCC was configured with $pattern. -+proc check_configured_with { pattern } { -+ global tool -+ -+ set gcc_output [${tool}_target_compile "-v" "" "none" ""] -+ if { [ regexp "Configured with: \[^\n\]*$pattern" $gcc_output ] } { -+ verbose "Matched: $pattern" 2 -+ return 1 -+ } -+ -+ verbose "Failed to match: $pattern" 2 -+ return 0 -+} -+ - ############################### - # proc check_weak_available { } - ############################### -@@ -2936,6 +2950,28 @@ proc add_options_for_arm_v8_1a_neon { flags } { - return "$flags $et_arm_v8_1a_neon_flags -march=armv8.1-a" - } - -+# Add the options needed for ARMv8.2 with the scalar FP16 extension. -+# Also adds the ARMv8 FP options for ARM and for AArch64. -+ -+proc add_options_for_arm_v8_2a_fp16_scalar { flags } { -+ if { ! [check_effective_target_arm_v8_2a_fp16_scalar_ok] } { -+ return "$flags" -+ } -+ global et_arm_v8_2a_fp16_scalar_flags -+ return "$flags $et_arm_v8_2a_fp16_scalar_flags" -+} -+ -+# Add the options needed for ARMv8.2 with the FP16 extension. Also adds -+# the ARMv8 NEON options for ARM and for AArch64. -+ -+proc add_options_for_arm_v8_2a_fp16_neon { flags } { -+ if { ! [check_effective_target_arm_v8_2a_fp16_neon_ok] } { -+ return "$flags" -+ } -+ global et_arm_v8_2a_fp16_neon_flags -+ return "$flags $et_arm_v8_2a_fp16_neon_flags" -+} -+ - proc add_options_for_arm_crc { flags } { - if { ! 
[check_effective_target_arm_crc_ok] } { - return "$flags" -@@ -3022,23 +3058,25 @@ proc check_effective_target_arm_crc_ok { } { - - proc check_effective_target_arm_neon_fp16_ok_nocache { } { - global et_arm_neon_fp16_flags -+ global et_arm_neon_flags - set et_arm_neon_fp16_flags "" -- if { [check_effective_target_arm32] } { -+ if { [check_effective_target_arm32] -+ && [check_effective_target_arm_neon_ok] } { - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16" - "-mfpu=neon-fp16 -mfloat-abi=softfp" - "-mfp16-format=ieee" - "-mfloat-abi=softfp -mfp16-format=ieee" - "-mfpu=neon-fp16 -mfp16-format=ieee" - "-mfpu=neon-fp16 -mfloat-abi=softfp -mfp16-format=ieee"} { -- if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object { -+ if { [check_no_compiler_messages_nocache arm_neon_fp16_ok object { - #include "arm_neon.h" - float16x4_t - foo (float32x4_t arg) - { - return vcvt_f16_f32 (arg); - } -- } "$flags"] } { -- set et_arm_neon_fp16_flags $flags -+ } "$et_arm_neon_flags $flags"] } { -+ set et_arm_neon_fp16_flags [concat $et_arm_neon_flags $flags] - return 1 - } - } -@@ -3075,6 +3113,65 @@ proc add_options_for_arm_neon_fp16 { flags } { - return "$flags $et_arm_neon_fp16_flags" - } - -+# Return 1 if this is an ARM target supporting the FP16 alternative -+# format. Some multilibs may be incompatible with the options needed. Also -+# set et_arm_neon_fp16_flags to the best options to add. -+ -+proc check_effective_target_arm_fp16_alternative_ok_nocache { } { -+ global et_arm_neon_fp16_flags -+ set et_arm_neon_fp16_flags "" -+ if { [check_effective_target_arm32] } { -+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16" -+ "-mfpu=neon-fp16 -mfloat-abi=softfp"} { -+ if { [check_no_compiler_messages_nocache \ -+ arm_fp16_alternative_ok object { -+ #if !defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ #error __ARM_FP16_FORMAT_ALTERNATIVE not defined -+ #endif -+ } "$flags -mfp16-format=alternative"] } { -+ set et_arm_neon_fp16_flags "$flags -mfp16-format=alternative" -+ return 1 -+ } -+ } -+ } -+ -+ return 0 -+} -+ -+proc check_effective_target_arm_fp16_alternative_ok { } { -+ return [check_cached_effective_target arm_fp16_alternative_ok \ -+ check_effective_target_arm_fp16_alternative_ok_nocache] -+} -+ -+# Return 1 if this is an ARM target supports specifying the FP16 none -+# format. Some multilibs may be incompatible with the options needed. -+ -+proc check_effective_target_arm_fp16_none_ok_nocache { } { -+ if { [check_effective_target_arm32] } { -+ foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16" -+ "-mfpu=neon-fp16 -mfloat-abi=softfp"} { -+ if { [check_no_compiler_messages_nocache \ -+ arm_fp16_none_ok object { -+ #if defined (__ARM_FP16_FORMAT_ALTERNATIVE) -+ #error __ARM_FP16_FORMAT_ALTERNATIVE defined -+ #endif -+ #if defined (__ARM_FP16_FORMAT_IEEE) -+ #error __ARM_FP16_FORMAT_IEEE defined -+ #endif -+ } "$flags -mfp16-format=none"] } { -+ return 1 -+ } -+ } -+ } -+ -+ return 0 -+} -+ -+proc check_effective_target_arm_fp16_none_ok { } { -+ return [check_cached_effective_target arm_fp16_none_ok \ -+ check_effective_target_arm_fp16_none_ok_nocache] -+} -+ - # Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8 - # -mfloat-abi=softfp or equivalent options. Some multilibs may be - # incompatible with these options. 
Also set et_arm_v8_neon_flags to the -@@ -3117,8 +3214,10 @@ proc check_effective_target_arm_v8_neon_ok { } { - - proc check_effective_target_arm_neonv2_ok_nocache { } { - global et_arm_neonv2_flags -+ global et_arm_neon_flags - set et_arm_neonv2_flags "" -- if { [check_effective_target_arm32] } { -+ if { [check_effective_target_arm32] -+ && [check_effective_target_arm_neon_ok] } { - foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} { - if { [check_no_compiler_messages_nocache arm_neonv2_ok object { - #include "arm_neon.h" -@@ -3127,8 +3226,8 @@ proc check_effective_target_arm_neonv2_ok_nocache { } { - { - return vfma_f32 (a, b, c); - } -- } "$flags"] } { -- set et_arm_neonv2_flags $flags -+ } "$et_arm_neon_flags $flags"] } { -+ set et_arm_neonv2_flags [concat $et_arm_neon_flags $flags] - return 1 - } - } -@@ -3142,9 +3241,9 @@ proc check_effective_target_arm_neonv2_ok { } { - check_effective_target_arm_neonv2_ok_nocache] - } - --# Add the options needed for NEON. We need either -mfloat-abi=softfp --# or -mfloat-abi=hard, but if one is already specified by the --# multilib, use it. -+# Add the options needed for VFP FP16 support. We need either -+# -mfloat-abi=softfp or -mfloat-abi=hard. If one is already specified by -+# the multilib, use it. - - proc add_options_for_arm_fp16 { flags } { - if { ! [check_effective_target_arm_fp16_ok] } { -@@ -3154,9 +3253,32 @@ proc add_options_for_arm_fp16 { flags } { - return "$flags $et_arm_fp16_flags" - } - -+# Add the options needed to enable support for IEEE format -+# half-precision support. This is valid for ARM targets. -+ -+proc add_options_for_arm_fp16_ieee { flags } { -+ if { ! [check_effective_target_arm_fp16_ok] } { -+ return "$flags" -+ } -+ global et_arm_fp16_flags -+ return "$flags $et_arm_fp16_flags -mfp16-format=ieee" -+} -+ -+# Add the options needed to enable support for ARM Alternative format -+# half-precision support. This is valid for ARM targets. -+ -+proc add_options_for_arm_fp16_alternative { flags } { -+ if { ! [check_effective_target_arm_fp16_ok] } { -+ return "$flags" -+ } -+ global et_arm_fp16_flags -+ return "$flags $et_arm_fp16_flags -mfp16-format=alternative" -+} -+ - # Return 1 if this is an ARM target that can support a VFP fp16 variant. - # Skip multilibs that are incompatible with these options and set --# et_arm_fp16_flags to the best options to add. -+# et_arm_fp16_flags to the best options to add. This test is valid for -+# ARM only. - - proc check_effective_target_arm_fp16_ok_nocache { } { - global et_arm_fp16_flags -@@ -3164,7 +3286,10 @@ proc check_effective_target_arm_fp16_ok_nocache { } { - if { ! [check_effective_target_arm32] } { - return 0; - } -- if [check-flags [list "" { *-*-* } { "-mfpu=*" } { "-mfpu=*fp16*" "-mfpu=*fpv[4-9]*" "-mfpu=*fpv[1-9][0-9]*" } ]] { -+ if [check-flags \ -+ [list "" { *-*-* } { "-mfpu=*" } \ -+ { "-mfpu=*fp16*" "-mfpu=*fpv[4-9]*" \ -+ "-mfpu=*fpv[1-9][0-9]*" "-mfpu=*fp-armv8*" } ]] { - # Multilib flags would override -mfpu. - return 0 - } -@@ -3200,6 +3325,28 @@ proc check_effective_target_arm_fp16_ok { } { - check_effective_target_arm_fp16_ok_nocache] - } - -+# Return 1 if the target supports executing VFP FP16 instructions, 0 -+# otherwise. This test is valid for ARM only. -+ -+proc check_effective_target_arm_fp16_hw { } { -+ if {! 
[check_effective_target_arm_fp16_ok] } { -+ return 0 -+ } -+ global et_arm_fp16_flags -+ check_runtime_nocache arm_fp16_hw { -+ int -+ main (int argc, char **argv) -+ { -+ __fp16 a = 1.0; -+ float r; -+ asm ("vcvtb.f32.f16 %0, %1" -+ : "=w" (r) : "w" (a) -+ : /* No clobbers. */); -+ return (r == 1.0) ? 0 : 1; -+ } -+ } "$et_arm_fp16_flags -mfp16-format=ieee" -+} -+ - # Creates a series of routines that return 1 if the given architecture - # can be selected and a routine to give the flags to select that architecture - # Note: Extra flags may be added to disable options from newer compilers -@@ -3209,22 +3356,26 @@ proc check_effective_target_arm_fp16_ok { } { - # Usage: /* { dg-require-effective-target arm_arch_v5_ok } */ - # /* { dg-add-options arm_arch_v5 } */ - # /* { dg-require-effective-target arm_arch_v5_multilib } */ --foreach { armfunc armflag armdef } { v4 "-march=armv4 -marm" __ARM_ARCH_4__ -- v4t "-march=armv4t" __ARM_ARCH_4T__ -- v5 "-march=armv5 -marm" __ARM_ARCH_5__ -- v5t "-march=armv5t" __ARM_ARCH_5T__ -- v5te "-march=armv5te" __ARM_ARCH_5TE__ -- v6 "-march=armv6" __ARM_ARCH_6__ -- v6k "-march=armv6k" __ARM_ARCH_6K__ -- v6t2 "-march=armv6t2" __ARM_ARCH_6T2__ -- v6z "-march=armv6z" __ARM_ARCH_6Z__ -- v6m "-march=armv6-m -mthumb" __ARM_ARCH_6M__ -- v7a "-march=armv7-a" __ARM_ARCH_7A__ -- v7r "-march=armv7-r" __ARM_ARCH_7R__ -- v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__ -- v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__ -- v8a "-march=armv8-a" __ARM_ARCH_8A__ -- v8_1a "-march=armv8.1a" __ARM_ARCH_8A__ } { -+foreach { armfunc armflag armdef } { -+ v4 "-march=armv4 -marm" __ARM_ARCH_4__ -+ v4t "-march=armv4t" __ARM_ARCH_4T__ -+ v5 "-march=armv5 -marm" __ARM_ARCH_5__ -+ v5t "-march=armv5t" __ARM_ARCH_5T__ -+ v5te "-march=armv5te" __ARM_ARCH_5TE__ -+ v6 "-march=armv6" __ARM_ARCH_6__ -+ v6k "-march=armv6k" __ARM_ARCH_6K__ -+ v6t2 "-march=armv6t2" __ARM_ARCH_6T2__ -+ v6z "-march=armv6z" __ARM_ARCH_6Z__ -+ v6m "-march=armv6-m -mthumb -mfloat-abi=soft" __ARM_ARCH_6M__ -+ v7a "-march=armv7-a" __ARM_ARCH_7A__ -+ v7r "-march=armv7-r" __ARM_ARCH_7R__ -+ v7m "-march=armv7-m -mthumb" __ARM_ARCH_7M__ -+ v7em "-march=armv7e-m -mthumb" __ARM_ARCH_7EM__ -+ v8a "-march=armv8-a" __ARM_ARCH_8A__ -+ v8_1a "-march=armv8.1a" __ARM_ARCH_8A__ -+ v8_2a "-march=armv8.2a" __ARM_ARCH_8A__ -+ v8m_base "-march=armv8-m.base -mthumb -mfloat-abi=soft" __ARM_ARCH_8M_BASE__ -+ v8m_main "-march=armv8-m.main -mthumb" __ARM_ARCH_8M_MAIN__ } { - eval [string map [list FUNC $armfunc FLAG $armflag DEF $armdef ] { - proc check_effective_target_arm_arch_FUNC_ok { } { - if { [ string match "*-marm*" "FLAG" ] && -@@ -3274,6 +3425,12 @@ proc add_options_for_arm_arch_v7ve { flags } { - return "$flags -march=armv7ve" - } - -+# Return 1 if GCC was configured with --with-mode= -+proc check_effective_target_default_mode { } { -+ -+ return [check_configured_with "with-mode="] -+} -+ - # Return 1 if this is an ARM target where -marm causes ARM to be - # used (not Thumb) - -@@ -3352,15 +3509,60 @@ proc check_effective_target_arm_cortex_m { } { - return 0 - } - return [check_no_compiler_messages arm_cortex_m assembly { -- #if !defined(__ARM_ARCH_7M__) \ -- && !defined (__ARM_ARCH_7EM__) \ -- && !defined (__ARM_ARCH_6M__) -- #error !__ARM_ARCH_7M__ && !__ARM_ARCH_7EM__ && !__ARM_ARCH_6M__ -+ #if defined(__ARM_ARCH_ISA_ARM) -+ #error __ARM_ARCH_ISA_ARM is defined - #endif - int i; - } "-mthumb"] - } - -+# Return 1 if this is an ARM target where -mthumb causes Thumb-1 to be -+# used and MOVT/MOVW instructions to be available. 
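For context on the probe that follows: ARMv8-M Baseline is the notable case where -mthumb selects the Thumb-1 ISA yet MOVW/MOVT are available, so a 32-bit constant can be built in two instructions instead of being loaded from a literal pool. A minimal sketch of what the check enables, assuming an ARMv8-M Baseline toolchain (hand-written illustration, not part of the patch):

    #include <stdint.h>

    uint32_t
    build_constant (void)
    {
      /* On Thumb-1 targets that pass arm_thumb1_movt_ok this can compile
         to a MOVW/MOVT pair (low half 0xbeef, high half 0xdead); on plain
         ARMv6-M it needs a literal-pool load instead.  */
      return 0xdeadbeefu;
    }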
-+ -+proc check_effective_target_arm_thumb1_movt_ok {} { -+ if [check_effective_target_arm_thumb1_ok] { -+ return [check_no_compiler_messages arm_movt object { -+ int -+ foo (void) -+ { -+ asm ("movt r0, #42"); -+ } -+ } "-mthumb"] -+ } else { -+ return 0 -+ } -+} -+ -+# Return 1 if this is an ARM target where -mthumb causes Thumb-1 to be -+# used and CBZ and CBNZ instructions are available. -+ -+proc check_effective_target_arm_thumb1_cbz_ok {} { -+ if [check_effective_target_arm_thumb1_ok] { -+ return [check_no_compiler_messages arm_movt object { -+ int -+ foo (void) -+ { -+ asm ("cbz r0, 2f\n2:"); -+ } -+ } "-mthumb"] -+ } else { -+ return 0 -+ } -+} -+ -+# Return 1 if this is an ARM target where ARMv8-M Security Extensions is -+# available. -+ -+proc check_effective_target_arm_cmse_ok {} { -+ return [check_no_compiler_messages arm_cmse object { -+ int -+ foo (void) -+ { -+ asm ("bxns r0"); -+ } -+ } "-mcmse"]; -+} -+ - # Return 1 if this compilation turns on string_ops_prefer_neon on. - - proc check_effective_target_arm_tune_string_ops_prefer_neon { } { -@@ -3436,6 +3638,76 @@ proc check_effective_target_arm_v8_1a_neon_ok { } { - check_effective_target_arm_v8_1a_neon_ok_nocache] - } - -+# Return 1 if the target supports ARMv8.2 scalar FP16 arithmetic -+# instructions, 0 otherwise. The test is valid for ARM and for AArch64. -+# Record the command line options needed. -+ -+proc check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache { } { -+ global et_arm_v8_2a_fp16_scalar_flags -+ set et_arm_v8_2a_fp16_scalar_flags "" -+ -+ if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { -+ return 0; -+ } -+ -+ # Iterate through sets of options to find the compiler flags that -+ # need to be added to the -march option. -+ foreach flags {"" "-mfpu=fp-armv8" "-mfloat-abi=softfp" \ -+ "-mfpu=fp-armv8 -mfloat-abi=softfp"} { -+ if { [check_no_compiler_messages_nocache \ -+ arm_v8_2a_fp16_scalar_ok object { -+ #if !defined (__ARM_FEATURE_FP16_SCALAR_ARITHMETIC) -+ #error "__ARM_FEATURE_FP16_SCALAR_ARITHMETIC not defined" -+ #endif -+ } "$flags -march=armv8.2-a+fp16"] } { -+ set et_arm_v8_2a_fp16_scalar_flags "$flags -march=armv8.2-a+fp16" -+ return 1 -+ } -+ } -+ -+ return 0; -+} -+ -+proc check_effective_target_arm_v8_2a_fp16_scalar_ok { } { -+ return [check_cached_effective_target arm_v8_2a_fp16_scalar_ok \ -+ check_effective_target_arm_v8_2a_fp16_scalar_ok_nocache] -+} -+ -+# Return 1 if the target supports ARMv8.2 Adv.SIMD FP16 arithmetic -+# instructions, 0 otherwise. The test is valid for ARM and for AArch64. -+# Record the command line options needed. -+ -+proc check_effective_target_arm_v8_2a_fp16_neon_ok_nocache { } { -+ global et_arm_v8_2a_fp16_neon_flags -+ set et_arm_v8_2a_fp16_neon_flags "" -+ -+ if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } { -+ return 0; -+ } -+ -+ # Iterate through sets of options to find the compiler flags that -+ # need to be added to the -march option. 
-+ foreach flags {"" "-mfpu=neon-fp-armv8" "-mfloat-abi=softfp" \ -+ "-mfpu=neon-fp-armv8 -mfloat-abi=softfp"} { -+ if { [check_no_compiler_messages_nocache \ -+ arm_v8_2a_fp16_neon_ok object { -+ #if !defined (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) -+ #error "__ARM_FEATURE_FP16_VECTOR_ARITHMETIC not defined" -+ #endif -+ } "$flags -march=armv8.2-a+fp16"] } { -+ set et_arm_v8_2a_fp16_neon_flags "$flags -march=armv8.2-a+fp16" -+ return 1 -+ } -+ } -+ -+ return 0; -+} -+ -+proc check_effective_target_arm_v8_2a_fp16_neon_ok { } { -+ return [check_cached_effective_target arm_v8_2a_fp16_neon_ok \ -+ check_effective_target_arm_v8_2a_fp16_neon_ok_nocache] -+} -+ - # Return 1 if the target supports executing ARMv8 NEON instructions, 0 - # otherwise. - -@@ -3445,11 +3717,17 @@ proc check_effective_target_arm_v8_neon_hw { } { - int - main (void) - { -- float32x2_t a; -+ float32x2_t a = { 1.0f, 2.0f }; -+ #ifdef __ARM_ARCH_ISA_A64 -+ asm ("frinta %0.2s, %1.2s" -+ : "=w" (a) -+ : "w" (a)); -+ #else - asm ("vrinta.f32 %P0, %P1" - : "=w" (a) - : "0" (a)); -- return 0; -+ #endif -+ return a[0] == 2.0f; - } - } [add_options_for_arm_v8_neon ""]] - } -@@ -3492,6 +3770,81 @@ proc check_effective_target_arm_v8_1a_neon_hw { } { - } [add_options_for_arm_v8_1a_neon ""]] - } - -+# Return 1 if the target supports executing floating point instructions from -+# ARMv8.2 with the FP16 extension, 0 otherwise. The test is valid for ARM and -+# for AArch64. -+ -+proc check_effective_target_arm_v8_2a_fp16_scalar_hw { } { -+ if { ![check_effective_target_arm_v8_2a_fp16_scalar_ok] } { -+ return 0; -+ } -+ return [check_runtime arm_v8_2a_fp16_scalar_hw_available { -+ int -+ main (void) -+ { -+ __fp16 a = 1.0; -+ __fp16 result; -+ -+ #ifdef __ARM_ARCH_ISA_A64 -+ -+ asm ("fabs %h0, %h1" -+ : "=w"(result) -+ : "w"(a) -+ : /* No clobbers. */); -+ -+ #else -+ -+ asm ("vabs.f16 %0, %1" -+ : "=w"(result) -+ : "w"(a) -+ : /* No clobbers. */); -+ -+ #endif -+ -+ return (result == 1.0) ? 0 : 1; -+ } -+ } [add_options_for_arm_v8_2a_fp16_scalar ""]] -+} -+ -+# Return 1 if the target supports executing Adv.SIMD instructions from ARMv8.2 -+# with the FP16 extension, 0 otherwise. The test is valid for ARM and for -+# AArch64. -+ -+proc check_effective_target_arm_v8_2a_fp16_neon_hw { } { -+ if { ![check_effective_target_arm_v8_2a_fp16_neon_ok] } { -+ return 0; -+ } -+ return [check_runtime arm_v8_2a_fp16_neon_hw_available { -+ int -+ main (void) -+ { -+ #ifdef __ARM_ARCH_ISA_A64 -+ -+ __Float16x4_t a = {1.0, -1.0, 1.0, -1.0}; -+ __Float16x4_t result; -+ -+ asm ("fabs %0.4h, %1.4h" -+ : "=w"(result) -+ : "w"(a) -+ : /* No clobbers. */); -+ -+ #else -+ -+ __simd64_float16_t a = {1.0, -1.0, 1.0, -1.0}; -+ __simd64_float16_t result; -+ -+ asm ("vabs.f16 %P0, %P1" -+ : "=w"(result) -+ : "w"(a) -+ : /* No clobbers. */); -+ -+ #endif -+ -+ return (result[0] == 1.0) ? 0 : 1; -+ } -+ } [add_options_for_arm_v8_2a_fp16_neon ""]] -+} -+ - # Return 1 if this is a ARM target with NEON enabled. - - proc check_effective_target_arm_neon { } { -@@ -3526,6 +3879,25 @@ proc check_effective_target_arm_neonv2 { } { - } - } - -+# Return 1 if this is an ARM target with load acquire and store release -+# instructions for 8-, 16- and 32-bit types. 
-+ -+proc check_effective_target_arm_acq_rel { } { -+ return [check_no_compiler_messages arm_acq_rel object { -+ void -+ load_acquire_store_release (void) -+ { -+ asm ("lda r0, [r1]\n\t" -+ "stl r0, [r1]\n\t" -+ "ldah r0, [r1]\n\t" -+ "stlh r0, [r1]\n\t" -+ "ldab r0, [r1]\n\t" -+ "stlb r0, [r1]" -+ : : : "r0", "memory"); -+ } -+ }] -+} -+ - # Return 1 if this a Loongson-2E or -2F target using an ABI that supports - # the Loongson vector modes. - -@@ -4380,6 +4752,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } { - set et_vect_widen_sum_hi_to_si_pattern_saved 0 - if { [istarget powerpc*-*-*] - || [istarget aarch64*-*-*] -+ || ([istarget arm*-*-*] && -+ [check_effective_target_arm_neon_ok]) - || [istarget ia64-*-*] } { - set et_vect_widen_sum_hi_to_si_pattern_saved 1 - } -@@ -5755,6 +6129,8 @@ proc check_effective_target_sync_int_long { } { - || [istarget aarch64*-*-*] - || [istarget alpha*-*-*] - || [istarget arm*-*-linux-*] -+ || ([istarget arm*-*-*] -+ && [check_effective_target_arm_acq_rel]) - || [istarget bfin*-*linux*] - || [istarget hppa*-*linux*] - || [istarget s390*-*-*] -@@ -5788,6 +6164,8 @@ proc check_effective_target_sync_char_short { } { - || [istarget i?86-*-*] || [istarget x86_64-*-*] - || [istarget alpha*-*-*] - || [istarget arm*-*-linux-*] -+ || ([istarget arm*-*-*] -+ && [check_effective_target_arm_acq_rel]) - || [istarget hppa*-*linux*] - || [istarget s390*-*-*] - || [istarget powerpc*-*-*] ---- a/src/gcc/tree-inline.c -+++ b/src/gcc/tree-inline.c -@@ -244,6 +244,7 @@ remap_ssa_name (tree name, copy_body_data *id) - /* At least IPA points-to info can be directly transferred. */ - if (id->src_cfun->gimple_df - && id->src_cfun->gimple_df->ipa_pta -+ && POINTER_TYPE_P (TREE_TYPE (name)) - && (pi = SSA_NAME_PTR_INFO (name)) - && !pi->pt.anything) - { -@@ -276,6 +277,7 @@ remap_ssa_name (tree name, copy_body_data *id) - /* At least IPA points-to info can be directly transferred. */ - if (id->src_cfun->gimple_df - && id->src_cfun->gimple_df->ipa_pta -+ && POINTER_TYPE_P (TREE_TYPE (name)) - && (pi = SSA_NAME_PTR_INFO (name)) - && !pi->pt.anything) - { ---- a/src/gcc/tree-scalar-evolution.c -+++ b/src/gcc/tree-scalar-evolution.c -@@ -1937,6 +1937,36 @@ interpret_rhs_expr (struct loop *loop, gimple *at_stmt, - res = chrec_convert (type, chrec1, at_stmt); - break; - -+ case BIT_AND_EXPR: -+ /* Given int variable A, handle A&0xffff as (int)(unsigned short)A. -+ If A is SCEV and its value is in the range of representable set -+ of type unsigned short, the result expression is a (no-overflow) -+ SCEV. */ -+ res = chrec_dont_know; -+ if (tree_fits_uhwi_p (rhs2)) -+ { -+ int precision; -+ unsigned HOST_WIDE_INT val = tree_to_uhwi (rhs2); -+ -+ val ++; -+ /* Skip if value of rhs2 wraps in unsigned HOST_WIDE_INT or -+ it's not the maximum value of a smaller type than rhs1. 
*/ -+ if (val != 0 -+ && (precision = exact_log2 (val)) > 0 -+ && (unsigned) precision < TYPE_PRECISION (TREE_TYPE (rhs1))) -+ { -+ tree utype = build_nonstandard_integer_type (precision, 1); -+ -+ if (TYPE_PRECISION (utype) < TYPE_PRECISION (TREE_TYPE (rhs1))) -+ { -+ chrec1 = analyze_scalar_evolution (loop, rhs1); -+ chrec1 = chrec_convert (utype, chrec1, at_stmt); -+ res = chrec_convert (TREE_TYPE (rhs1), chrec1, at_stmt); -+ } -+ } -+ } -+ break; -+ - default: - res = chrec_dont_know; - break; ---- a/src/gcc/tree-ssa-address.c -+++ b/src/gcc/tree-ssa-address.c -@@ -877,6 +877,10 @@ copy_ref_info (tree new_ref, tree old_ref) - && TREE_CODE (old_ref) == MEM_REF - && !(TREE_CODE (new_ref) == TARGET_MEM_REF - && (TMR_INDEX2 (new_ref) -+ /* TODO: Below conditions can be relaxed if TMR_INDEX -+ is an indcution variable and its initial value and -+ step are aligned. */ -+ || (TMR_INDEX (new_ref) && !TMR_STEP (new_ref)) - || (TMR_STEP (new_ref) - && (TREE_INT_CST_LOW (TMR_STEP (new_ref)) - < align))))) ---- a/src/gcc/tree-ssa-ccp.c -+++ b/src/gcc/tree-ssa-ccp.c -@@ -229,13 +229,12 @@ debug_lattice_value (ccp_prop_value_t val) - fprintf (stderr, "\n"); - } - --/* Extend NONZERO_BITS to a full mask, with the upper bits being set. */ -+/* Extend NONZERO_BITS to a full mask, based on sgn. */ - - static widest_int --extend_mask (const wide_int &nonzero_bits) -+extend_mask (const wide_int &nonzero_bits, signop sgn) - { -- return (wi::mask <widest_int> (wi::get_precision (nonzero_bits), true) -- | widest_int::from (nonzero_bits, UNSIGNED)); -+ return widest_int::from (nonzero_bits, sgn); - } - - /* Compute a default value for variable VAR and store it in the -@@ -284,7 +283,7 @@ get_default_value (tree var) - { - val.lattice_val = CONSTANT; - val.value = build_zero_cst (TREE_TYPE (var)); -- val.mask = extend_mask (nonzero_bits); -+ val.mask = extend_mask (nonzero_bits, TYPE_SIGN (TREE_TYPE (var))); - } - } - } -@@ -1939,7 +1938,7 @@ evaluate_stmt (gimple *stmt) - { - val.lattice_val = CONSTANT; - val.value = build_zero_cst (TREE_TYPE (lhs)); -- val.mask = extend_mask (nonzero_bits); -+ val.mask = extend_mask (nonzero_bits, TYPE_SIGN (TREE_TYPE (lhs))); - is_constant = true; - } - else -@@ -1950,7 +1949,8 @@ evaluate_stmt (gimple *stmt) - if (nonzero_bits == 0) - val.mask = 0; - else -- val.mask = val.mask & extend_mask (nonzero_bits); -+ val.mask = val.mask & extend_mask (nonzero_bits, -+ TYPE_SIGN (TREE_TYPE (lhs))); - } - } - } ---- a/src/gcc/tree-ssa-strlen.c -+++ b/src/gcc/tree-ssa-strlen.c -@@ -2263,7 +2263,7 @@ public: - }; - - /* Callback for walk_dominator_tree. Attempt to optimize various -- string ops by remembering string lenths pointed by pointer SSA_NAMEs. */ -+ string ops by remembering string lengths pointed by pointer SSA_NAMEs. */ - - edge - strlen_dom_walker::before_dom_children (basic_block bb) ---- a/src/gcc/tree-vect-data-refs.c -+++ b/src/gcc/tree-vect-data-refs.c -@@ -2250,6 +2250,7 @@ vect_analyze_group_access_1 (struct data_reference *dr) - { - GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt; - GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize; -+ GROUP_GAP (stmt_info) = groupsize - 1; - if (dump_enabled_p ()) - { - dump_printf_loc (MSG_NOTE, vect_location, ---- a/src/gcc/tree-vect-loop-manip.c -+++ b/src/gcc/tree-vect-loop-manip.c -@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. 
If not see - #include "cfgloop.h" - #include "tree-scalar-evolution.h" - #include "tree-vectorizer.h" -+#include "tree-ssa-loop-ivopts.h" - - /************************************************************************* - Simple Loop Peeling Utilities -@@ -1594,10 +1595,26 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) - } - - /* FORNOW: We do not transform initial conditions of IVs -+ which evolution functions are not invariants in the loop. */ -+ -+ if (!expr_invariant_in_loop_p (loop, evolution_part)) -+ { -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -+ "evolution not invariant in loop.\n"); -+ return false; -+ } -+ -+ /* FORNOW: We do not transform initial conditions of IVs - which evolution functions are a polynomial of degree >= 2. */ - - if (tree_is_chrec (evolution_part)) -- return false; -+ { -+ if (dump_enabled_p ()) -+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -+ "evolution is chrec.\n"); -+ return false; -+ } - } - - return true; ---- a/src/gcc/tree-vect-patterns.c -+++ b/src/gcc/tree-vect-patterns.c -@@ -2136,32 +2136,313 @@ vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts, - return pattern_stmt; - } - --/* Detect multiplication by constant which are postive or negatives of power 2, -- and convert them to shift patterns. -+/* Return true iff the target has a vector optab implementing the operation -+ CODE on type VECTYPE. */ - -- Mult with constants that are postive power of two. -- type a_t; -- type b_t -- S1: b_t = a_t * n -+static bool -+target_has_vecop_for_code (tree_code code, tree vectype) -+{ -+ optab voptab = optab_for_tree_code (code, vectype, optab_vector); -+ return voptab -+ && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing; -+} - -- or -+/* Verify that the target has optabs of VECTYPE to perform all the steps -+ needed by the multiplication-by-immediate synthesis algorithm described by -+ ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is -+ present. Return true iff the target supports all the steps. */ -+ -+static bool -+target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var, -+ tree vectype, bool synth_shift_p) -+{ -+ if (alg->op[0] != alg_zero && alg->op[0] != alg_m) -+ return false; -+ -+ bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype); -+ bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype); -+ -+ if (var == negate_variant -+ && !target_has_vecop_for_code (NEGATE_EXPR, vectype)) -+ return false; -+ -+ /* If we must synthesize shifts with additions make sure that vector -+ addition is available. */ -+ if ((var == add_variant || synth_shift_p) && !supports_vplus) -+ return false; -+ -+ for (int i = 1; i < alg->ops; i++) -+ { -+ switch (alg->op[i]) -+ { -+ case alg_shift: -+ break; -+ case alg_add_t_m2: -+ case alg_add_t2_m: -+ case alg_add_factor: -+ if (!supports_vplus) -+ return false; -+ break; -+ case alg_sub_t_m2: -+ case alg_sub_t2_m: -+ case alg_sub_factor: -+ if (!supports_vminus) -+ return false; -+ break; -+ case alg_unknown: -+ case alg_m: -+ case alg_zero: -+ case alg_impossible: -+ return false; -+ default: -+ gcc_unreachable (); -+ } -+ } -+ -+ return true; -+} -+ -+/* Synthesize a left shift of OP by AMNT bits using a series of additions and -+ putting the final result in DEST. Append all statements but the last into -+ VINFO. Return the last statement. 
*/ -+ -+static gimple * -+synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt, -+ stmt_vec_info vinfo) -+{ -+ HOST_WIDE_INT i; -+ tree itype = TREE_TYPE (op); -+ tree prev_res = op; -+ gcc_assert (amnt >= 0); -+ for (i = 0; i < amnt; i++) -+ { -+ tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL) -+ : dest; -+ gimple *stmt -+ = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res); -+ prev_res = tmp_var; -+ if (i < amnt - 1) -+ append_pattern_def_seq (vinfo, stmt); -+ else -+ return stmt; -+ } -+ gcc_unreachable (); -+ return NULL; -+} -+ -+/* Helper for vect_synth_mult_by_constant. Apply a binary operation -+ CODE to operands OP1 and OP2, creating a new temporary SSA var in -+ the process if necessary. Append the resulting assignment statements -+ to the sequence in STMT_VINFO. Return the SSA variable that holds the -+ result of the binary operation. If SYNTH_SHIFT_P is true synthesize -+ left shifts using additions. */ -+ -+static tree -+apply_binop_and_append_stmt (tree_code code, tree op1, tree op2, -+ stmt_vec_info stmt_vinfo, bool synth_shift_p) -+{ -+ if (integer_zerop (op2) -+ && (code == LSHIFT_EXPR -+ || code == PLUS_EXPR)) -+ { -+ gcc_assert (TREE_CODE (op1) == SSA_NAME); -+ return op1; -+ } -+ -+ gimple *stmt; -+ tree itype = TREE_TYPE (op1); -+ tree tmp_var = vect_recog_temp_ssa_var (itype, NULL); -+ -+ if (code == LSHIFT_EXPR -+ && synth_shift_p) -+ { -+ stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2), -+ stmt_vinfo); -+ append_pattern_def_seq (stmt_vinfo, stmt); -+ return tmp_var; -+ } -+ -+ stmt = gimple_build_assign (tmp_var, code, op1, op2); -+ append_pattern_def_seq (stmt_vinfo, stmt); -+ return tmp_var; -+} -+ -+/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts -+ and simple arithmetic operations to be vectorized. Record the statements -+ produced in STMT_VINFO and return the last statement in the sequence or -+ NULL if it's not possible to synthesize such a multiplication. -+ This function mirrors the behavior of expand_mult_const in expmed.c but -+ works on tree-ssa form. */ -+ -+static gimple * -+vect_synth_mult_by_constant (tree op, tree val, -+ stmt_vec_info stmt_vinfo) -+{ -+ tree itype = TREE_TYPE (op); -+ machine_mode mode = TYPE_MODE (itype); -+ struct algorithm alg; -+ mult_variant variant; -+ if (!tree_fits_shwi_p (val)) -+ return NULL; -+ -+ /* Multiplication synthesis by shifts, adds and subs can introduce -+ signed overflow where the original operation didn't. Perform the -+ operations on an unsigned type and cast back to avoid this. -+ In the future we may want to relax this for synthesis algorithms -+ that we can prove do not cause unexpected overflow. */ -+ bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype); -+ -+ tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype; -+ -+ /* Targets that don't support vector shifts but support vector additions -+ can synthesize shifts that way. */ -+ bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype); -+ -+ HOST_WIDE_INT hwval = tree_to_shwi (val); -+ /* Use MAX_COST here as we don't want to limit the sequence on rtx costs. -+ The vectorizer's benefit analysis will decide whether it's beneficial -+ to do this. */ -+ bool possible = choose_mult_variant (mode, hwval, &alg, -+ &variant, MAX_COST); -+ if (!possible) -+ return NULL; - -- Mult with constants that are negative power of two. 
-- S2: b_t = a_t * -n -+ tree vectype = get_vectype_for_scalar_type (multtype); -+ -+ if (!vectype -+ || !target_supports_mult_synth_alg (&alg, variant, -+ vectype, synth_shift_p)) -+ return NULL; -+ -+ tree accumulator; -+ -+ /* Clear out the sequence of statements so we can populate it below. */ -+ STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; -+ gimple *stmt = NULL; -+ -+ if (cast_to_unsigned_p) -+ { -+ tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL); -+ stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op); -+ append_pattern_def_seq (stmt_vinfo, stmt); -+ op = tmp_op; -+ } -+ -+ if (alg.op[0] == alg_zero) -+ accumulator = build_int_cst (multtype, 0); -+ else -+ accumulator = op; -+ -+ bool needs_fixup = (variant == negate_variant) -+ || (variant == add_variant); -+ -+ for (int i = 1; i < alg.ops; i++) -+ { -+ tree shft_log = build_int_cst (multtype, alg.log[i]); -+ tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL); -+ tree tmp_var = NULL_TREE; -+ -+ switch (alg.op[i]) -+ { -+ case alg_shift: -+ if (synth_shift_p) -+ stmt -+ = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i], -+ stmt_vinfo); -+ else -+ stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator, -+ shft_log); -+ break; -+ case alg_add_t_m2: -+ tmp_var -+ = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log, -+ stmt_vinfo, synth_shift_p); -+ stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, -+ tmp_var); -+ break; -+ case alg_sub_t_m2: -+ tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op, -+ shft_log, stmt_vinfo, -+ synth_shift_p); -+ /* In some algorithms the first step involves zeroing the -+ accumulator. If subtracting from such an accumulator -+ just emit the negation directly. */ -+ if (integer_zerop (accumulator)) -+ stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var); -+ else -+ stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator, -+ tmp_var); -+ break; -+ case alg_add_t2_m: -+ tmp_var -+ = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log, -+ stmt_vinfo, synth_shift_p); -+ stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op); -+ break; -+ case alg_sub_t2_m: -+ tmp_var -+ = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log, -+ stmt_vinfo, synth_shift_p); -+ stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op); -+ break; -+ case alg_add_factor: -+ tmp_var -+ = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log, -+ stmt_vinfo, synth_shift_p); -+ stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, -+ tmp_var); -+ break; -+ case alg_sub_factor: -+ tmp_var -+ = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log, -+ stmt_vinfo, synth_shift_p); -+ stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, -+ accumulator); -+ break; -+ default: -+ gcc_unreachable (); -+ } -+ /* We don't want to append the last stmt in the sequence to stmt_vinfo -+ but rather return it directly. 
*/ -+ -+ if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p) -+ append_pattern_def_seq (stmt_vinfo, stmt); -+ accumulator = accum_tmp; -+ } -+ if (variant == negate_variant) -+ { -+ tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL); -+ stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator); -+ accumulator = accum_tmp; -+ if (cast_to_unsigned_p) -+ append_pattern_def_seq (stmt_vinfo, stmt); -+ } -+ else if (variant == add_variant) -+ { -+ tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL); -+ stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op); -+ accumulator = accum_tmp; -+ if (cast_to_unsigned_p) -+ append_pattern_def_seq (stmt_vinfo, stmt); -+ } -+ /* Move back to a signed if needed. */ -+ if (cast_to_unsigned_p) -+ { -+ tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL); -+ stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator); -+ } -+ -+ return stmt; -+} -+ -+/* Detect multiplication by constant and convert it into a sequence of -+ shifts and additions, subtractions, negations. We reuse the -+ choose_mult_variant algorithms from expmed.c - - Input/Output: - - STMTS: Contains a stmt from which the pattern search begins, -- i.e. the mult stmt. Convert the mult operation to LSHIFT if -- constant operand is a power of 2. -- type a_t, b_t -- S1': b_t = a_t << log2 (n) -- -- Convert the mult operation to LSHIFT and followed by a NEGATE -- if constant operand is a negative power of 2. -- type a_t, b_t, res_T; -- S2': b_t = a_t << log2 (n) -- S3': res_T = - (b_t) -+ i.e. the mult stmt. - - Output: - -@@ -2169,8 +2450,8 @@ vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts, - - * TYPE_OUT: The type of the output of this pattern. - -- * Return value: A new stmt that will be used to replace the multiplication -- S1 or S2 stmt. */ -+ * Return value: A new stmt that will be used to replace -+ the multiplication. */ - - static gimple * - vect_recog_mult_pattern (vec<gimple *> *stmts, -@@ -2178,11 +2459,8 @@ vect_recog_mult_pattern (vec<gimple *> *stmts, - { - gimple *last_stmt = stmts->pop (); - tree oprnd0, oprnd1, vectype, itype; -- gimple *pattern_stmt, *def_stmt; -- optab optab; -+ gimple *pattern_stmt; - stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt); -- int power2_val, power2_neg_val; -- tree shift; - - if (!is_gimple_assign (last_stmt)) - return NULL; -@@ -2206,52 +2484,17 @@ vect_recog_mult_pattern (vec<gimple *> *stmts, - - /* If the target can handle vectorized multiplication natively, - don't attempt to optimize this. */ -- optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default); -- if (optab != unknown_optab) -+ optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default); -+ if (mul_optab != unknown_optab) - { - machine_mode vec_mode = TYPE_MODE (vectype); -- int icode = (int) optab_handler (optab, vec_mode); -+ int icode = (int) optab_handler (mul_optab, vec_mode); - if (icode != CODE_FOR_nothing) -- return NULL; -+ return NULL; - } - -- /* If target cannot handle vector left shift then we cannot -- optimize and bail out. */ -- optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); -- if (!optab -- || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) -- return NULL; -- -- power2_val = wi::exact_log2 (oprnd1); -- power2_neg_val = wi::exact_log2 (wi::neg (oprnd1)); -- -- /* Handle constant operands that are postive or negative powers of 2. 
*/ -- if (power2_val != -1) -- { -- shift = build_int_cst (itype, power2_val); -- pattern_stmt -- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), -- LSHIFT_EXPR, oprnd0, shift); -- } -- else if (power2_neg_val != -1) -- { -- /* If the target cannot handle vector NEGATE then we cannot -- do the optimization. */ -- optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector); -- if (!optab -- || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing) -- return NULL; -- -- shift = build_int_cst (itype, power2_neg_val); -- def_stmt -- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), -- LSHIFT_EXPR, oprnd0, shift); -- new_pattern_def_seq (stmt_vinfo, def_stmt); -- pattern_stmt -- = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL), -- NEGATE_EXPR, gimple_assign_lhs (def_stmt)); -- } -- else -+ pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo); -+ if (!pattern_stmt) - return NULL; - - /* Pattern detected. */ ---- a/src/gcc/tree-vect-stmts.c -+++ b/src/gcc/tree-vect-stmts.c -@@ -6354,12 +6354,22 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, - gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info)); - - first_stmt = GROUP_FIRST_ELEMENT (stmt_info); -+ group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); -+ -+ if (!slp -+ && !PURE_SLP_STMT (stmt_info) -+ && !STMT_VINFO_STRIDED_P (stmt_info)) -+ { -+ if (vect_load_lanes_supported (vectype, group_size)) -+ load_lanes_p = true; -+ else if (!vect_grouped_load_supported (vectype, group_size)) -+ return false; -+ } - - /* If this is single-element interleaving with an element distance - that leaves unused vector loads around punt - we at least create - very sub-optimal code in that case (and blow up memory, - see PR65518). */ -- bool force_peeling = false; - if (first_stmt == stmt - && !GROUP_NEXT_ELEMENT (stmt_info)) - { -@@ -6373,7 +6383,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, - } - - /* Single-element interleaving requires peeling for gaps. */ -- force_peeling = true; -+ gcc_assert (GROUP_GAP (stmt_info)); - } - - /* If there is a gap in the end of the group or the group size cannot -@@ -6381,9 +6391,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, - elements in the last iteration and thus need to peel that off. */ - if (loop_vinfo - && ! STMT_VINFO_STRIDED_P (stmt_info) -- && (force_peeling -- || GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0 -- || (!slp && vf % GROUP_SIZE (vinfo_for_stmt (first_stmt)) != 0))) -+ && (GROUP_GAP (vinfo_for_stmt (first_stmt)) != 0 -+ || (!slp && !load_lanes_p && vf % group_size != 0))) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, -@@ -6403,8 +6412,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, - if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) - slp_perm = true; - -- group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt)); -- - /* ??? The following is overly pessimistic (as well as the loop - case above) in the case we can statically determine the excess - elements loaded are within the bounds of a decl that is accessed. 
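Stepping back to the tree-vect-patterns.c change above: the point of vect_synth_mult_by_constant is that a multiply by a constant such as 15 can be replaced by one shift plus one subtract when the target has no vector multiply. A scalar C sketch of the decomposition, assuming choose_mult_variant picks the usual alg_sub_factor step for 15 (hand-written illustration, not compiler output):

    #include <assert.h>

    static int
    synth_mult_by_15 (int x)
    {
      /* Mirror the cast_to_unsigned_p path: do the arithmetic in an
         unsigned type so the shift and subtract cannot introduce signed
         overflow that x * 15 itself would not have.  */
      unsigned ux = (unsigned) x;
      unsigned acc = (ux << 4) - ux;   /* alg_sub_factor: x*16 - x = x*15.  */
      return (int) acc;                /* Cast back to the signed type.  */
    }

    int
    main (void)
    {
      for (int i = -1000; i <= 1000; i++)
        assert (synth_mult_by_15 (i) == i * 15);
      return 0;
    }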
-@@ -6417,16 +6424,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, - return false; - } - -- if (!slp -- && !PURE_SLP_STMT (stmt_info) -- && !STMT_VINFO_STRIDED_P (stmt_info)) -- { -- if (vect_load_lanes_supported (vectype, group_size)) -- load_lanes_p = true; -- else if (!vect_grouped_load_supported (vectype, group_size)) -- return false; -- } -- - /* Invalidate assumptions made by dependence analysis when vectorization - on the unrolled body effectively re-orders stmts. */ - if (!PURE_SLP_STMT (stmt_info) ---- a/src/gcc/tree-vectorizer.c -+++ b/src/gcc/tree-vectorizer.c -@@ -794,38 +794,142 @@ make_pass_slp_vectorize (gcc::context *ctxt) - This should involve global alignment analysis and in the future also - array padding. */ - -+static unsigned get_vec_alignment_for_type (tree); -+static hash_map<tree, unsigned> *type_align_map; -+ -+/* Return alignment of array's vector type corresponding to scalar type. -+ 0 if no vector type exists. */ -+static unsigned -+get_vec_alignment_for_array_type (tree type) -+{ -+ gcc_assert (TREE_CODE (type) == ARRAY_TYPE); -+ -+ tree vectype = get_vectype_for_scalar_type (strip_array_types (type)); -+ if (!vectype -+ || !TYPE_SIZE (type) -+ || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST -+ || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype))) -+ return 0; -+ -+ return TYPE_ALIGN (vectype); -+} -+ -+/* Return alignment of field having maximum alignment of vector type -+ corresponding to it's scalar type. For now, we only consider fields whose -+ offset is a multiple of it's vector alignment. -+ 0 if no suitable field is found. */ -+static unsigned -+get_vec_alignment_for_record_type (tree type) -+{ -+ gcc_assert (TREE_CODE (type) == RECORD_TYPE); -+ -+ unsigned max_align = 0, alignment; -+ HOST_WIDE_INT offset; -+ tree offset_tree; -+ -+ if (TYPE_PACKED (type)) -+ return 0; -+ -+ unsigned *slot = type_align_map->get (type); -+ if (slot) -+ return *slot; -+ -+ for (tree field = first_field (type); -+ field != NULL_TREE; -+ field = DECL_CHAIN (field)) -+ { -+ /* Skip if not FIELD_DECL or if alignment is set by user. */ -+ if (TREE_CODE (field) != FIELD_DECL -+ || DECL_USER_ALIGN (field) -+ || DECL_ARTIFICIAL (field)) -+ continue; -+ -+ /* We don't need to process the type further if offset is variable, -+ since the offsets of remaining members will also be variable. */ -+ if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST -+ || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST) -+ break; -+ -+ /* Similarly stop processing the type if offset_tree -+ does not fit in unsigned HOST_WIDE_INT. */ -+ offset_tree = bit_position (field); -+ if (!tree_fits_uhwi_p (offset_tree)) -+ break; -+ -+ offset = tree_to_uhwi (offset_tree); -+ alignment = get_vec_alignment_for_type (TREE_TYPE (field)); -+ -+ /* Get maximum alignment of vectorized field/array among those members -+ whose offset is multiple of the vector alignment. */ -+ if (alignment -+ && (offset % alignment == 0) -+ && (alignment > max_align)) -+ max_align = alignment; -+ } -+ -+ type_align_map->put (type, max_align); -+ return max_align; -+} -+ -+/* Return alignment of vector type corresponding to decl's scalar type -+ or 0 if it doesn't exist or the vector alignment is lesser than -+ decl's alignment. 
*/ -+static unsigned -+get_vec_alignment_for_type (tree type) -+{ -+ if (type == NULL_TREE) -+ return 0; -+ -+ gcc_assert (TYPE_P (type)); -+ -+ static unsigned alignment = 0; -+ switch (TREE_CODE (type)) -+ { -+ case ARRAY_TYPE: -+ alignment = get_vec_alignment_for_array_type (type); -+ break; -+ case RECORD_TYPE: -+ alignment = get_vec_alignment_for_record_type (type); -+ break; -+ default: -+ alignment = 0; -+ break; -+ } -+ -+ return (alignment > TYPE_ALIGN (type)) ? alignment : 0; -+} -+ -+/* Entry point to increase_alignment pass. */ - static unsigned int - increase_alignment (void) - { - varpool_node *vnode; - - vect_location = UNKNOWN_LOCATION; -+ type_align_map = new hash_map<tree, unsigned>; - - /* Increase the alignment of all global arrays for vectorization. */ - FOR_EACH_DEFINED_VARIABLE (vnode) - { -- tree vectype, decl = vnode->decl; -- tree t; -+ tree decl = vnode->decl; - unsigned int alignment; - -- t = TREE_TYPE (decl); -- if (TREE_CODE (t) != ARRAY_TYPE) -- continue; -- vectype = get_vectype_for_scalar_type (strip_array_types (t)); -- if (!vectype) -- continue; -- alignment = TYPE_ALIGN (vectype); -- if (DECL_ALIGN (decl) >= alignment) -- continue; -- -- if (vect_can_force_dr_alignment_p (decl, alignment)) -+ if ((decl_in_symtab_p (decl) -+ && !symtab_node::get (decl)->can_increase_alignment_p ()) -+ || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) -+ continue; -+ -+ alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); -+ if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) - { -- vnode->increase_alignment (TYPE_ALIGN (vectype)); -+ vnode->increase_alignment (alignment); - dump_printf (MSG_NOTE, "Increasing alignment of decl: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); - dump_printf (MSG_NOTE, "\n"); - } - } -+ -+ delete type_align_map; - return 0; - } - ---- a/src/gcc/tree-vrp.c -+++ b/src/gcc/tree-vrp.c -@@ -3165,6 +3165,24 @@ extract_range_from_binary_expr_1 (value_range *vr, - if (int_cst_range1 && tree_int_cst_sgn (vr1.min) >= 0) - wmax = wi::min (wmax, vr1.max, TYPE_SIGN (expr_type)); - max = wide_int_to_tree (expr_type, wmax); -+ cmp = compare_values (min, max); -+ /* PR68217: In case of signed & sign-bit-CST should -+ result in [-INF, 0] instead of [-INF, INF]. */ -+ if (cmp == -2 || cmp == 1) -+ { -+ wide_int sign_bit -+ = wi::set_bit_in_zero (TYPE_PRECISION (expr_type) - 1, -+ TYPE_PRECISION (expr_type)); -+ if (!TYPE_UNSIGNED (expr_type) -+ && ((value_range_constant_singleton (&vr0) -+ && !wi::cmps (vr0.min, sign_bit)) -+ || (value_range_constant_singleton (&vr1) -+ && !wi::cmps (vr1.min, sign_bit)))) -+ { -+ min = TYPE_MIN_VALUE (expr_type); -+ max = build_int_cst (expr_type, 0); -+ } -+ } - } - else if (code == BIT_IOR_EXPR) - { -@@ -3859,7 +3877,8 @@ extract_range_basic (value_range *vr, gimple *stmt) - arg = gimple_call_arg (stmt, 0); - if (TREE_CODE (arg) == SSA_NAME - && SSA_NAME_IS_DEFAULT_DEF (arg) -- && TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL) -+ && TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL -+ && cfun->after_inlining) - { - set_value_range_to_null (vr, type); - return; -@@ -9935,6 +9954,40 @@ simplify_internal_call_using_ranges (gimple_stmt_iterator *gsi, gimple *stmt) - return true; - } - -+/* Return true if VAR is a two-valued variable. Set a and b with the -+ two-values when it is true. Return false otherwise. 
*/ -+ -+static bool -+two_valued_val_range_p (tree var, tree *a, tree *b) -+{ -+ value_range *vr = get_value_range (var); -+ if ((vr->type != VR_RANGE -+ && vr->type != VR_ANTI_RANGE) -+ || TREE_CODE (vr->min) != INTEGER_CST -+ || TREE_CODE (vr->max) != INTEGER_CST) -+ return false; -+ -+ if (vr->type == VR_RANGE -+ && wi::sub (vr->max, vr->min) == 1) -+ { -+ *a = vr->min; -+ *b = vr->max; -+ return true; -+ } -+ -+ /* ~[TYPE_MIN + 1, TYPE_MAX - 1] */ -+ if (vr->type == VR_ANTI_RANGE -+ && wi::sub (vr->min, vrp_val_min (TREE_TYPE (var))) == 1 -+ && wi::sub (vrp_val_max (TREE_TYPE (var)), vr->max) == 1) -+ { -+ *a = vrp_val_min (TREE_TYPE (var)); -+ *b = vrp_val_max (TREE_TYPE (var)); -+ return true; -+ } -+ -+ return false; -+} -+ - /* Simplify STMT using ranges if possible. */ - - static bool -@@ -9945,6 +9998,68 @@ simplify_stmt_using_ranges (gimple_stmt_iterator *gsi) - { - enum tree_code rhs_code = gimple_assign_rhs_code (stmt); - tree rhs1 = gimple_assign_rhs1 (stmt); -+ tree rhs2 = gimple_assign_rhs2 (stmt); -+ tree lhs = gimple_assign_lhs (stmt); -+ tree val1 = NULL_TREE, val2 = NULL_TREE; -+ use_operand_p use_p; -+ gimple *use_stmt; -+ -+ /* Convert: -+ LHS = CST BINOP VAR -+ Where VAR is two-valued and LHS is used in GIMPLE_COND only -+ To: -+ LHS = VAR == VAL1 ? (CST BINOP VAL1) : (CST BINOP VAL2) -+ -+ Also handles: -+ LHS = VAR BINOP CST -+ Where VAR is two-valued and LHS is used in GIMPLE_COND only -+ To: -+ LHS = VAR == VAL1 ? (VAL1 BINOP CST) : (VAL2 BINOP CST) */ -+ -+ if (TREE_CODE_CLASS (rhs_code) == tcc_binary -+ && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) -+ && ((TREE_CODE (rhs1) == INTEGER_CST -+ && TREE_CODE (rhs2) == SSA_NAME) -+ || (TREE_CODE (rhs2) == INTEGER_CST -+ && TREE_CODE (rhs1) == SSA_NAME)) -+ && single_imm_use (lhs, &use_p, &use_stmt) -+ && gimple_code (use_stmt) == GIMPLE_COND) -+ -+ { -+ tree new_rhs1 = NULL_TREE; -+ tree new_rhs2 = NULL_TREE; -+ tree cmp_var = NULL_TREE; -+ -+ if (TREE_CODE (rhs2) == SSA_NAME -+ && two_valued_val_range_p (rhs2, &val1, &val2)) -+ { -+ /* Optimize RHS1 OP [VAL1, VAL2]. */ -+ new_rhs1 = int_const_binop (rhs_code, rhs1, val1); -+ new_rhs2 = int_const_binop (rhs_code, rhs1, val2); -+ cmp_var = rhs2; -+ } -+ else if (TREE_CODE (rhs1) == SSA_NAME -+ && two_valued_val_range_p (rhs1, &val1, &val2)) -+ { -+ /* Optimize [VAL1, VAL2] OP RHS2. */ -+ new_rhs1 = int_const_binop (rhs_code, val1, rhs2); -+ new_rhs2 = int_const_binop (rhs_code, val2, rhs2); -+ cmp_var = rhs1; -+ } -+ -+ /* If we could not find two-vals or the optimzation is invalid as -+ in divide by zero, new_rhs1 / new_rhs will be NULL_TREE. */ -+ if (new_rhs1 && new_rhs2) -+ { -+ tree cond = build2 (EQ_EXPR, TREE_TYPE (cmp_var), cmp_var, val1); -+ gimple_assign_set_rhs_with_ops (gsi, -+ COND_EXPR, cond, -+ new_rhs1, -+ new_rhs2); -+ update_stmt (gsi_stmt (*gsi)); -+ return true; -+ } -+ } - - switch (rhs_code) - { ---- a/src/gcc/tree.h -+++ b/src/gcc/tree.h -@@ -4628,69 +4628,6 @@ extern void warn_deprecated_use (tree, tree); - extern void cache_integer_cst (tree); - extern const char *combined_fn_name (combined_fn); - --/* Return the memory model from a host integer. */ --static inline enum memmodel --memmodel_from_int (unsigned HOST_WIDE_INT val) --{ -- return (enum memmodel) (val & MEMMODEL_MASK); --} -- --/* Return the base memory model from a host integer. */ --static inline enum memmodel --memmodel_base (unsigned HOST_WIDE_INT val) --{ -- return (enum memmodel) (val & MEMMODEL_BASE_MASK); --} -- --/* Return TRUE if the memory model is RELAXED. 
*/ --static inline bool --is_mm_relaxed (enum memmodel model) --{ -- return (model & MEMMODEL_BASE_MASK) == MEMMODEL_RELAXED; --} -- --/* Return TRUE if the memory model is CONSUME. */ --static inline bool --is_mm_consume (enum memmodel model) --{ -- return (model & MEMMODEL_BASE_MASK) == MEMMODEL_CONSUME; --} -- --/* Return TRUE if the memory model is ACQUIRE. */ --static inline bool --is_mm_acquire (enum memmodel model) --{ -- return (model & MEMMODEL_BASE_MASK) == MEMMODEL_ACQUIRE; --} -- --/* Return TRUE if the memory model is RELEASE. */ --static inline bool --is_mm_release (enum memmodel model) --{ -- return (model & MEMMODEL_BASE_MASK) == MEMMODEL_RELEASE; --} -- --/* Return TRUE if the memory model is ACQ_REL. */ --static inline bool --is_mm_acq_rel (enum memmodel model) --{ -- return (model & MEMMODEL_BASE_MASK) == MEMMODEL_ACQ_REL; --} -- --/* Return TRUE if the memory model is SEQ_CST. */ --static inline bool --is_mm_seq_cst (enum memmodel model) --{ -- return (model & MEMMODEL_BASE_MASK) == MEMMODEL_SEQ_CST; --} -- --/* Return TRUE if the memory model is a SYNC variant. */ --static inline bool --is_mm_sync (enum memmodel model) --{ -- return (model & MEMMODEL_SYNC); --} -- - /* Compare and hash for any structure which begins with a canonical - pointer. Assumes all pointers are interchangeable, which is sort - of already assumed by gcc elsewhere IIRC. */ ---- a/src/gcc/tsan.c -+++ b/src/gcc/tsan.c -@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see - #include "backend.h" - #include "rtl.h" - #include "tree.h" -+#include "memmodel.h" - #include "gimple.h" - #include "tree-pass.h" - #include "ssa.h" ---- a/src/gcc/varasm.c -+++ b/src/gcc/varasm.c -@@ -6776,6 +6776,16 @@ default_use_anchors_for_symbol_p (const_rtx symbol) - sections that should be marked as small in the section directive. */ - if (targetm.in_small_data_p (decl)) - return false; -+ -+ /* Don't use section anchors for decls that won't fit inside a single -+ anchor range to reduce the amount of instructions required to refer -+ to the entire declaration. */ -+ if (DECL_SIZE_UNIT (decl) == NULL_TREE -+ || !tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)) -+ || (tree_to_uhwi (DECL_SIZE_UNIT (decl)) -+ >= (unsigned HOST_WIDE_INT) targetm.max_anchor_offset)) -+ return false; -+ - } - return true; - } ---- a/src/libcpp/expr.c -+++ b/src/libcpp/expr.c -@@ -1073,7 +1073,7 @@ eval_token (cpp_reader *pfile, const cpp_token *token, - result.low = 0; - if (CPP_OPTION (pfile, warn_undef) && !pfile->state.skip_eval) - cpp_warning_with_line (pfile, CPP_W_UNDEF, virtual_location, 0, -- "\"%s\" is not defined", -+ "\"%s\" is not defined, evaluates to 0", - NODE_NAME (token->val.node.node)); - } - break; ---- a/src/libcpp/lex.c -+++ b/src/libcpp/lex.c -@@ -750,6 +750,101 @@ search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) - } - } - -+#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE) -+#include "arm_neon.h" -+ -+/* This doesn't have to be the exact page size, but no system may use -+ a size smaller than this. ARMv8 requires a minimum page size of -+ 4k. The impact of being conservative here is a small number of -+ cases will take the slightly slower entry path into the main -+ loop. 
*/ -+ -+#define AARCH64_MIN_PAGE_SIZE 4096 -+ -+static const uchar * -+search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED) -+{ -+ const uint8x16_t repl_nl = vdupq_n_u8 ('\n'); -+ const uint8x16_t repl_cr = vdupq_n_u8 ('\r'); -+ const uint8x16_t repl_bs = vdupq_n_u8 ('\\'); -+ const uint8x16_t repl_qm = vdupq_n_u8 ('?'); -+ const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL); -+ -+#ifdef __AARCH64EB -+ const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0}; -+#else -+ const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8}; -+#endif -+ -+ unsigned int found; -+ const uint8_t *p; -+ uint8x16_t data; -+ uint8x16_t t; -+ uint16x8_t m; -+ uint8x16_t u, v, w; -+ -+ /* Align the source pointer. */ -+ p = (const uint8_t *)((uintptr_t)s & -16); -+ -+ /* Assuming random string start positions, with a 4k page size we'll take -+ the slow path about 0.37% of the time. */ -+ if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE -+ - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1))) -+ < 16, 0)) -+ { -+ /* Slow path: the string starts near a possible page boundary. */ -+ uint32_t misalign, mask; -+ -+ misalign = (uintptr_t)s & 15; -+ mask = (-1u << misalign) & 0xffff; -+ data = vld1q_u8 (p); -+ t = vceqq_u8 (data, repl_nl); -+ u = vceqq_u8 (data, repl_cr); -+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs)); -+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm)); -+ t = vorrq_u8 (v, w); -+ t = vandq_u8 (t, xmask); -+ m = vpaddlq_u8 (t); -+ m = vshlq_u16 (m, shift); -+ found = vaddvq_u16 (m); -+ found &= mask; -+ if (found) -+ return (const uchar*)p + __builtin_ctz (found); -+ } -+ else -+ { -+ data = vld1q_u8 ((const uint8_t *) s); -+ t = vceqq_u8 (data, repl_nl); -+ u = vceqq_u8 (data, repl_cr); -+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs)); -+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm)); -+ t = vorrq_u8 (v, w); -+ if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t), 0)) -+ goto done; -+ } -+ -+ do -+ { -+ p += 16; -+ data = vld1q_u8 (p); -+ t = vceqq_u8 (data, repl_nl); -+ u = vceqq_u8 (data, repl_cr); -+ v = vorrq_u8 (t, vceqq_u8 (data, repl_bs)); -+ w = vorrq_u8 (u, vceqq_u8 (data, repl_qm)); -+ t = vorrq_u8 (v, w); -+ } while (!vpaddd_u64 ((uint64x2_t)t)); -+ -+done: -+ /* Now that we've found the terminating substring, work out precisely where -+ we need to stop. */ -+ t = vandq_u8 (t, xmask); -+ m = vpaddlq_u8 (t); -+ m = vshlq_u16 (m, shift); -+ found = vaddvq_u16 (m); -+ return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p) -+ + __builtin_ctz (found)); -+} -+ - #elif defined (__ARM_NEON) - #include "arm_neon.h" - ---- a/src/libgcc/Makefile.in -+++ b/src/libgcc/Makefile.in -@@ -414,8 +414,9 @@ lib2funcs = _muldi3 _negdi2 _lshrdi3 _ashldi3 _ashrdi3 _cmpdi2 _ucmpdi2 \ - _negvsi2 _negvdi2 _ctors _ffssi2 _ffsdi2 _clz _clzsi2 _clzdi2 \ - _ctzsi2 _ctzdi2 _popcount_tab _popcountsi2 _popcountdi2 \ - _paritysi2 _paritydi2 _powisf2 _powidf2 _powixf2 _powitf2 \ -- _mulsc3 _muldc3 _mulxc3 _multc3 _divsc3 _divdc3 _divxc3 \ -- _divtc3 _bswapsi2 _bswapdi2 _clrsbsi2 _clrsbdi2 -+ _mulhc3 _mulsc3 _muldc3 _mulxc3 _multc3 _divhc3 _divsc3 \ -+ _divdc3 _divxc3 _divtc3 _bswapsi2 _bswapdi2 _clrsbsi2 \ -+ _clrsbdi2 - - # The floating-point conversion routines that involve a single-word integer. - # XX stands for the integer mode. 
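The libcpp lex.c addition a little above is easier to follow in isolation. search_line_fast compares 16 bytes at a time against the four characters the lexer must stop at, folds the lane results into a 16-bit mask, and locates the first hit with a count of trailing zeros. A condensed little-endian sketch of just that reduction, without the page-boundary handling or the main loop (illustration only, not part of the patch):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Index of the first '\n', '\r', '\\' or '?' in the 16 bytes at P,
       or 16 if none matches.  AArch64 little-endian only.  */
    static int
    first_interesting_byte (const uint8_t *p)
    {
      uint8x16_t data = vld1q_u8 (p);
      uint8x16_t t = vceqq_u8 (data, vdupq_n_u8 ('\n'));
      t = vorrq_u8 (t, vceqq_u8 (data, vdupq_n_u8 ('\r')));
      t = vorrq_u8 (t, vceqq_u8 (data, vdupq_n_u8 ('\\')));
      t = vorrq_u8 (t, vceqq_u8 (data, vdupq_n_u8 ('?')));

      /* Matching lanes are now 0xff.  Keep one distinct bit per lane,
         pairwise-add bytes into eight 16-bit sums, shift the four sums
         covering bytes 8..15 up by 8, and add all lanes: bit i of FOUND
         is set iff byte i matched, so ctz gives the byte index.  */
      const uint8x16_t xmask
        = vreinterpretq_u8_u64 (vdupq_n_u64 (0x8040201008040201ULL));
      const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
      uint16x8_t m = vpaddlq_u8 (vandq_u8 (t, xmask));
      unsigned found = vaddvq_u16 (vshlq_u16 (m, shift));
      return found ? __builtin_ctz (found) : 16;
    }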
---- a/src/libgcc/config.host
-+++ b/src/libgcc/config.host
-@@ -1399,4 +1399,8 @@ i[34567]86-*-linux* | x86_64-*-linux*)
- fi
- tm_file="${tm_file} i386/value-unwind.h"
- ;;
-+aarch64*-*-*)
-+ # ILP32 needs an extra header for unwinding
-+ tm_file="${tm_file} aarch64/value-unwind.h"
-+ ;;
- esac
---- /dev/null
-+++ b/src/libgcc/config/aarch64/value-unwind.h
-@@ -0,0 +1,25 @@
-+/* Store register values as _Unwind_Word type in DWARF2 EH unwind context.
-+ Copyright (C) 2017 Free Software Foundation, Inc.
-+
-+ This file is part of GCC.
-+
-+ GCC is free software; you can redistribute it and/or modify it
-+ under the terms of the GNU General Public License as published
-+ by the Free Software Foundation; either version 3, or (at your
-+ option) any later version.
-+
-+ GCC is distributed in the hope that it will be useful, but WITHOUT
-+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
-+ License for more details.
-+
-+ You should have received a copy of the GNU General Public License and
-+ a copy of the GCC Runtime Library Exception along with this program;
-+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-+ <http://www.gnu.org/licenses/>. */
-+
-+/* Define this macro if the target stores register values as _Unwind_Word
-+ type in unwind context. Only enable it for ilp32. */
-+#if defined __aarch64__ && !defined __LP64__
-+# define REG_VALUE_IN_UNWIND_CONTEXT
-+#endif
---- a/src/libgcc/config/arm/bpabi-v6m.S
-+++ b/src/libgcc/config/arm/bpabi-v6m.S
-@@ -1,4 +1,5 @@
--/* Miscellaneous BPABI functions. ARMv6M implementation
-+/* Miscellaneous BPABI functions. Thumb-1 implementation, suitable for ARMv4T,
-+ ARMv6-M and ARMv8-M Baseline like ISA variants.
- 
- Copyright (C) 2006-2016 Free Software Foundation, Inc.
- Contributed by CodeSourcery.
---- /dev/null
-+++ b/src/libgcc/config/arm/cmse.c
-@@ -0,0 +1,108 @@
-+/* ARMv8-M Security Extensions routines.
-+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
-+ Contributed by ARM Ltd.
-+
-+ This file is free software; you can redistribute it and/or modify it
-+ under the terms of the GNU General Public License as published by the
-+ Free Software Foundation; either version 3, or (at your option) any
-+ later version.
-+
-+ This file is distributed in the hope that it will be useful, but
-+ WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ General Public License for more details.
-+
-+ Under Section 7 of GPL version 3, you are granted additional
-+ permissions described in the GCC Runtime Library Exception, version
-+ 3.1, as published by the Free Software Foundation.
-+
-+ You should have received a copy of the GNU General Public License and
-+ a copy of the GCC Runtime Library Exception along with this program;
-+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-+ <http://www.gnu.org/licenses/>. */
-+
-+
-+#if __ARM_FEATURE_CMSE & 1
-+
-+#include <arm_cmse.h>
-+
-+/* ARM intrinsic function to perform a permission check on a given
-+ address range. See ACLE changes for ARMv8-M. */
-+
-+void *
-+cmse_check_address_range (void *p, size_t size, int flags)
-+{
-+ cmse_address_info_t permb, perme;
-+ char *pb = (char *) p, *pe;
-+
-+ /* Check if the range wraps around. */
-+ if (UINTPTR_MAX - (uintptr_t) p < size)
-+ return NULL;
-+
-+ /* Check if an unknown flag is present. */
-+ int known = CMSE_MPU_UNPRIV | CMSE_MPU_READWRITE | CMSE_MPU_READ;
-+ int known_secure_level = CMSE_MPU_UNPRIV;
-+#if __ARM_FEATURE_CMSE & 2
-+ known |= CMSE_AU_NONSECURE | CMSE_MPU_NONSECURE;
-+ known_secure_level |= CMSE_MPU_NONSECURE;
-+#endif
-+ if (flags & (~known))
-+ return NULL;
-+
-+ /* Execute the right variant of the TT instructions. */
-+ pe = pb + size - 1;
-+ const int singleCheck = (((uintptr_t) pb ^ (uintptr_t) pe) < 32);
-+ switch (flags & known_secure_level)
-+ {
-+ case 0:
-+ permb = cmse_TT (pb);
-+ perme = singleCheck ? permb : cmse_TT (pe);
-+ break;
-+ case CMSE_MPU_UNPRIV:
-+ permb = cmse_TTT (pb);
-+ perme = singleCheck ? permb : cmse_TTT (pe);
-+ break;
-+#if __ARM_FEATURE_CMSE & 2
-+ case CMSE_MPU_NONSECURE:
-+ permb = cmse_TTA (pb);
-+ perme = singleCheck ? permb : cmse_TTA (pe);
-+ break;
-+ case CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE:
-+ permb = cmse_TTAT (pb);
-+ perme = singleCheck ? permb : cmse_TTAT (pe);
-+ break;
-+#endif
-+ default:
-+ /* Invalid flag, e.g. CMSE_MPU_NONSECURE specified but
-+ __ARM_FEATURE_CMSE & 2 == 0. */
-+ return NULL;
-+ }
-+
-+ /* Check that the range does not cross MPU, SAU, or IDAU boundaries. */
-+ if (permb.value != perme.value)
-+ return NULL;
-+
-+ /* Check the permissions on the range. */
-+ switch (flags & (~known_secure_level))
-+ {
-+#if __ARM_FEATURE_CMSE & 2
-+ case CMSE_MPU_READ | CMSE_MPU_READWRITE | CMSE_AU_NONSECURE:
-+ case CMSE_MPU_READWRITE | CMSE_AU_NONSECURE:
-+ return permb.flags.nonsecure_readwrite_ok ? p : NULL;
-+ case CMSE_MPU_READ | CMSE_AU_NONSECURE:
-+ return permb.flags.nonsecure_read_ok ? p : NULL;
-+ case CMSE_AU_NONSECURE:
-+ return permb.flags.secure ? NULL : p;
-+#endif
-+ case CMSE_MPU_READ | CMSE_MPU_READWRITE:
-+ case CMSE_MPU_READWRITE:
-+ return permb.flags.readwrite_ok ? p : NULL;
-+ case CMSE_MPU_READ:
-+ return permb.flags.read_ok ? p : NULL;
-+ default:
-+ return NULL;
-+ }
-+}
-+
-+
-+#endif /* __ARM_FEATURE_CMSE & 1. */
---- /dev/null
-+++ b/src/libgcc/config/arm/cmse_nonsecure_call.S
-@@ -0,0 +1,131 @@
-+/* CMSE wrapper function used to save, clear and restore callee saved registers
-+ for cmse_nonsecure_call's.
-+
-+ Copyright (C) 2016 Free Software Foundation, Inc.
-+ Contributed by ARM Ltd.
-+
-+ This file is free software; you can redistribute it and/or modify it
-+ under the terms of the GNU General Public License as published by the
-+ Free Software Foundation; either version 3, or (at your option) any
-+ later version.
-+
-+ This file is distributed in the hope that it will be useful, but
-+ WITHOUT ANY WARRANTY; without even the implied warranty of
-+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-+ General Public License for more details.
-+
-+ Under Section 7 of GPL version 3, you are granted additional
-+ permissions described in the GCC Runtime Library Exception, version
-+ 3.1, as published by the Free Software Foundation.
-+
-+ You should have received a copy of the GNU General Public License and
-+ a copy of the GCC Runtime Library Exception along with this program;
-+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-+ <http://www.gnu.org/licenses/>. */
-+
-+.syntax unified
-+.thumb
-+.global __gnu_cmse_nonsecure_call
-+__gnu_cmse_nonsecure_call:
-+#if defined(__ARM_ARCH_8M_MAIN__)
-+push {r5-r11,lr}
-+mov r7, r4
-+mov r8, r4
-+mov r9, r4
-+mov r10, r4
-+mov r11, r4
-+mov ip, r4
-+
-+/* Save and clear callee-saved registers only if we are dealing with hard float
-+ ABI. The unused caller-saved registers have already been cleared by GCC
-+ generated code. */
-+#ifdef __ARM_PCS_VFP
-+vpush.f64 {d8-d15}
-+mov r5, #0
-+vmov d8, r5, r5
-+#if __ARM_FP & 0x04
-+vmov s18, s19, r5, r5
-+vmov s20, s21, r5, r5
-+vmov s22, s23, r5, r5
-+vmov s24, s25, r5, r5
-+vmov s26, s27, r5, r5
-+vmov s28, s29, r5, r5
-+vmov s30, s31, r5, r5
-+#elif __ARM_FP & 0x08
-+vmov.f64 d9, d8
-+vmov.f64 d10, d8
-+vmov.f64 d11, d8
-+vmov.f64 d12, d8
-+vmov.f64 d13, d8
-+vmov.f64 d14, d8
-+vmov.f64 d15, d8
-+#else
-+#error "Half precision implementation not supported."
-+#endif
-+/* Clear the cumulative exception-status bits (0-4,7) and the
-+ condition code bits (28-31) of the FPSCR. */
-+vmrs r5, fpscr
-+movw r6, #65376
-+movt r6, #4095
-+ands r5, r6
-+vmsr fpscr, r5
-+
-+/* We are not dealing with hard float ABI, so we can safely use the vlstm and
-+ vlldm instructions without needing to preserve the registers used for
-+ argument passing. */
-+#else
-+sub sp, sp, #0x88 /* Reserve stack space to save all floating point
-+ registers, including FPSCR. */
-+vlstm sp /* Lazy store and clearance of d0-d16 and FPSCR. */
-+#endif /* __ARM_PCS_VFP */
-+
-+/* Make sure to clear the 'GE' bits of the APSR register if 32-bit SIMD
-+ instructions are available. */
-+#if defined(__ARM_FEATURE_SIMD32)
-+msr APSR_nzcvqg, r4
-+#else
-+msr APSR_nzcvq, r4
-+#endif
-+
-+mov r5, r4
-+mov r6, r4
-+blxns r4
-+
-+#ifdef __ARM_PCS_VFP
-+vpop.f64 {d8-d15}
-+#else
-+vlldm sp /* Lazy restore of d0-d16 and FPSCR. */
-+add sp, sp, #0x88 /* Free space used to save floating point registers. */
-+#endif /* __ARM_PCS_VFP */
-+
-+pop {r5-r11, pc}
-+
-+#elif defined (__ARM_ARCH_8M_BASE__)
-+push {r5-r7, lr}
-+mov r5, r8
-+mov r6, r9
-+mov r7, r10
-+push {r5-r7}
-+mov r5, r11
-+push {r5}
-+mov r5, r4
-+mov r6, r4
-+mov r7, r4
-+mov r8, r4
-+mov r9, r4
-+mov r10, r4
-+mov r11, r4
-+mov ip, r4
-+msr APSR_nzcvq, r4
-+blxns r4
-+pop {r5}
-+mov r11, r5
-+pop {r5-r7}
-+mov r10, r7
-+mov r9, r6
-+mov r8, r5
-+pop {r5-r7, pc}
-+
-+#else
-+#error "This should only be used for armv8-m base- and mainline."
-+#endif
---- a/src/libgcc/config/arm/ieee754-df.S
-+++ b/src/libgcc/config/arm/ieee754-df.S
-@@ -160,8 +160,8 @@ ARM_FUNC_ALIAS aeabi_dadd adddf3
- teq r4, r5
- beq LSYM(Lad_d)
- 
--@ CFI note: we're lucky that the branches to Lad_* that appear after this function
--@ have a CFI state that's exactly the same as the one we're in at this
-+@ CFI note: we're lucky that the branches to Lad_* that appear after this
-+@ function have a CFI state that's exactly the same as the one we're in at this
- @ point. Otherwise the CFI would change to a different state after the branch,
- @ which would be disastrous for backtracing.
- LSYM(Lad_x):
-@@ -507,11 +507,15 @@ ARM_FUNC_ALIAS aeabi_f2d extendsfdf2
- eorne xh, xh, #0x38000000 @ fixup exponent otherwise.
- RETc(ne) @ and return it.
- 
-- teq r2, #0 @ if actually 0
-- do_it ne, e
-- teqne r3, #0xff000000 @ or INF or NAN
-+ bics r2, r2, #0xff000000 @ isolate mantissa
-+ do_it eq @ if 0, that is ZERO or INF,
- RETc(eq) @ we are done already.
- 
-+ teq r3, #0xff000000 @ check for NAN
-+ do_it eq, t
-+ orreq xh, xh, #0x00080000 @ change to quiet NAN
-+ RETc(eq) @ and return it.
-+
- @ value was denormalized. We can normalize it now.
- do_push {r4, r5, lr}
- .cfi_adjust_cfa_offset 12 @ CFA is now sp + previousOffset + 12
-@@ -1158,8 +1162,8 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
- 1: str ip, [sp, #-4]!
- .cfi_adjust_cfa_offset 4 @ CFA is now sp + previousOffset + 4.
- @ We're not adding CFI for ip as it's pushed into the stack
-- @ only because @ it may be popped off later as a return value
-- @ (i.e. we're not preserving @ it anyways).
-+ @ only because it may be popped off later as a return value
-+ @ (i.e. we're not preserving it anyways).
- 
- @ Trap any INF/NAN first.
- mov ip, xh, lsl #1
-@@ -1169,14 +1173,14 @@ ARM_FUNC_ALIAS eqdf2 cmpdf2
- COND(mvn,s,ne) ip, ip, asr #21
- beq 3f
- .cfi_remember_state
-- @ Save the current CFI state. This is done because the branch
-- @ is conditional, @ and if we don't take it we'll issue a
-- @ .cfi_adjust_cfa_offset and return. @ If we do take it,
-- @ however, the .cfi_adjust_cfa_offset from the non-branch @ code
-- @ will affect the branch code as well. To avoid this we'll
-- @ restore @ the current state before executing the branch code.
--
-- @ Test for equality. @ Note that 0.0 is equal to -0.0.
-+ @ Save the current CFI state. This is done because the branch
-+ @ is conditional, and if we don't take it we'll issue a
-+ @ .cfi_adjust_cfa_offset and return. If we do take it,
-+ @ however, the .cfi_adjust_cfa_offset from the non-branch code
-+ @ will affect the branch code as well. To avoid this we'll
-+ @ restore the current state before executing the branch code.
-+
-+ @ Test for equality. Note that 0.0 is equal to -0.0.
- 2: add sp, sp, #4
- .cfi_adjust_cfa_offset -4 @ CFA is now sp + previousOffset.
- 
---- a/src/libgcc/config/arm/lib1funcs.S
-+++ b/src/libgcc/config/arm/lib1funcs.S
-@@ -108,7 +108,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- # define __ARM_ARCH__ 7
- #endif
- 
--#if defined(__ARM_ARCH_8A__)
-+#if defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M_BASE__) \
-+ || defined(__ARM_ARCH_8M_MAIN__)
- # define __ARM_ARCH__ 8
- #endif
- 
-@@ -124,10 +125,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- && !defined(__thumb2__) \
- && (!defined(__THUMB_INTERWORK__) \
- || defined (__OPTIMIZE_SIZE__) \
-- || defined(__ARM_ARCH_6M__)))
-+ || !__ARM_ARCH_ISA_ARM))
- # define __prefer_thumb__
- #endif
- 
-+#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
-+#define NOT_ISA_TARGET_32BIT 1
-+#endif
-+
- /* How to return from a function call depends on the architecture variant. */
- 
- #if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)
-@@ -305,35 +310,14 @@ LSYM(Lend_fde):
- 
- #ifdef __ARM_EABI__
- .macro THUMB_LDIV0 name signed
--#if defined(__ARM_ARCH_6M__)
-- .ifc \signed, unsigned
-- cmp r0, #0
-- beq 1f
-- mov r0, #0
-- mvn r0, r0 @ 0xffffffff
--1:
-- .else
-- cmp r0, #0
-- beq 2f
-- blt 3f
-+#ifdef NOT_ISA_TARGET_32BIT
-+
-+ push {r0, lr}
- mov r0, #0
-- mvn r0, r0
-- lsr r0, r0, #1 @ 0x7fffffff
-- b 2f
--3: mov r0, #0x80
-- lsl r0, r0, #24 @ 0x80000000
--2:
-- .endif
-- push {r0, r1, r2}
-- ldr r0, 4f
-- adr r1, 4f
-- add r0, r1
-- str r0, [sp, #8]
-+ bl SYM(__aeabi_idiv0)
- @ We know we are not on armv4t, so pop pc is safe.
-- pop {r0, r1, pc}
-- .align 2
--4:
-- .word __aeabi_idiv0 - 4b
-+ pop {r1, pc}
-+
- #elif defined(__thumb2__)
- .syntax unified
- .ifc \signed, unsigned
-@@ -478,7 +462,7 @@ _L__\name:
- 
- #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
- 
--#ifdef __ARM_ARCH_6M__
-+#ifdef NOT_ISA_TARGET_32BIT
- #define EQUIV .thumb_set
- #else
- .macro ARM_FUNC_START name sp_section=
-@@ -510,7 +494,7 @@ SYM (__\name):
- #endif
- .endm
- 
--#ifndef __ARM_ARCH_6M__
-+#ifndef NOT_ISA_TARGET_32BIT
- .macro ARM_FUNC_ALIAS new old
- .globl SYM (__\new)
- EQUIV SYM (__\new), SYM (__\old)
-@@ -945,7 +929,170 @@ LSYM(Lover7):
- add dividend, work
- .endif
- LSYM(Lgot_result):
--.endm
-+.endm
-+
-+/* If performance is preferred, the following functions are provided. */
-+#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
-+
-+/* Branch to div(n), and jump to label if curbit is lower than divisor. */
-+.macro BranchToDiv n, label
-+ lsr curbit, dividend, \n
-+ cmp curbit, divisor
-+ blo \label
-+.endm
-+
-+/* Body of div(n). Shift the divisor in n bits and compare the divisor
-+ and dividend. Update the dividend as the subtraction result. */
-+.macro DoDiv n
-+ lsr curbit, dividend, \n
-+ cmp curbit, divisor
-+ bcc 1f
-+ lsl curbit, divisor, \n
-+ sub dividend, dividend, curbit
-+
-+1: adc result, result
-+.endm
-+
-+/* The body of division with positive divisor. Unless the divisor is very
-+ big, shift it up in multiples of four bits, since this is the amount of
-+ unwinding in the main division loop. Continue shifting until the divisor
-+ is larger than the dividend. */
-+.macro THUMB1_Div_Positive
-+ mov result, #0
-+ BranchToDiv #1, LSYM(Lthumb1_div1)
-+ BranchToDiv #4, LSYM(Lthumb1_div4)
-+ BranchToDiv #8, LSYM(Lthumb1_div8)
-+ BranchToDiv #12, LSYM(Lthumb1_div12)
-+ BranchToDiv #16, LSYM(Lthumb1_div16)
-+LSYM(Lthumb1_div_large_positive):
-+ mov result, #0xff
-+ lsl divisor, divisor, #8
-+ rev result, result
-+ lsr curbit, dividend, #16
-+ cmp curbit, divisor
-+ blo 1f
-+ asr result, #8
-+ lsl divisor, divisor, #8
-+ beq LSYM(Ldivbyzero_waypoint)
-+
-+1: lsr curbit, dividend, #12
-+ cmp curbit, divisor
-+ blo LSYM(Lthumb1_div12)
-+ b LSYM(Lthumb1_div16)
-+LSYM(Lthumb1_div_loop):
-+ lsr divisor, divisor, #8
-+LSYM(Lthumb1_div16):
-+ Dodiv #15
-+ Dodiv #14
-+ Dodiv #13
-+ Dodiv #12
-+LSYM(Lthumb1_div12):
-+ Dodiv #11
-+ Dodiv #10
-+ Dodiv #9
-+ Dodiv #8
-+ bcs LSYM(Lthumb1_div_loop)
-+LSYM(Lthumb1_div8):
-+ Dodiv #7
-+ Dodiv #6
-+ Dodiv #5
-+LSYM(Lthumb1_div5):
-+ Dodiv #4
-+LSYM(Lthumb1_div4):
-+ Dodiv #3
-+LSYM(Lthumb1_div3):
-+ Dodiv #2
-+LSYM(Lthumb1_div2):
-+ Dodiv #1
-+LSYM(Lthumb1_div1):
-+ sub divisor, dividend, divisor
-+ bcs 1f
-+ cpy divisor, dividend
-+
-+1: adc result, result
-+ cpy dividend, result
-+ RET
-+
-+LSYM(Ldivbyzero_waypoint):
-+ b LSYM(Ldiv0)
-+.endm
-+
-+/* The body of division with negative divisor. Similar to
-+ THUMB1_Div_Positive except that the shift steps are in multiples
-+ of six bits. */
-+.macro THUMB1_Div_Negative
-+ lsr result, divisor, #31
-+ beq 1f
-+ neg divisor, divisor
-+
-+1: asr curbit, dividend, #32
-+ bcc 2f
-+ neg dividend, dividend
-+
-+2: eor curbit, result
-+ mov result, #0
-+ cpy ip, curbit
-+ BranchToDiv #4, LSYM(Lthumb1_div_negative4)
-+ BranchToDiv #8, LSYM(Lthumb1_div_negative8)
-+LSYM(Lthumb1_div_large):
-+ mov result, #0xfc
-+ lsl divisor, divisor, #6
-+ rev result, result
-+ lsr curbit, dividend, #8
-+ cmp curbit, divisor
-+ blo LSYM(Lthumb1_div_negative8)
-+
-+ lsl divisor, divisor, #6
-+ asr result, result, #6
-+ cmp curbit, divisor
-+ blo LSYM(Lthumb1_div_negative8)
-+
-+ lsl divisor, divisor, #6
-+ asr result, result, #6
-+ cmp curbit, divisor
-+ blo LSYM(Lthumb1_div_negative8)
-+
-+ lsl divisor, divisor, #6
-+ beq LSYM(Ldivbyzero_negative)
-+ asr result, result, #6
-+ b LSYM(Lthumb1_div_negative8)
-+LSYM(Lthumb1_div_negative_loop):
-+ lsr divisor, divisor, #6
-+LSYM(Lthumb1_div_negative8):
-+ DoDiv #7
-+ DoDiv #6
-+ DoDiv #5
-+ DoDiv #4
-+LSYM(Lthumb1_div_negative4):
-+ DoDiv #3
-+ DoDiv #2
-+ bcs LSYM(Lthumb1_div_negative_loop)
-+ DoDiv #1
-+ sub divisor, dividend, divisor
-+ bcs 1f
-+ cpy divisor, dividend
-+
-+1: cpy curbit, ip
-+ adc result, result
-+ asr curbit, curbit, #1
-+ cpy dividend, result
-+ bcc 2f
-+ neg dividend, dividend
-+ cmp curbit, #0
-+
-+2: bpl 3f
-+ neg divisor, divisor
-+
-+3: RET
-+
-+LSYM(Ldivbyzero_negative):
-+ cpy curbit, ip
-+ asr curbit, curbit, #1
-+ bcc LSYM(Ldiv0)
-+ neg dividend, dividend
-+.endm
-+#endif /* ARM Thumb version. */
-+
- /* ------------------------------------------------------------------------ */
- /* Start of the Real Functions */
- /* ------------------------------------------------------------------------ */
-@@ -955,6 +1102,7 @@ LSYM(Lgot_result):
- 
- FUNC_START udivsi3
- FUNC_ALIAS aeabi_uidiv udivsi3
-+#if defined(__OPTIMIZE_SIZE__)
- 
- cmp divisor, #0
- beq LSYM(Ldiv0)
-@@ -972,6 +1120,14 @@ LSYM(udivsi3_skip_div0_test):
- pop { work }
- RET
- 
-+/* Implementation of aeabi_uidiv for ARMv6m. This version is only
-+ used in ARMv6-M when we need an efficient implementation. */
-+#else
-+LSYM(udivsi3_skip_div0_test):
-+ THUMB1_Div_Positive
-+
-+#endif /* __OPTIMIZE_SIZE__ */
-+
- #elif defined(__ARM_ARCH_EXT_IDIV__)
- 
- ARM_FUNC_START udivsi3
-@@ -1023,12 +1179,21 @@ LSYM(udivsi3_skip_div0_test):
- FUNC_START aeabi_uidivmod
- cmp r1, #0
- beq LSYM(Ldiv0)
-+# if defined(__OPTIMIZE_SIZE__)
- push {r0, r1, lr}
- bl LSYM(udivsi3_skip_div0_test)
- POP {r1, r2, r3}
- mul r2, r0
- sub r1, r1, r2
- bx r3
-+# else
-+ /* Both the quotient and remainder are calculated simultaneously
-+ in THUMB1_Div_Positive. There is no need to calculate the
-+ remainder again here. */
-+ b LSYM(udivsi3_skip_div0_test)
-+ RET
-+# endif /* __OPTIMIZE_SIZE__ */
-+
- #elif defined(__ARM_ARCH_EXT_IDIV__)
- ARM_FUNC_START aeabi_uidivmod
- cmp r1, #0
-@@ -1054,7 +1219,7 @@ ARM_FUNC_START aeabi_uidivmod
- /* ------------------------------------------------------------------------ */
- #ifdef L_umodsi3
- 
--#ifdef __ARM_ARCH_EXT_IDIV__
-+#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1
- 
- ARM_FUNC_START umodsi3
- 
-@@ -1084,7 +1249,7 @@ LSYM(Lover10):
- RET
- 
- #else /* ARM version. */
--
-+
- FUNC_START umodsi3
- 
- subs r2, r1, #1 @ compare divisor with 1
-@@ -1109,8 +1274,9 @@ LSYM(Lover10):
- 
- #if defined(__prefer_thumb__)
- 
-- FUNC_START divsi3
-+ FUNC_START divsi3
- FUNC_ALIAS aeabi_idiv divsi3
-+#if defined(__OPTIMIZE_SIZE__)
- 
- cmp divisor, #0
- beq LSYM(Ldiv0)
-@@ -1133,7 +1299,7 @@ LSYM(Lover11):
- blo LSYM(Lgot_result)
- 
- THUMB_DIV_MOD_BODY 0
--
-+
- mov r0, result
- mov work, ip
- cmp work, #0
-@@ -1143,6 +1309,22 @@ LSYM(Lover12):
- pop { work }
- RET
- 
-+/* Implementation of aeabi_idiv for ARMv6m. This version is only
-+ used in ARMv6-M when we need an efficient implementation. */
-+#else
-+LSYM(divsi3_skip_div0_test):
-+ cpy curbit, dividend
-+ orr curbit, divisor
-+ bmi LSYM(Lthumb1_div_negative)
-+
-+LSYM(Lthumb1_div_positive):
-+ THUMB1_Div_Positive
-+
-+LSYM(Lthumb1_div_negative):
-+ THUMB1_Div_Negative
-+
-+#endif /* __OPTIMIZE_SIZE__ */
-+
- #elif defined(__ARM_ARCH_EXT_IDIV__)
- 
- ARM_FUNC_START divsi3
-@@ -1154,8 +1336,8 @@ LSYM(Lover12):
- RET
- 
- #else /* ARM/Thumb-2 version. */
--
-- ARM_FUNC_START divsi3
-+
-+ ARM_FUNC_START divsi3
- ARM_FUNC_ALIAS aeabi_idiv divsi3
- 
- cmp r1, #0
-@@ -1209,12 +1391,21 @@ LSYM(divsi3_skip_div0_test):
- FUNC_START aeabi_idivmod
- cmp r1, #0
- beq LSYM(Ldiv0)
-+# if defined(__OPTIMIZE_SIZE__)
- push {r0, r1, lr}
- bl LSYM(divsi3_skip_div0_test)
- POP {r1, r2, r3}
- mul r2, r0
- sub r1, r1, r2
- bx r3
-+# else
-+ /* Both the quotient and remainder are calculated simultaneously
-+ in THUMB1_Div_Positive and THUMB1_Div_Negative. There is no
-+ need to calculate the remainder again here. */
-+ b LSYM(divsi3_skip_div0_test)
-+ RET
-+# endif /* __OPTIMIZE_SIZE__ */
-+
- #elif defined(__ARM_ARCH_EXT_IDIV__)
- ARM_FUNC_START aeabi_idivmod
- cmp r1, #0
-@@ -1240,7 +1431,7 @@ ARM_FUNC_START aeabi_idivmod
- /* ------------------------------------------------------------------------ */
- #ifdef L_modsi3
- 
--#if defined(__ARM_ARCH_EXT_IDIV__)
-+#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1
- 
- ARM_FUNC_START modsi3
- 
-@@ -1508,14 +1699,15 @@ LSYM(Lover12):
- 
- #endif /* __symbian__ */
- 
--#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
-- || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
-- || defined(__ARM_ARCH_5TEJ__)
-+#if (__ARM_ARCH_ISA_THUMB == 2 \
-+ || (__ARM_ARCH_ISA_ARM \
-+ && (__ARM_ARCH__ > 5 \
-+ || (__ARM_ARCH__ == 5 && __ARM_ARCH_ISA_THUMB))))
- #define HAVE_ARM_CLZ 1
- #endif
- 
- #ifdef L_clzsi2
--#if defined(__ARM_ARCH_6M__)
-+#ifdef NOT_ISA_TARGET_32BIT
- FUNC_START clzsi2
- mov r1, #28
- mov r3, #1
-@@ -1576,7 +1768,7 @@ ARM_FUNC_START clzsi2
- #ifdef L_clzdi2
- #if !defined(HAVE_ARM_CLZ)
- 
--# if defined(__ARM_ARCH_6M__)
-+# ifdef NOT_ISA_TARGET_32BIT
- FUNC_START clzdi2
- push {r4, lr}
- # else
-@@ -1601,7 +1793,7 @@ ARM_FUNC_START clzdi2
- bl __clzsi2
- # endif
- 2:
--# if defined(__ARM_ARCH_6M__)
-+# ifdef NOT_ISA_TARGET_32BIT
- pop {r4, pc}
- # else
- RETLDM r4
-@@ -1623,7 +1815,7 @@ ARM_FUNC_START clzdi2
- #endif /* L_clzdi2 */
- 
- #ifdef L_ctzsi2
--#if defined(__ARM_ARCH_6M__)
-+#ifdef NOT_ISA_TARGET_32BIT
- FUNC_START ctzsi2
- neg r1, r0
- and r0, r0, r1
-@@ -1738,7 +1930,7 @@ ARM_FUNC_START ctzsi2
- 
- /* Don't bother with the old interworking routines for Thumb-2. */
- /* ??? Maybe only omit these on "m" variants. */
--#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)
-+#if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM
- 
- #if defined L_interwork_call_via_rX
- 
-@@ -1983,11 +2175,12 @@ LSYM(Lchange_\register):
- .endm
- 
- #ifndef __symbian__
--#ifndef __ARM_ARCH_6M__
-+/* The condition here must match the one in gcc/config/arm/elf.h. */
-+#ifndef NOT_ISA_TARGET_32BIT
- #include "ieee754-df.S"
- #include "ieee754-sf.S"
- #include "bpabi.S"
---#else /* __ARM_ARCH_6M__ */
-+#else /* NOT_ISA_TARGET_32BIT */
- #include "bpabi-v6m.S"
---#endif /* __ARM_ARCH_6M__ */
-+#endif /* NOT_ISA_TARGET_32BIT */
- #endif /* !__symbian__ */
---- a/src/libgcc/config/arm/libunwind.S
-+++ b/src/libgcc/config/arm/libunwind.S
-@@ -58,7 +58,7 @@
- #endif
- #endif
- 
--#ifdef __ARM_ARCH_6M__
-+#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
- 
- /* r0 points to a 16-word block. Upload these values to the actual core
- state. */
-@@ -169,7 +169,7 @@ FUNC_START gnu_Unwind_Save_WMMXC
- UNPREFIX \name
- .endm
- 
--#else /* !__ARM_ARCH_6M__ */
-+#else /* __ARM_ARCH_ISA_ARM || __ARM_ARCH_ISA_THUMB != 1 */
- 
- /* r0 points to a 16-word block. Upload these values to the actual core
- state. */
-@@ -351,7 +351,7 @@ ARM_FUNC_START gnu_Unwind_Save_WMMXC
- UNPREFIX \name
- .endm
- 
--#endif /* !__ARM_ARCH_6M__ */
-+#endif /* __ARM_ARCH_ISA_ARM || __ARM_ARCH_ISA_THUMB != 1 */
- 
- UNWIND_WRAPPER _Unwind_RaiseException 1
- UNWIND_WRAPPER _Unwind_Resume 1
---- a/src/libgcc/config/arm/t-arm
-+++ b/src/libgcc/config/arm/t-arm
-@@ -1,3 +1,17 @@
- LIB1ASMSRC = arm/lib1funcs.S
- LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \
- _thumb1_case_uhi _thumb1_case_si
-+
-+HAVE_CMSE:=$(findstring __ARM_FEATURE_CMSE,$(shell $(gcc_compile_bare) -dM -E - </dev/null))
-+ifneq ($(shell $(gcc_compile_bare) -E -mcmse - </dev/null 2>/dev/null),)
-+CMSE_OPTS:=-mcmse
-+endif
-+
-+ifdef HAVE_CMSE
-+libgcc-objects += cmse.o cmse_nonsecure_call.o
-+
-+cmse.o: $(srcdir)/config/arm/cmse.c
-+ $(gcc_compile) -c $(CMSE_OPTS) $<
-+cmse_nonsecure_call.o: $(srcdir)/config/arm/cmse_nonsecure_call.S
-+ $(gcc_compile) -c $<
-+endif
---- a/src/libgcc/config/arm/t-softfp
-+++ b/src/libgcc/config/arm/t-softfp
-@@ -1,2 +1,2 @@
---softfp_wrap_start := '\#ifdef __ARM_ARCH_6M__'
-+softfp_wrap_start := '\#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1'
- softfp_wrap_end := '\#endif'
---- a/src/libgcc/libgcc2.c
-+++ b/src/libgcc/libgcc2.c
-@@ -1852,7 +1852,8 @@ NAME (TYPE x, int m)
- 
- #endif
- 
---#if ((defined(L_mulsc3) || defined(L_divsc3)) && LIBGCC2_HAS_SF_MODE) \
-+#if ((defined(L_mulhc3) || defined(L_divhc3)) && LIBGCC2_HAS_HF_MODE) \
-+ || ((defined(L_mulsc3) || defined(L_divsc3)) && LIBGCC2_HAS_SF_MODE) \
- || ((defined(L_muldc3) || defined(L_divdc3)) && LIBGCC2_HAS_DF_MODE) \
- || ((defined(L_mulxc3) || defined(L_divxc3)) && LIBGCC2_HAS_XF_MODE) \
- || ((defined(L_multc3) || defined(L_divtc3)) && LIBGCC2_HAS_TF_MODE)
-@@ -1861,7 +1862,13 @@ NAME (TYPE x, int m)
- #undef double
- #undef long
- 
--#if defined(L_mulsc3) || defined(L_divsc3)
-+#if defined(L_mulhc3) || defined(L_divhc3)
-+# define MTYPE HFtype
-+# define CTYPE HCtype
-+# define MODE hc
-+# define CEXT __LIBGCC_HF_FUNC_EXT__
-+# define NOTRUNC (!__LIBGCC_HF_EXCESS_PRECISION__)
-+#elif defined(L_mulsc3) || defined(L_divsc3)
- # define MTYPE SFtype
- # define CTYPE SCtype
- # define MODE sc
-@@ -1922,7 +1929,7 @@ extern void *compile_type_assert[sizeof(INFINITY) == sizeof(MTYPE) ? 1 : -1];
- # define TRUNC(x) __asm__ ("" : "=m"(x) : "m"(x))
- #endif
- 
---#if defined(L_mulsc3) || defined(L_muldc3) \
-+#if defined(L_mulhc3) || defined(L_mulsc3) || defined(L_muldc3) \
- || defined(L_mulxc3) || defined(L_multc3)
- 
- CTYPE
-@@ -1992,7 +1999,7 @@ CONCAT3(__mul,MODE,3) (MTYPE a, MTYPE b, MTYPE c, MTYPE d)
- }
- #endif /* complex multiply */
- 
---#if defined(L_divsc3) || defined(L_divdc3) \
-+#if defined(L_divhc3) || defined(L_divsc3) || defined(L_divdc3) \
- || defined(L_divxc3) || defined(L_divtc3)
- 
- CTYPE
---- a/src/libgcc/libgcc2.h
-+++ b/src/libgcc/libgcc2.h
-@@ -34,6 +34,12 @@ extern void __clear_cache (char *, char *);
- extern void __eprintf (const char *, const char *, unsigned int, const char *)
- __attribute__ ((__noreturn__));
- 
-+#ifdef __LIBGCC_HAS_HF_MODE__
-+#define LIBGCC2_HAS_HF_MODE 1
-+#else
-+#define LIBGCC2_HAS_HF_MODE 0
-+#endif
-+
- #ifdef __LIBGCC_HAS_SF_MODE__
- #define LIBGCC2_HAS_SF_MODE 1
- #else
-@@ -133,6 +139,10 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
- #endif
- #endif
- 
-+#if LIBGCC2_HAS_HF_MODE
-+typedef float HFtype __attribute__ ((mode (HF)));
-+typedef _Complex float HCtype __attribute__ ((mode (HC)));
-+#endif
- #if LIBGCC2_HAS_SF_MODE
- typedef float SFtype __attribute__ ((mode (SF)));
- typedef _Complex float SCtype __attribute__ ((mode (SC)));
-@@ -424,6 +434,10 @@ extern SItype __negvsi2 (SItype);
- #endif /* COMPAT_SIMODE_TRAPPING_ARITHMETIC */
- 
- #undef int
-+#if LIBGCC2_HAS_HF_MODE
-+extern HCtype __divhc3 (HFtype, HFtype, HFtype, HFtype);
-+extern HCtype __mulhc3 (HFtype, HFtype, HFtype, HFtype);
-+#endif
- #if LIBGCC2_HAS_SF_MODE
- extern DWtype __fixsfdi (SFtype);
- extern SFtype __floatdisf (DWtype);
---- a/src/libstdc++-v3/acinclude.m4
-+++ b/src/libstdc++-v3/acinclude.m4
-@@ -632,10 +632,10 @@ dnl baseline_dir
- dnl baseline_subdir_switch
- dnl
- AC_DEFUN([GLIBCXX_CONFIGURE_TESTSUITE], [
-- if $GLIBCXX_IS_NATIVE ; then
-- # Do checks for resource limit functions.
-- GLIBCXX_CHECK_SETRLIMIT
-+ # Do checks for resource limit functions.
-+ GLIBCXX_CHECK_SETRLIMIT
- 
-+ if $GLIBCXX_IS_NATIVE ; then
- # Look for setenv, so that extended locale tests can be performed.
- GLIBCXX_CHECK_STDLIB_DECL_AND_LINKAGE_3(setenv)
- fi
---- a/src/libstdc++-v3/configure
-+++ b/src/libstdc++-v3/configure
-@@ -79519,8 +79519,7 @@ $as_echo "$ac_cv_x86_rdrand" >&6; }
- 
- # This depends on GLIBCXX_ENABLE_SYMVERS and GLIBCXX_IS_NATIVE.
- 
-- if $GLIBCXX_IS_NATIVE ; then
-- # Do checks for resource limit functions.
-+ # Do checks for resource limit functions.
- 
- setrlimit_have_headers=yes
- for ac_header in unistd.h sys/time.h sys/resource.h
-@@ -79749,6 +79748,7 @@ $as_echo "#define _GLIBCXX_RES_LIMITS 1" >>confdefs.h
- $as_echo "$ac_res_limits" >&6; }
- 
- 
-+ if $GLIBCXX_IS_NATIVE ; then
- # Look for setenv, so that extended locale tests can be performed.
- 
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for setenv declaration" >&5
---- a/src/libstdc++-v3/testsuite/29_atomics/atomic/65913.cc
-+++ b/src/libstdc++-v3/testsuite/29_atomics/atomic/65913.cc
-@@ -15,7 +15,8 @@
- // with this library; see the file COPYING3. If not see
- // <http://www.gnu.org/licenses/>.
- 
---// { dg-do run { target x86_64-*-linux* powerpc*-*-linux* } }
-+// { dg-do run }
-+// { dg-require-atomic-builtins "" }
- // { dg-options "-std=gnu++11 -O0" }
- 
- #include <atomic>