# DP: Changes for the Linaro 6-2017.03 release.
MSG=$(git log origin/linaro/gcc-6-branch --format=format:"%s" -n 1 --grep "Merge branches"); SVN=${MSG##* }; git log origin/gcc-6-branch --format=format:"%H" -n 1 --grep "gcc-6-branch@${SVN%.}"
LANG=C git diff --no-renames 4b7882c54dabbb54686cb577f2a2cf28e93e743b..630c5507bb37d2caaef60a6f0773e4c820d76fe0 \
| egrep -v '^(diff|index) ' \
| filterdiff --strip=1 --addoldprefix=a/src/ --addnewprefix=b/src/ \
| sed 's,a/src//dev/null,/dev/null,'
--- a/src/contrib/compare_tests
+++ b/src/contrib/compare_tests
@@ -107,8 +107,8 @@ elif [ -d "$1" -o -d "$2" ] ; then
usage "Must specify either two directories or two files"
fi
-sed 's/^XFAIL/FAIL/; s/^XPASS/PASS/' < "$1" | awk '/^Running target / {target = $3} { if (target != "unix") { sub(/: /, "&"target": " ); }; print $0; }' | cut -c1-2000 >$tmp1
-sed 's/^XFAIL/FAIL/; s/^XPASS/PASS/' < "$2" | awk '/^Running target / {target = $3} { if (target != "unix") { sub(/: /, "&"target": " ); }; print $0; }' | cut -c1-2000 >$tmp2
+sed 's/^XFAIL/FAIL/; s/^ERROR/FAIL/; s/^XPASS/PASS/' < "$1" | awk '/^Running target / {target = $3} { if (target != "unix") { sub(/: /, "&"target": " ); }; print $0; }' | cut -c1-2000 >$tmp1
+sed 's/^XFAIL/FAIL/; s/^ERROR/FAIL/; s/^XPASS/PASS/' < "$2" | awk '/^Running target / {target = $3} { if (target != "unix") { sub(/: /, "&"target": " ); }; print $0; }' | cut -c1-2000 >$tmp2
before=$tmp1
now=$tmp2
--- a/src/contrib/dg-extract-results.py
+++ b/src/contrib/dg-extract-results.py
@@ -134,6 +134,7 @@ class Prog:
self.end_line = None
# Known summary types.
self.count_names = [
+ '# of DejaGnu errors\t\t',
'# of expected passes\t\t',
'# of unexpected failures\t',
'# of unexpected successes\t',
@@ -245,6 +246,10 @@ class Prog:
segment = Segment (filename, file.tell())
variation.header = segment
+ # Parse the rest of the summary (the '# of ' lines).
+ if len (variation.counts) == 0:
+ variation.counts = self.zero_counts()
+
# Parse up until the first line of the summary.
if num_variations == 1:
end = '\t\t=== ' + tool.name + ' Summary ===\n'
@@ -291,6 +296,11 @@ class Prog:
harness.results.append ((key, line))
if not first_key and sort_logs:
first_key = key
+ if line.startswith ('ERROR: (DejaGnu)'):
+ for i in range (len (self.count_names)):
+ if 'DejaGnu errors' in self.count_names[i]:
+ variation.counts[i] += 1
+ break
# 'Using ...' lines are only interesting in a header. Splitting
# the test up into parallel runs leads to more 'Using ...' lines
@@ -309,9 +319,6 @@ class Prog:
segment.lines -= final_using
harness.add_segment (first_key, segment)
- # Parse the rest of the summary (the '# of ' lines).
- if len (variation.counts) == 0:
- variation.counts = self.zero_counts()
while True:
before = file.tell()
line = file.readline()
--- a/src/contrib/dg-extract-results.sh
+++ b/src/contrib/dg-extract-results.sh
@@ -369,10 +369,11 @@ EOF
BEGIN {
variant="$VAR"
tool="$TOOL"
- passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kpasscnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0;
+ passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kpasscnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0; dgerrorcnt=0;
curvar=""; insummary=0
}
/^Running target / { curvar = \$3; next }
+/^ERROR: \(DejaGnu\)/ { if (variant == curvar) dgerrorcnt += 1 }
/^# of / { if (variant == curvar) insummary = 1 }
/^# of expected passes/ { if (insummary == 1) passcnt += \$5; next; }
/^# of unexpected successes/ { if (insummary == 1) xpasscnt += \$5; next; }
@@ -390,6 +391,7 @@ BEGIN {
{ next }
END {
printf ("\t\t=== %s Summary for %s ===\n\n", tool, variant)
+ if (dgerrorcnt != 0) printf ("# of DejaGnu errors\t\t%d\n", dgerrorcnt)
if (passcnt != 0) printf ("# of expected passes\t\t%d\n", passcnt)
if (failcnt != 0) printf ("# of unexpected failures\t%d\n", failcnt)
if (xpasscnt != 0) printf ("# of unexpected successes\t%d\n", xpasscnt)
@@ -419,8 +421,9 @@ TOTAL_AWK=${TMP}/total.awk
cat << EOF > $TOTAL_AWK
BEGIN {
tool="$TOOL"
- passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0
+ passcnt=0; failcnt=0; untstcnt=0; xpasscnt=0; xfailcnt=0; kfailcnt=0; unsupcnt=0; unrescnt=0; dgerrorcnt=0
}
+/^# of DejaGnu errors/ { dgerrorcnt += \$5 }
/^# of expected passes/ { passcnt += \$5 }
/^# of unexpected failures/ { failcnt += \$5 }
/^# of unexpected successes/ { xpasscnt += \$5 }
@@ -431,7 +434,8 @@ BEGIN {
/^# of unresolved testcases/ { unrescnt += \$5 }
/^# of unsupported tests/ { unsupcnt += \$5 }
END {
- printf ("\n\t\t=== %s Summary ===\n\n", tool)
+	printf ("\n\t\t=== %s Summary ===\n\n", tool)
+ if (dgerrorcnt != 0) printf ("# of DejaGnu errors\t\t%d\n", dgerrorcnt)
if (passcnt != 0) printf ("# of expected passes\t\t%d\n", passcnt)
if (failcnt != 0) printf ("# of unexpected failures\t%d\n", failcnt)
if (xpasscnt != 0) printf ("# of unexpected successes\t%d\n", xpasscnt)
--- /dev/null
+++ b/src/gcc/LINARO-VERSION
@@ -0,0 +1 @@
+Snapshot 6.3-2017.03
--- a/src/gcc/Makefile.in
+++ b/src/gcc/Makefile.in
@@ -832,10 +832,12 @@ BASEVER := $(srcdir)/BASE-VER # 4.x.y
DEVPHASE := $(srcdir)/DEV-PHASE # experimental, prerelease, ""
DATESTAMP := $(srcdir)/DATESTAMP # YYYYMMDD or empty
REVISION := $(srcdir)/REVISION # [BRANCH revision XXXXXX]
+LINAROVER := $(srcdir)/LINARO-VERSION # M.x-YYYY.MM[-S][~dev]
BASEVER_c := $(shell cat $(BASEVER))
DEVPHASE_c := $(shell cat $(DEVPHASE))
DATESTAMP_c := $(shell cat $(DATESTAMP))
+LINAROVER_c := $(shell cat $(LINAROVER))
ifeq (,$(wildcard $(REVISION)))
REVISION_c :=
@@ -862,6 +864,7 @@ DATESTAMP_s := \
"\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\""
PKGVERSION_s:= "\"@PKGVERSION@\""
BUGURL_s := "\"@REPORT_BUGS_TO@\""
+LINAROVER_s := "\"$(LINAROVER_c)\""
PKGVERSION := @PKGVERSION@
BUGURL_TEXI := @REPORT_BUGS_TEXI@
@@ -2701,8 +2704,9 @@ PREPROCESSOR_DEFINES = \
-DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \
@TARGET_SYSTEM_ROOT_DEFINE@
-CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s)
-cppbuiltin.o: $(BASEVER)
+CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s) \
+ -DLINAROVER=$(LINAROVER_s)
+cppbuiltin.o: $(BASEVER) $(LINAROVER)
CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
--- a/src/gcc/ada/gcc-interface/misc.c
+++ b/src/gcc/ada/gcc-interface/misc.c
@@ -255,8 +255,7 @@ static bool
gnat_post_options (const char **pfilename ATTRIBUTE_UNUSED)
{
/* Excess precision other than "fast" requires front-end support. */
- if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD
- && TARGET_FLT_EVAL_METHOD_NON_DEFAULT)
+ if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD)
sorry ("-fexcess-precision=standard for Ada");
flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
--- a/src/gcc/builtins.c
+++ b/src/gcc/builtins.c
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "target.h"
#include "rtl.h"
#include "tree.h"
+#include "memmodel.h"
#include "gimple.h"
#include "predict.h"
#include "tm_p.h"
--- a/src/gcc/c-family/c-common.c
+++ b/src/gcc/c-family/c-common.c
@@ -25,6 +25,7 @@ along with GCC; see the file COPYING3. If not see
#include "target.h"
#include "function.h"
#include "tree.h"
+#include "memmodel.h"
#include "c-common.h"
#include "gimple-expr.h"
#include "tm_p.h"
--- a/src/gcc/c-family/c-opts.c
+++ b/src/gcc/c-family/c-opts.c
@@ -772,8 +772,7 @@ c_common_post_options (const char **pfilename)
support. */
if (c_dialect_cxx ())
{
- if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD
- && TARGET_FLT_EVAL_METHOD_NON_DEFAULT)
+ if (flag_excess_precision_cmdline == EXCESS_PRECISION_STANDARD)
sorry ("-fexcess-precision=standard for C++");
flag_excess_precision_cmdline = EXCESS_PRECISION_FAST;
}
--- a/src/gcc/calls.c
+++ b/src/gcc/calls.c
@@ -194,10 +194,19 @@ prepare_call_address (tree fndecl_or_type, rtx funexp, rtx static_chain_value,
&& targetm.small_register_classes_for_mode_p (FUNCTION_MODE))
? force_not_mem (memory_address (FUNCTION_MODE, funexp))
: memory_address (FUNCTION_MODE, funexp));
- else if (! sibcallp)
+ else
{
- if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
- funexp = force_reg (Pmode, funexp);
+ /* funexp could be a SYMBOL_REF represents a function pointer which is
+ of ptr_mode. In this case, it should be converted into address mode
+ to be a valid address for memory rtx pattern. See PR 64971. */
+ if (GET_MODE (funexp) != Pmode)
+ funexp = convert_memory_address (Pmode, funexp);
+
+ if (! sibcallp)
+ {
+ if (!NO_FUNCTION_CSE && optimize && ! flag_no_function_cse)
+ funexp = force_reg (Pmode, funexp);
+ }
}
if (static_chain_value != 0
--- a/src/gcc/cfg.c
+++ b/src/gcc/cfg.c
@@ -1064,7 +1064,7 @@ free_original_copy_tables (void)
delete bb_copy;
bb_copy = NULL;
delete bb_original;
- bb_copy = NULL;
+ bb_original = NULL;
delete loop_copy;
loop_copy = NULL;
delete original_copy_bb_pool;
--- a/src/gcc/common/config/arm/arm-common.c
+++ b/src/gcc/common/config/arm/arm-common.c
@@ -97,6 +97,49 @@ arm_rewrite_mcpu (int argc, const char **argv)
return arm_rewrite_selected_cpu (argv[argc - 1]);
}
+struct arm_arch_core_flag
+{
+ const char *const name;
+ const arm_feature_set flags;
+};
+
+static const struct arm_arch_core_flag arm_arch_core_flags[] =
+{
+#undef ARM_CORE
+#define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
+ {NAME, FLAGS},
+#include "config/arm/arm-cores.def"
+#undef ARM_CORE
+#undef ARM_ARCH
+#define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
+ {NAME, FLAGS},
+#include "config/arm/arm-arches.def"
+#undef ARM_ARCH
+};
+
+/* Called by the driver to check whether the target denoted by current
+ command line options is a Thumb-only target. ARGV is an array of
+ -march and -mcpu values (ie. it contains the rhs after the equal
+ sign) and we use the last one of them to make a decision. The
+ number of elements in ARGV is given in ARGC. */
+const char *
+arm_target_thumb_only (int argc, const char **argv)
+{
+ unsigned int opt;
+
+ if (argc)
+ {
+ for (opt = 0; opt < (ARRAY_SIZE (arm_arch_core_flags)); opt++)
+ if ((strcmp (argv[argc - 1], arm_arch_core_flags[opt].name) == 0)
+ && !ARM_FSET_HAS_CPU1(arm_arch_core_flags[opt].flags, FL_NOTM))
+ return "-mthumb";
+
+ return NULL;
+ }
+ else
+ return NULL;
+}
+
#undef ARM_CPU_NAME_LENGTH
--- a/src/gcc/config.gcc
+++ b/src/gcc/config.gcc
@@ -307,7 +307,7 @@ m32c*-*-*)
;;
aarch64*-*-*)
cpu_type=aarch64
- extra_headers="arm_neon.h arm_acle.h"
+ extra_headers="arm_fp16.h arm_neon.h arm_acle.h"
c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o"
extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o"
@@ -327,7 +327,7 @@ arc*-*-*)
arm*-*-*)
cpu_type=arm
extra_objs="arm-builtins.o aarch-common.o"
- extra_headers="mmintrin.h arm_neon.h arm_acle.h"
+ extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h"
target_type_format_char='%'
c_target_objs="arm-c.o"
cxx_target_objs="arm-c.o"
@@ -1500,7 +1500,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfreebsd*-gnu | i[34567]86-*-knetbsd*-gnu | i
extra_options="${extra_options} linux-android.opt"
# Assume modern glibc if not targeting Android nor uclibc.
case ${target} in
- *-*-*android*|*-*-*uclibc*)
+ *-*-*android*|*-*-*uclibc*|*-*-*musl*)
;;
*)
default_gnu_indirect_function=yes
@@ -1569,7 +1569,7 @@ x86_64-*-linux* | x86_64-*-kfreebsd*-gnu | x86_64-*-knetbsd*-gnu)
extra_options="${extra_options} linux-android.opt"
# Assume modern glibc if not targeting Android nor uclibc.
case ${target} in
- *-*-*android*|*-*-*uclibc*)
+ *-*-*android*|*-*-*uclibc*|*-*-*musl*)
;;
*)
default_gnu_indirect_function=yes
@@ -3811,38 +3811,51 @@ case "${target}" in
# Add extra multilibs
if test "x$with_multilib_list" != x; then
arm_multilibs=`echo $with_multilib_list | sed -e 's/,/ /g'`
- for arm_multilib in ${arm_multilibs}; do
- case ${arm_multilib} in
- aprofile)
+ case ${arm_multilibs} in
+ aprofile)
# Note that arm/t-aprofile is a
# stand-alone make file fragment to be
# used only with itself. We do not
# specifically use the
# TM_MULTILIB_OPTION framework because
# this shorthand is more
- # pragmatic. Additionally it is only
- # designed to work without any
- # with-cpu, with-arch with-mode
- # with-fpu or with-float options.
- if test "x$with_arch" != x \
- || test "x$with_cpu" != x \
- || test "x$with_float" != x \
- || test "x$with_fpu" != x \
- || test "x$with_mode" != x ; then
- echo "Error: You cannot use any of --with-arch/cpu/fpu/float/mode with --with-multilib-list=aprofile" 1>&2
- exit 1
- fi
- tmake_file="${tmake_file} arm/t-aprofile"
- break
- ;;
- default)
- ;;
- *)
- echo "Error: --with-multilib-list=${with_multilib_list} not supported." 1>&2
- exit 1
- ;;
- esac
- done
+ # pragmatic.
+ tmake_profile_file="arm/t-aprofile"
+ ;;
+ rmprofile)
+ # Note that arm/t-rmprofile is a
+ # stand-alone make file fragment to be
+ # used only with itself. We do not
+ # specifically use the
+ # TM_MULTILIB_OPTION framework because
+ # this shorthand is more
+ # pragmatic.
+ tmake_profile_file="arm/t-rmprofile"
+ ;;
+ default)
+ ;;
+ *)
+ echo "Error: --with-multilib-list=${with_multilib_list} not supported." 1>&2
+ exit 1
+ ;;
+ esac
+
+ if test "x${tmake_profile_file}" != x ; then
+ # arm/t-aprofile and arm/t-rmprofile are only
+ # designed to work without any with-cpu,
+ # with-arch, with-mode, with-fpu or with-float
+ # options.
+ if test "x$with_arch" != x \
+ || test "x$with_cpu" != x \
+ || test "x$with_float" != x \
+ || test "x$with_fpu" != x \
+ || test "x$with_mode" != x ; then
+ echo "Error: You cannot use any of --with-arch/cpu/fpu/float/mode with --with-multilib-list=${with_multilib_list}" 1>&2
+ exit 1
+ fi
+
+ tmake_file="${tmake_file} ${tmake_profile_file}"
+ fi
fi
;;
--- a/src/gcc/config/aarch64/aarch64-arches.def
+++ b/src/gcc/config/aarch64/aarch64-arches.def
@@ -32,4 +32,6 @@
AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8)
AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1)
+AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2)
+AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3)
--- a/src/gcc/config/aarch64/aarch64-builtins.c
+++ b/src/gcc/config/aarch64/aarch64-builtins.c
@@ -62,6 +62,7 @@
#define si_UP SImode
#define sf_UP SFmode
#define hi_UP HImode
+#define hf_UP HFmode
#define qi_UP QImode
#define UP(X) X##_UP
@@ -139,6 +140,10 @@ aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
+aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_unsigned, qualifier_none, qualifier_none };
+#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
+static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)
@@ -164,6 +169,10 @@ aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
static enum aarch64_type_qualifiers
+aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_poly, qualifier_none, qualifier_immediate };
+#define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
+static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
@@ -173,16 +182,29 @@ aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
static enum aarch64_type_qualifiers
+aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_unsigned, qualifier_immediate };
+#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
+static enum aarch64_type_qualifiers
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
static enum aarch64_type_qualifiers
-aarch64_types_ternop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
-#define TYPES_SETREG (aarch64_types_ternop_imm_qualifiers)
-#define TYPES_SHIFTINSERT (aarch64_types_ternop_imm_qualifiers)
-#define TYPES_SHIFTACC (aarch64_types_ternop_imm_qualifiers)
+aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate};
+#define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
+#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
+#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
+#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
+
+static enum aarch64_type_qualifiers
+aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate};
+#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
@@ -197,6 +219,11 @@ aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define TYPES_COMBINE (aarch64_types_combine_qualifiers)
static enum aarch64_type_qualifiers
+aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_poly, qualifier_poly, qualifier_poly };
+#define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers)
+
+static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
@@ -229,6 +256,10 @@ aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
qualifier_map_mode | qualifier_pointer to build a pointer to the
element type of the vector. */
static enum aarch64_type_qualifiers
+aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+ = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
+#define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
+static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
@@ -753,16 +784,16 @@ aarch64_init_simd_builtins (void)
if (qualifiers & qualifier_unsigned)
{
- type_signature[arg_num] = 'u';
+ type_signature[op_num] = 'u';
print_type_signature_p = true;
}
else if (qualifiers & qualifier_poly)
{
- type_signature[arg_num] = 'p';
+ type_signature[op_num] = 'p';
print_type_signature_p = true;
}
else
- type_signature[arg_num] = 's';
+ type_signature[op_num] = 's';
/* Skip an internal operand for vget_{low, high}. */
if (qualifiers & qualifier_internal)
--- a/src/gcc/config/aarch64/aarch64-c.c
+++ b/src/gcc/config/aarch64/aarch64-c.c
@@ -95,6 +95,11 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
else
cpp_undef (pfile, "__ARM_FP");
+ aarch64_def_or_undef (TARGET_FP_F16INST,
+ "__ARM_FEATURE_FP16_SCALAR_ARITHMETIC", pfile);
+ aarch64_def_or_undef (TARGET_SIMD_F16INST,
+ "__ARM_FEATURE_FP16_VECTOR_ARITHMETIC", pfile);
+
aarch64_def_or_undef (TARGET_SIMD, "__ARM_FEATURE_NUMERIC_MAXMIN", pfile);
aarch64_def_or_undef (TARGET_SIMD, "__ARM_NEON", pfile);
--- a/src/gcc/config/aarch64/aarch64-cores.def
+++ b/src/gcc/config/aarch64/aarch64-cores.def
@@ -40,17 +40,33 @@
/* V8 Architecture Processors. */
+/* ARM ('A') cores. */
AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, "0x41", "0xd04")
AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, "0x41", "0xd09")
+
+/* Samsung ('S') cores. */
AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, "0x53", "0x001")
-AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa57, "0x51", "0x800")
+
+/* Qualcomm ('Q') cores. */
+AARCH64_CORE("qdf24xx", qdf24xx, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, "0x51", "0x800")
+
+/* Cavium ('C') cores. */
AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, "0x43", "0x0a1")
+
+/* APM ('P') cores. */
AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
+/* V8.1 Architecture Processors. */
+
+/* Broadcom ('B') cores. */
+AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
+
/* V8 big.LITTLE implementations. */
AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07.0xd03")
AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08.0xd03")
-
+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, "0x41", "0xd09.0xd04")
+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, "0x41", "0xd09.0xd03")
--- a/src/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/src/gcc/config/aarch64/aarch64-cost-tables.h
@@ -127,6 +127,108 @@ const struct cpu_cost_table thunderx_extra_costs =
}
};
+const struct cpu_cost_table vulcan_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* Arith. */
+ 0, /* Logical. */
+ 0, /* Shift. */
+ 0, /* Shift_reg. */
+ COSTS_N_INSNS (1), /* Arith_shift. */
+ COSTS_N_INSNS (1), /* Arith_shift_reg. */
+ COSTS_N_INSNS (1), /* Log_shift. */
+ COSTS_N_INSNS (1), /* Log_shift_reg. */
+ 0, /* Extend. */
+ COSTS_N_INSNS (1), /* Extend_arith. */
+ 0, /* Bfi. */
+ 0, /* Bfx. */
+ COSTS_N_INSNS (3), /* Clz. */
+ 0, /* Rev. */
+ 0, /* Non_exec. */
+ true /* Non_exec_costs_exec. */
+ },
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (4), /* Simple. */
+ COSTS_N_INSNS (4), /* Flag_setting. */
+ COSTS_N_INSNS (4), /* Extend. */
+ COSTS_N_INSNS (5), /* Add. */
+ COSTS_N_INSNS (5), /* Extend_add. */
+ COSTS_N_INSNS (18) /* Idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (4), /* Simple. */
+ 0, /* Flag_setting. */
+ COSTS_N_INSNS (4), /* Extend. */
+ COSTS_N_INSNS (5), /* Add. */
+ COSTS_N_INSNS (5), /* Extend_add. */
+ COSTS_N_INSNS (26) /* Idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (4), /* Load. */
+ COSTS_N_INSNS (4), /* Load_sign_extend. */
+ COSTS_N_INSNS (5), /* Ldrd. */
+ COSTS_N_INSNS (4), /* Ldm_1st. */
+ 1, /* Ldm_regs_per_insn_1st. */
+ 1, /* Ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* Loadf. */
+ COSTS_N_INSNS (4), /* Loadd. */
+ COSTS_N_INSNS (4), /* Load_unaligned. */
+ 0, /* Store. */
+ 0, /* Strd. */
+ 0, /* Stm_1st. */
+ 1, /* Stm_regs_per_insn_1st. */
+ 1, /* Stm_regs_per_insn_subsequent. */
+ 0, /* Storef. */
+ 0, /* Stored. */
+ 0, /* Store_unaligned. */
+ COSTS_N_INSNS (1), /* Loadv. */
+ COSTS_N_INSNS (1) /* Storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (4), /* Div. */
+ COSTS_N_INSNS (1), /* Mult. */
+ COSTS_N_INSNS (1), /* Mult_addsub. */
+ COSTS_N_INSNS (1), /* Fma. */
+ COSTS_N_INSNS (1), /* Addsub. */
+ COSTS_N_INSNS (1), /* Fpconst. */
+ COSTS_N_INSNS (1), /* Neg. */
+ COSTS_N_INSNS (1), /* Compare. */
+ COSTS_N_INSNS (2), /* Widen. */
+ COSTS_N_INSNS (2), /* Narrow. */
+ COSTS_N_INSNS (2), /* Toint. */
+ COSTS_N_INSNS (2), /* Fromint. */
+ COSTS_N_INSNS (2) /* Roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (6), /* Div. */
+ COSTS_N_INSNS (1), /* Mult. */
+ COSTS_N_INSNS (1), /* Mult_addsub. */
+ COSTS_N_INSNS (1), /* Fma. */
+ COSTS_N_INSNS (1), /* Addsub. */
+ COSTS_N_INSNS (1), /* Fpconst. */
+ COSTS_N_INSNS (1), /* Neg. */
+ COSTS_N_INSNS (1), /* Compare. */
+ COSTS_N_INSNS (2), /* Widen. */
+ COSTS_N_INSNS (2), /* Narrow. */
+ COSTS_N_INSNS (2), /* Toint. */
+ COSTS_N_INSNS (2), /* Fromint. */
+ COSTS_N_INSNS (2) /* Roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* Alu. */
+ }
+};
#endif
--- a/src/gcc/config/aarch64/aarch64-elf.h
+++ b/src/gcc/config/aarch64/aarch64-elf.h
@@ -25,15 +25,6 @@
#define ASM_OUTPUT_LABELREF(FILE, NAME) \
aarch64_asm_output_labelref (FILE, NAME)
-#define ASM_OUTPUT_DEF(FILE, NAME1, NAME2) \
- do \
- { \
- assemble_name (FILE, NAME1); \
- fputs (" = ", FILE); \
- assemble_name (FILE, NAME2); \
- fputc ('\n', FILE); \
- } while (0)
-
#define TEXT_SECTION_ASM_OP "\t.text"
#define DATA_SECTION_ASM_OP "\t.data"
#define BSS_SECTION_ASM_OP "\t.bss"
--- a/src/gcc/config/aarch64/aarch64-modes.def
+++ b/src/gcc/config/aarch64/aarch64-modes.def
@@ -21,8 +21,6 @@
CC_MODE (CCFP);
CC_MODE (CCFPE);
CC_MODE (CC_SWP);
-CC_MODE (CC_ZESWP); /* zero-extend LHS (but swap to make it RHS). */
-CC_MODE (CC_SESWP); /* sign-extend LHS (but swap to make it RHS). */
CC_MODE (CC_NZ); /* Only N and Z bits of condition flags are valid. */
CC_MODE (CC_Z); /* Only Z bit of condition flags is valid. */
CC_MODE (CC_C); /* Only C bit of condition flags is valid. */
--- a/src/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/src/gcc/config/aarch64/aarch64-option-extensions.def
@@ -39,8 +39,8 @@
that are required. Their order is not important. */
/* Enabling "fp" just enables "fp".
- Disabling "fp" also disables "simd", "crypto". */
-AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO, "fp")
+ Disabling "fp" also disables "simd", "crypto" and "fp16". */
+AARCH64_OPT_EXTENSION("fp", AARCH64_FL_FP, 0, AARCH64_FL_SIMD | AARCH64_FL_CRYPTO | AARCH64_FL_F16, "fp")
/* Enabling "simd" also enables "fp".
Disabling "simd" also disables "crypto". */
@@ -55,3 +55,7 @@ AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, "crc32")
/* Enabling or disabling "lse" only changes "lse". */
AARCH64_OPT_EXTENSION("lse", AARCH64_FL_LSE, 0, 0, "atomics")
+
+/* Enabling "fp16" also enables "fp".
+ Disabling "fp16" just disables "fp16". */
+AARCH64_OPT_EXTENSION("fp16", AARCH64_FL_F16, AARCH64_FL_FP, 0, "fp16")
--- /dev/null
+++ b/src/gcc/config/aarch64/aarch64-passes.def
@@ -0,0 +1,21 @@
+/* AArch64-specific passes declarations.
+ Copyright (C) 2016 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ . */
+
+INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering);
--- a/src/gcc/config/aarch64/aarch64-protos.h
+++ b/src/gcc/config/aarch64/aarch64-protos.h
@@ -178,6 +178,25 @@ struct cpu_branch_cost
const int unpredictable; /* Unpredictable branch or optimizing for speed. */
};
+/* Control approximate alternatives to certain FP operators. */
+#define AARCH64_APPROX_MODE(MODE) \
+ ((MIN_MODE_FLOAT <= (MODE) && (MODE) <= MAX_MODE_FLOAT) \
+ ? (1 << ((MODE) - MIN_MODE_FLOAT)) \
+ : (MIN_MODE_VECTOR_FLOAT <= (MODE) && (MODE) <= MAX_MODE_VECTOR_FLOAT) \
+ ? (1 << ((MODE) - MIN_MODE_VECTOR_FLOAT \
+ + MAX_MODE_FLOAT - MIN_MODE_FLOAT + 1)) \
+ : (0))
+#define AARCH64_APPROX_NONE (0)
+#define AARCH64_APPROX_ALL (-1)
+
+/* Allowed modes for approximations. */
+struct cpu_approx_modes
+{
+ const unsigned int division; /* Division. */
+ const unsigned int sqrt; /* Square root. */
+ const unsigned int recip_sqrt; /* Reciprocal square root. */
+};
+
struct tune_params
{
const struct cpu_cost_table *insn_extra_cost;
@@ -185,6 +204,7 @@ struct tune_params
const struct cpu_regmove_cost *regmove_cost;
const struct cpu_vector_cost *vec_costs;
const struct cpu_branch_cost *branch_costs;
+ const struct cpu_approx_modes *approx_modes;
int memmov_cost;
int issue_rate;
unsigned int fusible_ops;
@@ -282,14 +302,14 @@ int aarch64_get_condition_code (rtx);
bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
int aarch64_branch_cost (bool, bool);
enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
-bool aarch64_cannot_change_mode_class (machine_mode,
- machine_mode,
- enum reg_class);
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
bool aarch64_constant_address_p (rtx);
+bool aarch64_emit_approx_div (rtx, rtx, rtx);
+bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
bool aarch64_expand_movmem (rtx *);
bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
+bool aarch64_fusion_enabled_p (enum aarch64_fusion_pairs);
bool aarch64_gen_movmemqi (rtx *);
bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
bool aarch64_is_extend_from_extract (machine_mode, rtx, rtx);
@@ -298,6 +318,7 @@ bool aarch64_is_noplt_call_p (rtx);
bool aarch64_label_mentioned_p (rtx);
void aarch64_declare_function_name (FILE *, const char*, tree);
bool aarch64_legitimate_pic_operand_p (rtx);
+bool aarch64_mask_and_shift_for_ubfiz_p (machine_mode, rtx, rtx);
bool aarch64_modes_tieable_p (machine_mode mode1,
machine_mode mode2);
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
@@ -320,6 +341,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
struct simd_immediate_info *);
+bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
bool aarch64_uimm12_shift (HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
@@ -335,11 +357,9 @@ machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
machine_mode);
int aarch64_hard_regno_mode_ok (unsigned, machine_mode);
int aarch64_hard_regno_nregs (unsigned, machine_mode);
-int aarch64_simd_attr_length_move (rtx_insn *);
int aarch64_uxt_size (int, HOST_WIDE_INT);
int aarch64_vec_fpconst_pow_of_2 (rtx);
rtx aarch64_eh_return_handler_rtx (void);
-rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
rtx aarch64_mask_from_zextract_ops (rtx, rtx);
const char *aarch64_output_move_struct (rtx *operands);
rtx aarch64_return_addr (int, rtx);
@@ -352,7 +372,6 @@ unsigned aarch64_dbx_register_number (unsigned);
unsigned aarch64_trampoline_size (void);
void aarch64_asm_output_labelref (FILE *, const char *);
void aarch64_cpu_cpp_builtins (cpp_reader *);
-void aarch64_elf_asm_named_section (const char *, unsigned, tree);
const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
const char * aarch64_output_probe_stack_range (rtx, rtx);
void aarch64_err_no_fpadvsimd (machine_mode, const char *);
@@ -369,7 +388,6 @@ void aarch64_register_pragmas (void);
void aarch64_relayout_simd_types (void);
void aarch64_reset_previous_fndecl (void);
void aarch64_save_restore_target_globals (tree);
-void aarch64_emit_approx_rsqrt (rtx, rtx);
/* Initialize builtins for SIMD intrinsics. */
void init_aarch64_simd_builtins (void);
@@ -436,7 +454,6 @@ int aarch64_ccmp_mode_to_code (enum machine_mode mode);
bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
bool aarch64_operands_ok_for_ldpstp (rtx *, bool, enum machine_mode);
bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, enum machine_mode);
-extern bool aarch64_nopcrelative_literal_loads;
extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
tree, HOST_WIDE_INT);
@@ -450,4 +467,6 @@ enum aarch64_parse_opt_result aarch64_parse_extension (const char *,
std::string aarch64_get_extension_string_for_isa_flags (unsigned long,
unsigned long);
+rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt);
+
#endif /* GCC_AARCH64_PROTOS_H */
--- a/src/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -40,9 +40,10 @@
10 - CODE_FOR_. */
BUILTIN_VDC (COMBINE, combine, 0)
+ VAR1 (COMBINEP, combine, 0, di)
BUILTIN_VB (BINOP, pmul, 0)
- BUILTIN_VALLF (BINOP, fmulx, 0)
- BUILTIN_VDQF_DF (UNOP, sqrt, 2)
+ BUILTIN_VHSDF_HSDF (BINOP, fmulx, 0)
+ BUILTIN_VHSDF_DF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
BUILTIN_VDQ_BHSI (UNOP, clrsb, 2)
@@ -68,14 +69,23 @@
BUILTIN_VDC (GETREG, get_dregoi, 0)
BUILTIN_VDC (GETREG, get_dregci, 0)
BUILTIN_VDC (GETREG, get_dregxi, 0)
+ VAR1 (GETREGP, get_dregoi, 0, di)
+ VAR1 (GETREGP, get_dregci, 0, di)
+ VAR1 (GETREGP, get_dregxi, 0, di)
/* Implemented by aarch64_get_qreg. */
BUILTIN_VQ (GETREG, get_qregoi, 0)
BUILTIN_VQ (GETREG, get_qregci, 0)
BUILTIN_VQ (GETREG, get_qregxi, 0)
+ VAR1 (GETREGP, get_qregoi, 0, v2di)
+ VAR1 (GETREGP, get_qregci, 0, v2di)
+ VAR1 (GETREGP, get_qregxi, 0, v2di)
/* Implemented by aarch64_set_qreg. */
BUILTIN_VQ (SETREG, set_qregoi, 0)
BUILTIN_VQ (SETREG, set_qregci, 0)
BUILTIN_VQ (SETREG, set_qregxi, 0)
+ VAR1 (SETREGP, set_qregoi, 0, v2di)
+ VAR1 (SETREGP, set_qregci, 0, v2di)
+ VAR1 (SETREGP, set_qregxi, 0, v2di)
/* Implemented by aarch64_ld. */
BUILTIN_VDC (LOADSTRUCT, ld2, 0)
BUILTIN_VDC (LOADSTRUCT, ld3, 0)
@@ -224,6 +234,7 @@
BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0)
BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0)
BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0)
+ VAR2 (SHIFTINSERTP, ssli_n, 0, di, v2di)
BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0)
/* Implemented by aarch64_qshl_n. */
BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0)
@@ -234,105 +245,145 @@
BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
/* Implemented by reduc__scal_ (producing scalar). */
- BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
- BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
+ BUILTIN_VDQIF_F16 (UNOP, reduc_smax_scal_, 10)
+ BUILTIN_VDQIF_F16 (UNOP, reduc_smin_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
- BUILTIN_VDQF (UNOP, reduc_smax_nan_scal_, 10)
- BUILTIN_VDQF (UNOP, reduc_smin_nan_scal_, 10)
+ BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
+ BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
- /* Implemented by 3.
+ /* Implemented by 3.
smax variants map to fmaxnm,
smax_nan variants map to fmax. */
BUILTIN_VDQ_BHSI (BINOP, smax, 3)
BUILTIN_VDQ_BHSI (BINOP, smin, 3)
BUILTIN_VDQ_BHSI (BINOP, umax, 3)
BUILTIN_VDQ_BHSI (BINOP, umin, 3)
- BUILTIN_VDQF (BINOP, smax_nan, 3)
- BUILTIN_VDQF (BINOP, smin_nan, 3)
+ BUILTIN_VHSDF_DF (BINOP, smax_nan, 3)
+ BUILTIN_VHSDF_DF (BINOP, smin_nan, 3)
- /* Implemented by 3. */
- BUILTIN_VDQF (BINOP, fmax, 3)
- BUILTIN_VDQF (BINOP, fmin, 3)
+ /* Implemented by 3. */
+ BUILTIN_VHSDF_HSDF (BINOP, fmax, 3)
+ BUILTIN_VHSDF_HSDF (BINOP, fmin, 3)
/* Implemented by aarch64_p. */
BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
BUILTIN_VDQ_BHSI (BINOP, sminp, 0)
BUILTIN_VDQ_BHSI (BINOP, umaxp, 0)
BUILTIN_VDQ_BHSI (BINOP, uminp, 0)
- BUILTIN_VDQF (BINOP, smaxp, 0)
- BUILTIN_VDQF (BINOP, sminp, 0)
- BUILTIN_VDQF (BINOP, smax_nanp, 0)
- BUILTIN_VDQF (BINOP, smin_nanp, 0)
+ BUILTIN_VHSDF (BINOP, smaxp, 0)
+ BUILTIN_VHSDF (BINOP, sminp, 0)
+ BUILTIN_VHSDF (BINOP, smax_nanp, 0)
+ BUILTIN_VHSDF (BINOP, smin_nanp, 0)
/* Implemented by 2. */
- BUILTIN_VDQF (UNOP, btrunc, 2)
- BUILTIN_VDQF (UNOP, ceil, 2)
- BUILTIN_VDQF (UNOP, floor, 2)
- BUILTIN_VDQF (UNOP, nearbyint, 2)
- BUILTIN_VDQF (UNOP, rint, 2)
- BUILTIN_VDQF (UNOP, round, 2)
- BUILTIN_VDQF_DF (UNOP, frintn, 2)
+ BUILTIN_VHSDF (UNOP, btrunc, 2)
+ BUILTIN_VHSDF (UNOP, ceil, 2)
+ BUILTIN_VHSDF (UNOP, floor, 2)
+ BUILTIN_VHSDF (UNOP, nearbyint, 2)
+ BUILTIN_VHSDF (UNOP, rint, 2)
+ BUILTIN_VHSDF (UNOP, round, 2)
+ BUILTIN_VHSDF_DF (UNOP, frintn, 2)
+
+ VAR1 (UNOP, btrunc, 2, hf)
+ VAR1 (UNOP, ceil, 2, hf)
+ VAR1 (UNOP, floor, 2, hf)
+ VAR1 (UNOP, frintn, 2, hf)
+ VAR1 (UNOP, nearbyint, 2, hf)
+ VAR1 (UNOP, rint, 2, hf)
+ VAR1 (UNOP, round, 2, hf)
/* Implemented by l2. */
+ VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
+ VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
VAR1 (UNOP, lbtruncv2sf, 2, v2si)
VAR1 (UNOP, lbtruncv4sf, 2, v4si)
VAR1 (UNOP, lbtruncv2df, 2, v2di)
+ VAR1 (UNOPUS, lbtruncuv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lbtruncuv8hf, 2, v8hi)
VAR1 (UNOPUS, lbtruncuv2sf, 2, v2si)
VAR1 (UNOPUS, lbtruncuv4sf, 2, v4si)
VAR1 (UNOPUS, lbtruncuv2df, 2, v2di)
+ VAR1 (UNOP, lroundv4hf, 2, v4hi)
+ VAR1 (UNOP, lroundv8hf, 2, v8hi)
VAR1 (UNOP, lroundv2sf, 2, v2si)
VAR1 (UNOP, lroundv4sf, 2, v4si)
VAR1 (UNOP, lroundv2df, 2, v2di)
- /* Implemented by l2. */
+ /* Implemented by l2. */
+ BUILTIN_GPI_I16 (UNOP, lroundhf, 2)
VAR1 (UNOP, lroundsf, 2, si)
VAR1 (UNOP, lrounddf, 2, di)
+ VAR1 (UNOPUS, lrounduv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lrounduv8hf, 2, v8hi)
VAR1 (UNOPUS, lrounduv2sf, 2, v2si)
VAR1 (UNOPUS, lrounduv4sf, 2, v4si)
VAR1 (UNOPUS, lrounduv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lrounduhf, 2)
VAR1 (UNOPUS, lroundusf, 2, si)
VAR1 (UNOPUS, lroundudf, 2, di)
+ VAR1 (UNOP, lceilv4hf, 2, v4hi)
+ VAR1 (UNOP, lceilv8hf, 2, v8hi)
VAR1 (UNOP, lceilv2sf, 2, v2si)
VAR1 (UNOP, lceilv4sf, 2, v4si)
VAR1 (UNOP, lceilv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lceilhf, 2)
+ VAR1 (UNOPUS, lceiluv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lceiluv8hf, 2, v8hi)
VAR1 (UNOPUS, lceiluv2sf, 2, v2si)
VAR1 (UNOPUS, lceiluv4sf, 2, v4si)
VAR1 (UNOPUS, lceiluv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lceiluhf, 2)
VAR1 (UNOPUS, lceilusf, 2, si)
VAR1 (UNOPUS, lceiludf, 2, di)
+ VAR1 (UNOP, lfloorv4hf, 2, v4hi)
+ VAR1 (UNOP, lfloorv8hf, 2, v8hi)
VAR1 (UNOP, lfloorv2sf, 2, v2si)
VAR1 (UNOP, lfloorv4sf, 2, v4si)
VAR1 (UNOP, lfloorv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lfloorhf, 2)
+ VAR1 (UNOPUS, lflooruv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lflooruv8hf, 2, v8hi)
VAR1 (UNOPUS, lflooruv2sf, 2, v2si)
VAR1 (UNOPUS, lflooruv4sf, 2, v4si)
VAR1 (UNOPUS, lflooruv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lflooruhf, 2)
VAR1 (UNOPUS, lfloorusf, 2, si)
VAR1 (UNOPUS, lfloorudf, 2, di)
+ VAR1 (UNOP, lfrintnv4hf, 2, v4hi)
+ VAR1 (UNOP, lfrintnv8hf, 2, v8hi)
VAR1 (UNOP, lfrintnv2sf, 2, v2si)
VAR1 (UNOP, lfrintnv4sf, 2, v4si)
VAR1 (UNOP, lfrintnv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOP, lfrintnhf, 2)
VAR1 (UNOP, lfrintnsf, 2, si)
VAR1 (UNOP, lfrintndf, 2, di)
+ VAR1 (UNOPUS, lfrintnuv4hf, 2, v4hi)
+ VAR1 (UNOPUS, lfrintnuv8hf, 2, v8hi)
VAR1 (UNOPUS, lfrintnuv2sf, 2, v2si)
VAR1 (UNOPUS, lfrintnuv4sf, 2, v4si)
VAR1 (UNOPUS, lfrintnuv2df, 2, v2di)
+ BUILTIN_GPI_I16 (UNOPUS, lfrintnuhf, 2)
VAR1 (UNOPUS, lfrintnusf, 2, si)
VAR1 (UNOPUS, lfrintnudf, 2, di)
/* Implemented by 2. */
+ VAR1 (UNOP, floatv4hi, 2, v4hf)
+ VAR1 (UNOP, floatv8hi, 2, v8hf)
VAR1 (UNOP, floatv2si, 2, v2sf)
VAR1 (UNOP, floatv4si, 2, v4sf)
VAR1 (UNOP, floatv2di, 2, v2df)
+ VAR1 (UNOP, floatunsv4hi, 2, v4hf)
+ VAR1 (UNOP, floatunsv8hi, 2, v8hf)
VAR1 (UNOP, floatunsv2si, 2, v2sf)
VAR1 (UNOP, floatunsv4si, 2, v4sf)
VAR1 (UNOP, floatunsv2di, 2, v2df)
@@ -352,19 +403,19 @@
/* Implemented by
aarch64_frecp. */
- BUILTIN_GPF (UNOP, frecpe, 0)
- BUILTIN_GPF (BINOP, frecps, 0)
- BUILTIN_GPF (UNOP, frecpx, 0)
+ BUILTIN_GPF_F16 (UNOP, frecpe, 0)
+ BUILTIN_GPF_F16 (UNOP, frecpx, 0)
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
- BUILTIN_VDQF (UNOP, frecpe, 0)
- BUILTIN_VDQF (BINOP, frecps, 0)
+ BUILTIN_VHSDF (UNOP, frecpe, 0)
+ BUILTIN_VHSDF_HSDF (BINOP, frecps, 0)
/* Implemented by a mixture of abs2 patterns. Note the DImode builtin is
only ever used for the int64x1_t intrinsic, there is no scalar version. */
BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
- BUILTIN_VDQF (UNOP, abs, 2)
+ BUILTIN_VHSDF (UNOP, abs, 2)
+ VAR1 (UNOP, abs, 2, hf)
BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
@@ -376,15 +427,22 @@
/* Implemented by aarch64_ld1. */
BUILTIN_VALL_F16 (LOAD1, ld1, 0)
+ VAR1(STORE1P, ld1, 0, v2di)
/* Implemented by aarch64_st1. */
BUILTIN_VALL_F16 (STORE1, st1, 0)
+ VAR1(STORE1P, st1, 0, v2di)
/* Implemented by fma4. */
- BUILTIN_VDQF (TERNOP, fma, 4)
+ BUILTIN_VHSDF (TERNOP, fma, 4)
+ VAR1 (TERNOP, fma, 4, hf)
+ /* Implemented by fnma4. */
+ BUILTIN_VHSDF (TERNOP, fnma, 4)
+ VAR1 (TERNOP, fnma, 4, hf)
/* Implemented by aarch64_simd_bsl. */
BUILTIN_VDQQH (BSL_P, simd_bsl, 0)
+ VAR2 (BSL_P, simd_bsl,0, di, v2di)
BUILTIN_VSDQ_I_DI (BSL_U, simd_bsl, 0)
BUILTIN_VALLDIF (BSL_S, simd_bsl, 0)
@@ -436,7 +494,7 @@
VAR1 (TERNOP, qtbx4, 0, v8qi)
VAR1 (TERNOP, qtbx4, 0, v16qi)
- /* Builtins for ARMv8.1 Adv.SIMD instructions. */
+ /* Builtins for ARMv8.1-A Adv.SIMD instructions. */
/* Implemented by aarch64_sqrdmlh. */
BUILTIN_VSDQ_HSI (TERNOP, sqrdmlah, 0)
@@ -449,3 +507,60 @@
/* Implemented by aarch64_sqrdmlh_laneq. */
BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlah_laneq, 0)
BUILTIN_VSDQ_HSI (QUADOP_LANE, sqrdmlsh_laneq, 0)
+
+ /* Implemented by <*><*>3. */
+ BUILTIN_VSDQ_HSDI (SHIFTIMM, scvtf, 3)
+ BUILTIN_VSDQ_HSDI (FCVTIMM_SUS, ucvtf, 3)
+ BUILTIN_VHSDF_HSDF (SHIFTIMM, fcvtzs, 3)
+ BUILTIN_VHSDF_HSDF (SHIFTIMM_USS, fcvtzu, 3)
+ VAR1 (SHIFTIMM, scvtfsi, 3, hf)
+ VAR1 (SHIFTIMM, scvtfdi, 3, hf)
+ VAR1 (FCVTIMM_SUS, ucvtfsi, 3, hf)
+ VAR1 (FCVTIMM_SUS, ucvtfdi, 3, hf)
+ BUILTIN_GPI (SHIFTIMM, fcvtzshf, 3)
+ BUILTIN_GPI (SHIFTIMM_USS, fcvtzuhf, 3)
+
+ /* Implemented by aarch64_rsqrte. */
+ BUILTIN_VHSDF_HSDF (UNOP, rsqrte, 0)
+
+ /* Implemented by aarch64_rsqrts. */
+ BUILTIN_VHSDF_HSDF (BINOP, rsqrts, 0)
+
+ /* Implemented by fabd3. */
+ BUILTIN_VHSDF_HSDF (BINOP, fabd, 3)
+
+ /* Implemented by aarch64_faddp. */
+ BUILTIN_VHSDF (BINOP, faddp, 0)
+
+ /* Implemented by aarch64_cm. */
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmeq, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmge, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmgt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0)
+
+ /* Implemented by neg2. */
+ BUILTIN_VHSDF_HSDF (UNOP, neg, 2)
+
+ /* Implemented by aarch64_fac. */
+ BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facgt, 0)
+ BUILTIN_VHSDF_HSDF (BINOP_USS, facge, 0)
+
+ /* Implemented by sqrt2. */
+ VAR1 (UNOP, sqrt, 2, hf)
+
+ /* Implemented by hf2. */
+ VAR1 (UNOP, floatdi, 2, hf)
+ VAR1 (UNOP, floatsi, 2, hf)
+ VAR1 (UNOP, floathi, 2, hf)
+ VAR1 (UNOPUS, floatunsdi, 2, hf)
+ VAR1 (UNOPUS, floatunssi, 2, hf)
+ VAR1 (UNOPUS, floatunshi, 2, hf)
+ BUILTIN_GPI_I16 (UNOP, fix_trunchf, 2)
+ BUILTIN_GPI (UNOP, fix_truncsf, 2)
+ BUILTIN_GPI (UNOP, fix_truncdf, 2)
+ BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
+ BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
+ BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
\ No newline at end of file
--- a/src/gcc/config/aarch64/aarch64-simd.md
+++ b/src/gcc/config/aarch64/aarch64-simd.md
@@ -351,7 +351,7 @@
operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2])));
return "mul\\t%0., %3., %1.[%2]";
}
- [(set_attr "type" "neon_mul__scalar")]
+ [(set_attr "type" "neon_mul__scalar")]
)
(define_insn "*aarch64_mul3_elt_"
@@ -371,33 +371,33 @@
[(set_attr "type" "neon_mul__scalar")]
)
-(define_insn "*aarch64_mul3_elt_to_128df"
- [(set (match_operand:V2DF 0 "register_operand" "=w")
- (mult:V2DF
- (vec_duplicate:V2DF
- (match_operand:DF 2 "register_operand" "w"))
- (match_operand:V2DF 1 "register_operand" "w")))]
+(define_insn "*aarch64_mul3_elt_from_dup"
+ [(set (match_operand:VMUL 0 "register_operand" "=w")
+ (mult:VMUL
+ (vec_duplicate:VMUL
+ (match_operand: 1 "register_operand" ""))
+ (match_operand:VMUL 2 "register_operand" "w")))]
"TARGET_SIMD"
- "fmul\\t%0.2d, %1.2d, %2.d[0]"
- [(set_attr "type" "neon_fp_mul_d_scalar_q")]
+ "mul\t%0., %2., %1.[0]";
+ [(set_attr "type" "neon_mul__scalar")]
)
-(define_insn "aarch64_rsqrte_2"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+(define_insn "aarch64_rsqrte"
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
UNSPEC_RSQRTE))]
"TARGET_SIMD"
"frsqrte\\t%0, %1"
- [(set_attr "type" "neon_fp_rsqrte_")])
+ [(set_attr "type" "neon_fp_rsqrte_")])
-(define_insn "aarch64_rsqrts_3"
- [(set (match_operand:VALLF 0 "register_operand" "=w")
- (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
- (match_operand:VALLF 2 "register_operand" "w")]
- UNSPEC_RSQRTS))]
+(define_insn "aarch64_rsqrts"
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
+ (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
+ UNSPEC_RSQRTS))]
"TARGET_SIMD"
"frsqrts\\t%0, %1, %2"
- [(set_attr "type" "neon_fp_rsqrts_")])
+ [(set_attr "type" "neon_fp_rsqrts_")])
(define_expand "rsqrt2"
[(set (match_operand:VALLF 0 "register_operand" "=w")
@@ -405,7 +405,7 @@
UNSPEC_RSQRT))]
"TARGET_SIMD"
{
- aarch64_emit_approx_rsqrt (operands[0], operands[1]);
+ aarch64_emit_approx_sqrt (operands[0], operands[1], true);
DONE;
})
@@ -474,24 +474,15 @@
[(set_attr "type" "neon_arith_acc")]
)
-(define_insn "fabd_3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (abs:VDQF (minus:VDQF
- (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w"))))]
- "TARGET_SIMD"
- "fabd\t%0., %1., %2."
- [(set_attr "type" "neon_fp_abd_")]
-)
-
-(define_insn "*fabd_scalar3"
- [(set (match_operand:GPF 0 "register_operand" "=w")
- (abs:GPF (minus:GPF
- (match_operand:GPF 1 "register_operand" "w")
- (match_operand:GPF 2 "register_operand" "w"))))]
+(define_insn "fabd3"
+ [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+ (abs:VHSDF_HSDF
+ (minus:VHSDF_HSDF
+ (match_operand:VHSDF_HSDF 1 "register_operand" "w")
+ (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
"TARGET_SIMD"
- "fabd\t%0, %1, %2"
- [(set_attr "type" "neon_fp_abd_")]
+ "fabd\t%0, %1, %2"
+ [(set_attr "type" "neon_fp_abd_")]
)
(define_insn "and3"
@@ -555,6 +546,49 @@
[(set_attr "type" "neon_from_gp, neon_ins, neon_load1_1reg")]
)
+(define_insn "*aarch64_simd_vec_copy_lane"
+ [(set (match_operand:VALL 0 "register_operand" "=w")
+ (vec_merge:VALL
+ (vec_duplicate:VALL
+ (vec_select:
+ (match_operand:VALL 3 "register_operand" "w")
+ (parallel
+ [(match_operand:SI 4 "immediate_operand" "i")])))
+ (match_operand:VALL 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_SIMD"
+ {
+ int elt = ENDIAN_LANE_N (mode, exact_log2 (INTVAL (operands[2])));
+ operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
+ operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4])));
+
+ return "ins\t%0.[%p2], %3.[%4]";
+ }
+ [(set_attr "type" "neon_ins")]
+)
+
+(define_insn "*aarch64_simd_vec_copy_lane_"
+ [(set (match_operand:VALL 0 "register_operand" "=w")
+ (vec_merge:VALL
+ (vec_duplicate:VALL
+ (vec_select:
+ (match_operand: 3 "register_operand" "w")
+ (parallel
+ [(match_operand:SI 4 "immediate_operand" "i")])))
+ (match_operand:VALL 1 "register_operand" "0")
+ (match_operand:SI 2 "immediate_operand" "i")))]
+ "TARGET_SIMD"
+ {
+ int elt = ENDIAN_LANE_N (mode, exact_log2 (INTVAL (operands[2])));
+ operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
+ operands[4] = GEN_INT (ENDIAN_LANE_N (mode,
+ INTVAL (operands[4])));
+
+ return "ins\t%0.[%p2], %3.[%4]";
+ }
+ [(set_attr "type" "neon_ins")]
+)
+
(define_insn "aarch64_simd_lshr"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
@@ -1071,10 +1105,10 @@
;; Pairwise FP Max/Min operations.
(define_insn "aarch64_p"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")]
- FMAXMINV))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")]
+ FMAXMINV))]
"TARGET_SIMD"
"p\t%0., %1., %2."
[(set_attr "type" "neon_minmax")]
@@ -1483,65 +1517,77 @@
;; FP arithmetic operations.
(define_insn "add3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fadd\\t%0., %1., %2."
- [(set_attr "type" "neon_fp_addsub_")]
+ [(set_attr "type" "neon_fp_addsub_")]
)
(define_insn "sub3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fsub\\t%0., %1., %2."
- [(set_attr "type" "neon_fp_addsub_")]
+ [(set_attr "type" "neon_fp_addsub_")]
)
(define_insn "mul3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fmul\\t%0., %1., %2."
- [(set_attr "type" "neon_fp_mul_")]
+ [(set_attr "type" "neon_fp_mul_")]
)
-(define_insn "div3"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")))]
+(define_expand "div3"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
+ "TARGET_SIMD"
+{
+ if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
+ DONE;
+
+ operands[1] = force_reg (mode, operands[1]);
+})
+
+(define_insn "*div3"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")))]
"TARGET_SIMD"
"fdiv\\t%0., %1., %2."
- [(set_attr "type" "neon_fp_div_")]
+ [(set_attr "type" "neon_fp_div_")]
)
(define_insn "neg2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fneg\\t%0., %1."
- [(set_attr "type" "neon_fp_neg_")]
+ [(set_attr "type" "neon_fp_neg_")]
)
(define_insn "abs2"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fabs\\t%0., %1."
- [(set_attr "type" "neon_fp_abs_")]
+ [(set_attr "type" "neon_fp_abs_")]
)
(define_insn "fma4"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
- (match_operand:VDQF 2 "register_operand" "w")
- (match_operand:VDQF 3 "register_operand" "0")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")
+ (match_operand:VHSDF 3 "register_operand" "0")))]
"TARGET_SIMD"
"fmla\\t%0., %1., %2."
- [(set_attr "type" "neon_fp_mla_")]
+ [(set_attr "type" "neon_fp_mla_")]
)
(define_insn "*aarch64_fma4_elt"
@@ -1579,16 +1625,16 @@
[(set_attr "type" "neon_fp_mla__scalar")]
)
-(define_insn "*aarch64_fma4_elt_to_128df"
- [(set (match_operand:V2DF 0 "register_operand" "=w")
- (fma:V2DF
- (vec_duplicate:V2DF
- (match_operand:DF 1 "register_operand" "w"))
- (match_operand:V2DF 2 "register_operand" "w")
- (match_operand:V2DF 3 "register_operand" "0")))]
+(define_insn "*aarch64_fma4_elt_from_dup"
+ [(set (match_operand:VMUL 0 "register_operand" "=w")
+ (fma:VMUL
+ (vec_duplicate:VMUL
+ (match_operand: 1 "register_operand" "w"))
+ (match_operand:VMUL 2 "register_operand" "w")
+ (match_operand:VMUL 3 "register_operand" "0")))]
"TARGET_SIMD"
- "fmla\\t%0.2d, %2.2d, %1.2d[0]"
- [(set_attr "type" "neon_fp_mla_d_scalar_q")]
+ "fmla\t%0., %2., %1.[0]"
+ [(set_attr "type" "neon_mla__scalar")]
)
(define_insn "*aarch64_fma4_elt_to_64v2df"
@@ -1608,15 +1654,15 @@
)
(define_insn "fnma4"
- [(set (match_operand:VDQF 0 "register_operand" "=w")
- (fma:VDQF
- (match_operand:VDQF 1 "register_operand" "w")
- (neg:VDQF
- (match_operand:VDQF 2 "register_operand" "w"))
- (match_operand:VDQF 3 "register_operand" "0")))]
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (fma:VHSDF
+ (match_operand:VHSDF 1 "register_operand" "w")
+ (neg:VHSDF
+ (match_operand:VHSDF 2 "register_operand" "w"))
+ (match_operand:VHSDF 3 "register_operand" "0")))]
"TARGET_SIMD"
- "fmls\\t%0., %1., %2."
- [(set_attr "type" "neon_fp_mla_")]
+ "fmls\\t%0., %1., %2."
+ [(set_attr "type" "neon_fp_mla_")]
)
(define_insn "*aarch64_fnma4_elt"
@@ -1656,17 +1702,17 @@
[(set_attr "type" "neon_fp_mla__scalar")]
)
-(define_insn "*aarch64_fnma4_elt_to_128df"
- [(set (match_operand:V2DF 0 "register_operand" "=w")
- (fma:V2DF
- (neg:V2DF
- (match_operand:V2DF 2 "register_operand" "w"))
- (vec_duplicate:V2DF
- (match_operand:DF 1 "register_operand" "w"))
- (match_operand:V2DF 3 "register_operand" "0")))]
+(define_insn "*aarch64_fnma4_elt_from_dup"
+ [(set (match_operand:VMUL 0 "register_operand" "=w")
+ (fma:VMUL
+ (neg:VMUL
+ (match_operand:VMUL 2 "register_operand" "w"))
+ (vec_duplicate:VMUL
+ (match_operand: 1 "register_operand" "w"))
+ (match_operand:VMUL 3 "register_operand" "0")))]
"TARGET_SIMD"
- "fmls\\t%0.2d, %2.2d, %1.2d[0]"
- [(set_attr "type" "neon_fp_mla_d_scalar_q")]
+ "fmls\t%0., %2., %1.[0]"
+ [(set_attr "type" "neon_mla__scalar")]
)
(define_insn "*aarch64_fnma4_elt_to_64v2df"
@@ -1689,24 +1735,50 @@
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "