# DP: Changes for the Linaro 4.9-2014.07 release. LANG=C svn diff svn://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@212635 \ svn://gcc.gnu.org/svn/gcc/branches/linaro/gcc-4_9-branch@212977 \ | filterdiff --remove-timestamps --addoldprefix=a/src/ --addnewprefix=b/src/ --- a/src/libitm/ChangeLog.linaro +++ b/src/libitm/ChangeLog.linaro @@ -0,0 +1,28 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-26 Yvan Roux + + Backport from trunk r210615. + 2014-05-19 Richard Henderson + + * config/aarch64/sjlj.S: New file. + * config/aarch64/target.h: New file. + * configure.tgt: Enable aarch64. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libgomp/ChangeLog.linaro +++ b/src/libgomp/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libquadmath/ChangeLog.linaro +++ b/src/libquadmath/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libsanitizer/ChangeLog.linaro +++ b/src/libsanitizer/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/zlib/ChangeLog.linaro +++ b/src/zlib/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libstdc++-v3/ChangeLog.linaro +++ b/src/libstdc++-v3/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/intl/ChangeLog.linaro +++ b/src/intl/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/ChangeLog.linaro +++ b/src/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. 
+ +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/boehm-gc/ChangeLog.linaro +++ b/src/boehm-gc/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/include/ChangeLog.linaro +++ b/src/include/ChangeLog.linaro @@ -0,0 +1,26 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-23 Yvan Roux + + Backport from trunk r209649. + 2014-04-22 Yufeng Zhang + + * longlong.h: Merge from glibc. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/include/longlong.h +++ b/src/include/longlong.h @@ -1,5 +1,5 @@ /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. - Copyright (C) 1991-2013 Free Software Foundation, Inc. + Copyright (C) 1991-2014 Free Software Foundation, Inc. This file is part of the GNU C Library. @@ -122,6 +122,22 @@ #define __AND_CLOBBER_CC , "cc" #endif /* __GNUC__ < 2 */ +#if defined (__aarch64__) + +#if W_TYPE_SIZE == 32 +#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) +#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) +#define COUNT_LEADING_ZEROS_0 32 +#endif /* W_TYPE_SIZE == 32 */ + +#if W_TYPE_SIZE == 64 +#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X)) +#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X)) +#define COUNT_LEADING_ZEROS_0 64 +#endif /* W_TYPE_SIZE == 64 */ + +#endif /* __aarch64__ */ + #if defined (__alpha) && W_TYPE_SIZE == 64 #define umul_ppmm(ph, pl, m0, m1) \ do { \ --- a/src/libiberty/ChangeLog.linaro +++ b/src/libiberty/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/lto-plugin/ChangeLog.linaro +++ b/src/lto-plugin/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/contrib/regression/ChangeLog.linaro +++ b/src/contrib/regression/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/contrib/ChangeLog.linaro +++ b/src/contrib/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. 
--- a/src/contrib/reghunt/ChangeLog.linaro +++ b/src/contrib/reghunt/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libatomic/ChangeLog.linaro +++ b/src/libatomic/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/config/ChangeLog.linaro +++ b/src/config/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libbacktrace/ChangeLog.linaro +++ b/src/libbacktrace/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libjava/libltdl/ChangeLog.linaro +++ b/src/libjava/libltdl/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libjava/ChangeLog.linaro +++ b/src/libjava/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libjava/classpath/ChangeLog.linaro +++ b/src/libjava/classpath/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gnattools/ChangeLog.linaro +++ b/src/gnattools/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/maintainer-scripts/ChangeLog.linaro +++ b/src/maintainer-scripts/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. 
+ +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libgcc/ChangeLog.linaro +++ b/src/libgcc/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libgcc/config/libbid/ChangeLog.linaro +++ b/src/libgcc/config/libbid/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libdecnumber/ChangeLog.linaro +++ b/src/libdecnumber/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/LINARO-VERSION +++ b/src/gcc/LINARO-VERSION @@ -0,0 +1 @@ +4.9-2014.07-1~dev --- a/src/gcc/c-family/ChangeLog.linaro +++ b/src/gcc/c-family/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/java/ChangeLog.linaro +++ b/src/gcc/java/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. 
--- a/src/gcc/c/c-parser.c +++ b/src/gcc/c/c-parser.c @@ -4210,7 +4210,8 @@ init.original_type = NULL; c_parser_error (parser, "expected identifier"); c_parser_skip_until_found (parser, CPP_COMMA, NULL); - process_init_element (init, false, braced_init_obstack); + process_init_element (input_location, init, false, + braced_init_obstack); return; } } @@ -4342,7 +4343,8 @@ init.original_type = NULL; c_parser_error (parser, "expected %<=%>"); c_parser_skip_until_found (parser, CPP_COMMA, NULL); - process_init_element (init, false, braced_init_obstack); + process_init_element (input_location, init, false, + braced_init_obstack); return; } } @@ -4363,11 +4365,12 @@ { struct c_expr init; gcc_assert (!after || c_dialect_objc ()); + location_t loc = c_parser_peek_token (parser)->location; + if (c_parser_next_token_is (parser, CPP_OPEN_BRACE) && !after) init = c_parser_braced_init (parser, NULL_TREE, true); else { - location_t loc = c_parser_peek_token (parser)->location; init = c_parser_expr_no_commas (parser, after); if (init.value != NULL_TREE && TREE_CODE (init.value) != STRING_CST @@ -4374,7 +4377,7 @@ && TREE_CODE (init.value) != COMPOUND_LITERAL_EXPR) init = convert_lvalue_to_rvalue (loc, init, true, true); } - process_init_element (init, false, braced_init_obstack); + process_init_element (loc, init, false, braced_init_obstack); } /* Parse a compound statement (possibly a function body) (C90 6.6.2, --- a/src/gcc/c/c-typeck.c +++ b/src/gcc/c/c-typeck.c @@ -102,8 +102,8 @@ static char *print_spelling (char *); static void warning_init (int, const char *); static tree digest_init (location_t, tree, tree, tree, bool, bool, int); -static void output_init_element (tree, tree, bool, tree, tree, int, bool, - struct obstack *); +static void output_init_element (location_t, tree, tree, bool, tree, tree, int, + bool, struct obstack *); static void output_pending_init_elements (int, struct obstack *); static int set_designator (int, struct obstack *); static void push_range_stack (tree, struct obstack *); @@ -7183,13 +7183,15 @@ if ((TREE_CODE (constructor_type) == RECORD_TYPE || TREE_CODE (constructor_type) == UNION_TYPE) && constructor_fields == 0) - process_init_element (pop_init_level (1, braced_init_obstack), + process_init_element (input_location, + pop_init_level (1, braced_init_obstack), true, braced_init_obstack); else if (TREE_CODE (constructor_type) == ARRAY_TYPE && constructor_max_index && tree_int_cst_lt (constructor_max_index, constructor_index)) - process_init_element (pop_init_level (1, braced_init_obstack), + process_init_element (input_location, + pop_init_level (1, braced_init_obstack), true, braced_init_obstack); else break; @@ -7389,10 +7391,9 @@ /* When we come to an explicit close brace, pop any inner levels that didn't have explicit braces. */ while (constructor_stack->implicit) - { - process_init_element (pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - } + process_init_element (input_location, + pop_init_level (1, braced_init_obstack), + true, braced_init_obstack); gcc_assert (!constructor_range_stack); } @@ -7570,10 +7571,9 @@ /* Designator list starts at the level of closest explicit braces. */ while (constructor_stack->implicit) - { - process_init_element (pop_init_level (1, braced_init_obstack), - true, braced_init_obstack); - } + process_init_element (input_location, + pop_init_level (1, braced_init_obstack), + true, braced_init_obstack); constructor_designated = 1; return 0; } @@ -8193,9 +8193,9 @@ existing initializer. 
*/ static void -output_init_element (tree value, tree origtype, bool strict_string, tree type, - tree field, int pending, bool implicit, - struct obstack * braced_init_obstack) +output_init_element (location_t loc, tree value, tree origtype, + bool strict_string, tree type, tree field, int pending, + bool implicit, struct obstack * braced_init_obstack) { tree semantic_type = NULL_TREE; bool maybe_const = true; @@ -8293,8 +8293,8 @@ if (semantic_type) value = build1 (EXCESS_PRECISION_EXPR, semantic_type, value); - value = digest_init (input_location, type, value, origtype, npc, - strict_string, require_constant_value); + value = digest_init (loc, type, value, origtype, npc, strict_string, + require_constant_value); if (value == error_mark_node) { constructor_erroneous = 1; @@ -8421,8 +8421,8 @@ { if (tree_int_cst_equal (elt->purpose, constructor_unfilled_index)) - output_init_element (elt->value, elt->origtype, true, - TREE_TYPE (constructor_type), + output_init_element (input_location, elt->value, elt->origtype, + true, TREE_TYPE (constructor_type), constructor_unfilled_index, 0, false, braced_init_obstack); else if (tree_int_cst_lt (constructor_unfilled_index, @@ -8476,8 +8476,8 @@ if (tree_int_cst_equal (elt_bitpos, ctor_unfilled_bitpos)) { constructor_unfilled_fields = elt->purpose; - output_init_element (elt->value, elt->origtype, true, - TREE_TYPE (elt->purpose), + output_init_element (input_location, elt->value, elt->origtype, + true, TREE_TYPE (elt->purpose), elt->purpose, 0, false, braced_init_obstack); } @@ -8550,7 +8550,7 @@ existing initializer. */ void -process_init_element (struct c_expr value, bool implicit, +process_init_element (location_t loc, struct c_expr value, bool implicit, struct obstack * braced_init_obstack) { tree orig_value = value.value; @@ -8594,7 +8594,7 @@ if ((TREE_CODE (constructor_type) == RECORD_TYPE || TREE_CODE (constructor_type) == UNION_TYPE) && constructor_fields == 0) - process_init_element (pop_init_level (1, braced_init_obstack), + process_init_element (loc, pop_init_level (1, braced_init_obstack), true, braced_init_obstack); else if ((TREE_CODE (constructor_type) == ARRAY_TYPE || TREE_CODE (constructor_type) == VECTOR_TYPE) @@ -8601,7 +8601,7 @@ && constructor_max_index && tree_int_cst_lt (constructor_max_index, constructor_index)) - process_init_element (pop_init_level (1, braced_init_obstack), + process_init_element (loc, pop_init_level (1, braced_init_obstack), true, braced_init_obstack); else break; @@ -8679,7 +8679,7 @@ if (value.value) { push_member_name (constructor_fields); - output_init_element (value.value, value.original_type, + output_init_element (loc, value.value, value.original_type, strict_string, fieldtype, constructor_fields, 1, implicit, braced_init_obstack); @@ -8771,7 +8771,7 @@ if (value.value) { push_member_name (constructor_fields); - output_init_element (value.value, value.original_type, + output_init_element (loc, value.value, value.original_type, strict_string, fieldtype, constructor_fields, 1, implicit, braced_init_obstack); @@ -8823,7 +8823,7 @@ if (value.value) { push_array_bounds (tree_to_uhwi (constructor_index)); - output_init_element (value.value, value.original_type, + output_init_element (loc, value.value, value.original_type, strict_string, elttype, constructor_index, 1, implicit, braced_init_obstack); @@ -8858,7 +8858,7 @@ { if (TREE_CODE (value.value) == VECTOR_CST) elttype = TYPE_MAIN_VARIANT (constructor_type); - output_init_element (value.value, value.original_type, + output_init_element (loc, 
value.value, value.original_type, strict_string, elttype, constructor_index, 1, implicit, braced_init_obstack); @@ -8887,7 +8887,7 @@ else { if (value.value) - output_init_element (value.value, value.original_type, + output_init_element (loc, value.value, value.original_type, strict_string, constructor_type, NULL_TREE, 1, implicit, braced_init_obstack); @@ -8906,8 +8906,8 @@ while (constructor_stack != range_stack->stack) { gcc_assert (constructor_stack->implicit); - process_init_element (pop_init_level (1, - braced_init_obstack), + process_init_element (loc, + pop_init_level (1, braced_init_obstack), true, braced_init_obstack); } for (p = range_stack; @@ -8915,7 +8915,8 @@ p = p->prev) { gcc_assert (constructor_stack->implicit); - process_init_element (pop_init_level (1, braced_init_obstack), + process_init_element (loc, + pop_init_level (1, braced_init_obstack), true, braced_init_obstack); } --- a/src/gcc/c/c-tree.h +++ b/src/gcc/c/c-tree.h @@ -612,7 +612,8 @@ extern struct c_expr pop_init_level (int, struct obstack *); extern void set_init_index (tree, tree, struct obstack *); extern void set_init_label (tree, struct obstack *); -extern void process_init_element (struct c_expr, bool, struct obstack *); +extern void process_init_element (location_t, struct c_expr, bool, + struct obstack *); extern tree build_compound_literal (location_t, tree, tree, bool); extern void check_compound_literal_type (location_t, struct c_type_name *); extern tree c_start_case (location_t, location_t, tree); --- a/src/gcc/c/ChangeLog.linaro +++ b/src/gcc/c/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/configure +++ b/src/gcc/configure @@ -1677,7 +1677,8 @@ use sysroot as the system root during the build --with-sysroot[=DIR] search for usr/lib, usr/include, et al, within DIR --with-specs=SPECS add SPECS to driver command-line processing - --with-pkgversion=PKG Use PKG in the version string in place of "GCC" + --with-pkgversion=PKG Use PKG in the version string in place of "Linaro + GCC `cat $srcdir/LINARO-VERSION`" --with-bugurl=URL Direct users to URL to report a bug --with-multilib-list select multilibs (AArch64, SH and x86-64 only) --with-gnu-ld assume the C compiler uses GNU ld default=no @@ -7222,7 +7223,7 @@ *) PKGVERSION="($withval) " ;; esac else - PKGVERSION="(GCC) " + PKGVERSION="(Linaro GCC `cat $srcdir/LINARO-VERSION`) " fi @@ -17927,7 +17928,7 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 17930 "configure" +#line 17931 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -18033,7 +18034,7 @@ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 18036 "configure" +#line 18037 "configure" #include "confdefs.h" #if HAVE_DLFCN_H --- a/src/gcc/objc/ChangeLog.linaro +++ b/src/gcc/objc/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. 
--- a/src/gcc/ChangeLog.linaro +++ b/src/gcc/ChangeLog.linaro @@ -0,0 +1,2648 @@ +2014-07-20 Yvan Roux + + Revert: + 2014-07-16 Yvan Roux + + Backport from trunk r211129. + 2014-06-02 Ramana Radhakrishnan + + PR target/61154 + * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define. + * config/arm/arm.md (mov64 splitter): Replace const_double_operand + with immediate_operand. + +2014-07-19 Yvan Roux + + * LINARO-VERSION: Bump version. + +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + * LINARO-VERSION: Update. + +2014-07-17 Yvan Roux + + Backport from trunk r211887, r211899. + 2014-06-23 James Greenhalgh + + * config/aarch64/aarch64.md (addsi3_aarch64): Set "simd" attr to + "yes" where needed. + + 2014-06-23 James Greenhalgh + + * config/aarch64/aarch64.md (*addsi3_aarch64): Add alternative in + vector registers. + +2014-07-17 Yvan Roux + + Backport from trunk r211440. + 2014-06-11 Kyrylo Tkachov + + * config.gcc (aarch64*-*-*): Add arm_acle.h to extra headers. + * Makefile.in (TEXI_GCC_FILES): Add aarch64-acle-intrinsics.texi to + dependencies. + * config/aarch64/aarch64-builtins.c (AARCH64_CRC32_BUILTINS): Define. + (aarch64_crc_builtin_datum): New struct. + (aarch64_crc_builtin_data): New. + (aarch64_init_crc32_builtins): New function. + (aarch64_init_builtins): Initialise CRC32 builtins when appropriate. + (aarch64_crc32_expand_builtin): New. + (aarch64_expand_builtin): Add CRC32 builtin expansion case. + * config/aarch64/aarch64.h (TARGET_CPU_CPP_BUILTINS): Define + __ARM_FEATURE_CRC32 when appropriate. + (TARGET_CRC32): Define. + * config/aarch64/aarch64.md (UNSPEC_CRC32B, UNSPEC_CRC32H, + UNSPEC_CRC32W, UNSPEC_CRC32X, UNSPEC_CRC32CB, UNSPEC_CRC32CH, + UNSPEC_CRC32CW, UNSPEC_CRC32CX): New unspec values. + (aarch64_): New pattern. + * config/aarch64/arm_acle.h: New file. + * config/aarch64/iterators.md (CRC): New int iterator. + (crc_variant, crc_mode): New int attributes. + * doc/aarch64-acle-intrinsics.texi: New file. + * doc/extend.texi (aarch64): Document aarch64 ACLE intrinsics. + Include aarch64-acle-intrinsics.texi. + +2014-07-17 Yvan Roux + + Backport from trunk r211174. + 2014-06-03 Alan Lawrence + + * config/aarch64/aarch64-simd.md (aarch64_rev): + New pattern. + * config/aarch64/aarch64.c (aarch64_evpc_rev): New function. + (aarch64_expand_vec_perm_const_1): Add call to aarch64_evpc_rev. + * config/aarch64/iterators.md (REVERSE): New iterator. + (UNSPEC_REV64, UNSPEC_REV32, UNSPEC_REV16): New enum elements. + (rev_op): New int_attribute. + * config/aarch64/arm_neon.h (vrev16_p8, vrev16_s8, vrev16_u8, + vrev16q_p8, vrev16q_s8, vrev16q_u8, vrev32_p8, vrev32_p16, vrev32_s8, + vrev32_s16, vrev32_u8, vrev32_u16, vrev32q_p8, vrev32q_p16, vrev32q_s8, + vrev32q_s16, vrev32q_u8, vrev32q_u16, vrev64_f32, vrev64_p8, + vrev64_p16, vrev64_s8, vrev64_s16, vrev64_s32, vrev64_u8, vrev64_u16, + vrev64_u32, vrev64q_f32, vrev64q_p8, vrev64q_p16, vrev64q_s8, + vrev64q_s16, vrev64q_s32, vrev64q_u8, vrev64q_u16, vrev64q_u32): + Replace temporary __asm__ with __builtin_shuffle. + +2014-07-17 Yvan Roux + + Backport from trunk r210216, r210218, r210219. + 2014-05-08 Ramana Radhakrishnan + + * config/arm/arm_neon.h: Update comment. + * config/arm/neon-docgen.ml: Delete. + * config/arm/neon-gen.ml: Delete. + * doc/arm-neon-intrinsics.texi: Update comment. + + 2014-05-08 Ramana Radhakrishnan + + * config/arm/arm_neon_builtins.def (vadd, vsub): Only define the v2sf + and v4sf versions. + (vand, vorr, veor, vorn, vbic): Remove. 
+ * config/arm/neon.md (neon_vadd, neon_vsub, neon_vadd_unspec): Adjust + iterator. + (neon_vsub_unspec): Likewise. + (neon_vorr, neon_vand, neon_vbic, neon_veor, neon_vorn): Remove. + + 2014-05-08 Ramana Radhakrishnan + + * config/arm/arm_neon.h (vadd_s8): GNU C implementation + (vadd_s16): Likewise. + (vadd_s32): Likewise. + (vadd_f32): Likewise. + (vadd_u8): Likewise. + (vadd_u16): Likewise. + (vadd_u32): Likewise. + (vadd_s64): Likewise. + (vadd_u64): Likewise. + (vaddq_s8): Likewise. + (vaddq_s16): Likewise. + (vaddq_s32): Likewise. + (vaddq_s64): Likewise. + (vaddq_f32): Likewise. + (vaddq_u8): Likewise. + (vaddq_u16): Likewise. + (vaddq_u32): Likewise. + (vaddq_u64): Likewise. + (vmul_s8): Likewise. + (vmul_s16): Likewise. + (vmul_s32): Likewise. + (vmul_f32): Likewise. + (vmul_u8): Likewise. + (vmul_u16): Likewise. + (vmul_u32): Likewise. + (vmul_p8): Likewise. + (vmulq_s8): Likewise. + (vmulq_s16): Likewise. + (vmulq_s32): Likewise. + (vmulq_f32): Likewise. + (vmulq_u8): Likewise. + (vmulq_u16): Likewise. + (vmulq_u32): Likewise. + (vsub_s8): Likewise. + (vsub_s16): Likewise. + (vsub_s32): Likewise. + (vsub_f32): Likewise. + (vsub_u8): Likewise. + (vsub_u16): Likewise. + (vsub_u32): Likewise. + (vsub_s64): Likewise. + (vsub_u64): Likewise. + (vsubq_s8): Likewise. + (vsubq_s16): Likewise. + (vsubq_s32): Likewise. + (vsubq_s64): Likewise. + (vsubq_f32): Likewise. + (vsubq_u8): Likewise. + (vsubq_u16): Likewise. + (vsubq_u32): Likewise. + (vsubq_u64): Likewise. + (vand_s8): Likewise. + (vand_s16): Likewise. + (vand_s32): Likewise. + (vand_u8): Likewise. + (vand_u16): Likewise. + (vand_u32): Likewise. + (vand_s64): Likewise. + (vand_u64): Likewise. + (vandq_s8): Likewise. + (vandq_s16): Likewise. + (vandq_s32): Likewise. + (vandq_s64): Likewise. + (vandq_u8): Likewise. + (vandq_u16): Likewise. + (vandq_u32): Likewise. + (vandq_u64): Likewise. + (vorr_s8): Likewise. + (vorr_s16): Likewise. + (vorr_s32): Likewise. + (vorr_u8): Likewise. + (vorr_u16): Likewise. + (vorr_u32): Likewise. + (vorr_s64): Likewise. + (vorr_u64): Likewise. + (vorrq_s8): Likewise. + (vorrq_s16): Likewise. + (vorrq_s32): Likewise. + (vorrq_s64): Likewise. + (vorrq_u8): Likewise. + (vorrq_u16): Likewise. + (vorrq_u32): Likewise. + (vorrq_u64): Likewise. + (veor_s8): Likewise. + (veor_s16): Likewise. + (veor_s32): Likewise. + (veor_u8): Likewise. + (veor_u16): Likewise. + (veor_u32): Likewise. + (veor_s64): Likewise. + (veor_u64): Likewise. + (veorq_s8): Likewise. + (veorq_s16): Likewise. + (veorq_s32): Likewise. + (veorq_s64): Likewise. + (veorq_u8): Likewise. + (veorq_u16): Likewise. + (veorq_u32): Likewise. + (veorq_u64): Likewise. + (vbic_s8): Likewise. + (vbic_s16): Likewise. + (vbic_s32): Likewise. + (vbic_u8): Likewise. + (vbic_u16): Likewise. + (vbic_u32): Likewise. + (vbic_s64): Likewise. + (vbic_u64): Likewise. + (vbicq_s8): Likewise. + (vbicq_s16): Likewise. + (vbicq_s32): Likewise. + (vbicq_s64): Likewise. + (vbicq_u8): Likewise. + (vbicq_u16): Likewise. + (vbicq_u32): Likewise. + (vbicq_u64): Likewise. + (vorn_s8): Likewise. + (vorn_s16): Likewise. + (vorn_s32): Likewise. + (vorn_u8): Likewise. + (vorn_u16): Likewise. + (vorn_u32): Likewise. + (vorn_s64): Likewise. + (vorn_u64): Likewise. + (vornq_s8): Likewise. + (vornq_s16): Likewise. + (vornq_s32): Likewise. + (vornq_s64): Likewise. + (vornq_u8): Likewise. + (vornq_u16): Likewise. + (vornq_u32): Likewise. + (vornq_u64): Likewise. + +2014-07-16 Yvan Roux + + Backport from trunk r210151. 
+ 2014-05-07 Alan Lawrence + + * config/aarch64/arm_neon.h (vtrn1_f32, vtrn1_p8, vtrn1_p16, vtrn1_s8, + vtrn1_s16, vtrn1_s32, vtrn1_u8, vtrn1_u16, vtrn1_u32, vtrn1q_f32, + vtrn1q_f64, vtrn1q_p8, vtrn1q_p16, vtrn1q_s8, vtrn1q_s16, vtrn1q_s32, + vtrn1q_s64, vtrn1q_u8, vtrn1q_u16, vtrn1q_u32, vtrn1q_u64, vtrn2_f32, + vtrn2_p8, vtrn2_p16, vtrn2_s8, vtrn2_s16, vtrn2_s32, vtrn2_u8, + vtrn2_u16, vtrn2_u32, vtrn2q_f32, vtrn2q_f64, vtrn2q_p8, vtrn2q_p16, + vtrn2q_s8, vtrn2q_s16, vtrn2q_s32, vtrn2q_s64, vtrn2q_u8, vtrn2q_u16, + vtrn2q_u32, vtrn2q_u64): Replace temporary asm with __builtin_shuffle. + +2014-07-16 Yvan Roux + + Backport from trunk r209794. + 2014-04-25 Marek Polacek + + PR c/60114 + * c-parser.c (c_parser_initelt): Pass input_location to + process_init_element. + (c_parser_initval): Pass loc to process_init_element. + * c-tree.h (process_init_element): Adjust declaration. + * c-typeck.c (push_init_level): Pass input_location to + process_init_element. + (pop_init_level): Likewise. + (set_designator): Likewise. + (output_init_element): Add location_t parameter. Pass loc to + digest_init. + (output_pending_init_elements): Pass input_location to + output_init_element. + (process_init_element): Add location_t parameter. Pass loc to + output_init_element. + +2014-07-16 Yvan Roux + + Backport from trunk r211771. + 2014-06-18 Kyrylo Tkachov + + * genattrtab.c (n_bypassed): New variable. + (process_bypasses): Initialise n_bypassed. + Count number of bypassed reservations. + (make_automaton_attrs): Allocate space for bypassed reservations + rather than number of bypasses. + +2014-07-16 Yvan Roux + + Backport from trunk r210861. + 2014-05-23 Jiong Wang + + * config/aarch64/predicates.md (aarch64_call_insn_operand): New + predicate. + * config/aarch64/constraints.md ("Ucs", "Usf"): New constraints. + * config/aarch64/aarch64.md (*sibcall_insn, *sibcall_value_insn): + Adjust for tailcalling through registers. + * config/aarch64/aarch64.h (enum reg_class): New caller save + register class. + (REG_CLASS_NAMES): Likewise. + (REG_CLASS_CONTENTS): Likewise. + * config/aarch64/aarch64.c (aarch64_function_ok_for_sibcall): + Allow tailcalling without decls. + +2014-07-16 Yvan Roux + + Backport from trunk r211314. + 2014-06-06 James Greenhalgh + + * config/aarch64/aarch64-protos.h (aarch64_expand_movmem): New. + * config/aarch64/aarch64.c (aarch64_move_pointer): New. + (aarch64_progress_pointer): Likewise. + (aarch64_copy_one_part_and_move_pointers): Likewise. + (aarch64_expand_movmen): Likewise. + * config/aarch64/aarch64.h (MOVE_RATIO): Set low. + * config/aarch64/aarch64.md (movmem): New. + +2014-07-16 Yvan Roux + + Backport from trunk r211185, 211186. + 2014-06-03 Alan Lawrence + + * gcc/config/aarch64/aarch64-builtins.c + (aarch64_types_binop_uus_qualifiers, + aarch64_types_shift_to_unsigned_qualifiers, + aarch64_types_unsigned_shiftacc_qualifiers): Define. + * gcc/config/aarch64/aarch64-simd-builtins.def (uqshl, uqrshl, uqadd, + uqsub, usqadd, usra_n, ursra_n, uqshrn_n, uqrshrn_n, usri_n, usli_n, + sqshlu_n, uqshl_n): Update qualifiers. 
+ * gcc/config/aarch64/arm_neon.h (vqadd_u8, vqadd_u16, vqadd_u32, + vqadd_u64, vqaddq_u8, vqaddq_u16, vqaddq_u32, vqaddq_u64, vqsub_u8, + vqsub_u16, vqsub_u32, vqsub_u64, vqsubq_u8, vqsubq_u16, vqsubq_u32, + vqsubq_u64, vqaddb_u8, vqaddh_u16, vqadds_u32, vqaddd_u64, vqrshl_u8, + vqrshl_u16, vqrshl_u32, vqrshl_u64, vqrshlq_u8, vqrshlq_u16, + vqrshlq_u32, vqrshlq_u64, vqrshlb_u8, vqrshlh_u16, vqrshls_u32, + vqrshld_u64, vqrshrn_n_u16, vqrshrn_n_u32, vqrshrn_n_u64, + vqrshrnh_n_u16, vqrshrns_n_u32, vqrshrnd_n_u64, vqshl_u8, vqshl_u16, + vqshl_u32, vqshl_u64, vqshlq_u8, vqshlq_u16, vqshlq_u32, vqshlq_u64, + vqshlb_u8, vqshlh_u16, vqshls_u32, vqshld_u64, vqshl_n_u8, vqshl_n_u16, + vqshl_n_u32, vqshl_n_u64, vqshlq_n_u8, vqshlq_n_u16, vqshlq_n_u32, + vqshlq_n_u64, vqshlb_n_u8, vqshlh_n_u16, vqshls_n_u32, vqshld_n_u64, + vqshlu_n_s8, vqshlu_n_s16, vqshlu_n_s32, vqshlu_n_s64, vqshluq_n_s8, + vqshluq_n_s16, vqshluq_n_s32, vqshluq_n_s64, vqshlub_n_s8, + vqshluh_n_s16, vqshlus_n_s32, vqshlud_n_s64, vqshrn_n_u16, + vqshrn_n_u32, vqshrn_n_u64, vqshrnh_n_u16, vqshrns_n_u32, + vqshrnd_n_u64, vqsubb_u8, vqsubh_u16, vqsubs_u32, vqsubd_u64, + vrsra_n_u8, vrsra_n_u16, vrsra_n_u32, vrsra_n_u64, vrsraq_n_u8, + vrsraq_n_u16, vrsraq_n_u32, vrsraq_n_u64, vrsrad_n_u64, vsli_n_u8, + vsli_n_u16, vsli_n_u32,vsli_n_u64, vsliq_n_u8, vsliq_n_u16, + vsliq_n_u32, vsliq_n_u64, vslid_n_u64, vsqadd_u8, vsqadd_u16, + vsqadd_u32, vsqadd_u64, vsqaddq_u8, vsqaddq_u16, vsqaddq_u32, + vsqaddq_u64, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, vsqaddd_u64, + vsra_n_u8, vsra_n_u16, vsra_n_u32, vsra_n_u64, vsraq_n_u8, + vsraq_n_u16, vsraq_n_u32, vsraq_n_u64, vsrad_n_u64, vsri_n_u8, + vsri_n_u16, vsri_n_u32, vsri_n_u64, vsriq_n_u8, vsriq_n_u16, + vsriq_n_u32, vsriq_n_u64, vsrid_n_u64): Remove casts. + + 2014-06-03  Alan Lawrence + + * gcc/config/aarch64/aarch64-builtins.c + (aarch64_types_binop_ssu_qualifiers): New static data. + (TYPES_BINOP_SSU): Define. + * gcc/config/aarch64/aarch64-simd-builtins.def (suqadd, ushl, urshl, + urshr_n, ushll_n): Use appropriate unsigned qualifiers. + * gcc/config/aarch64/arm_neon.h (vrshl_u8, vrshl_u16, vrshl_u32, + vrshl_u64, vrshlq_u8, vrshlq_u16, vrshlq_u32, vrshlq_u64, vrshld_u64, + vrshr_n_u8, vrshr_n_u16, vrshr_n_u32, vrshr_n_u64, vrshrq_n_u8, + vrshrq_n_u16, vrshrq_n_u32, vrshrq_n_u64, vrshrd_n_u64, vshll_n_u8, + vshll_n_u16, vshll_n_u32, vuqadd_s8, vuqadd_s16, vuqadd_s32, + vuqadd_s64, vuqaddq_s8, vuqaddq_s16, vuqaddq_s32, vuqaddq_s64, + vuqaddb_s8, vuqaddh_s16, vuqadds_s32, vuqaddd_s64): Add signedness + suffix to builtin function name, remove cast. + (vshl_s8, vshl_s16, vshl_s32, vshl_s64, vshl_u8, vshl_u16, vshl_u32, + vshl_u64, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_s64, vshlq_u8, + vshlq_u16, vshlq_u32, vshlq_u64, vshld_s64, vshld_u64): Remove cast. + +2014-07-16  Yvan Roux + + Backport from trunk r211408, 211416. + 2014-06-10  Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Fix + REG_CFA_RESTORE mode. + + 2014-06-10  Jiong Wang + + * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs) + (aarch64_save_or_restore_callee_save_registers): Fix layout. + +2014-07-16  Yvan Roux + + Backport from trunk r211418. + 2014-06-10  Kyrylo Tkachov + + * config/aarch64/aarch64-simd.md (move_lo_quad_): + Change second alternative type to f_mcr. + * config/aarch64/aarch64.md (*movsi_aarch64): Change 11th + and 12th alternatives' types to f_mcr and f_mrc. + (*movdi_aarch64): Same for 12th and 13th alternatives. 
+ (*movsf_aarch64): Change 9th alternatives' type to mov_reg. + (aarch64_movtilow_tilow): Change type to fmov. + +2014-07-16 Yvan Roux + + Backport from trunk r211371. + 2014-06-09 Ramana Radhakrishnan + + * config/arm/arm-modes.def: Remove XFmode. + +2014-07-16 Yvan Roux + + Backport from trunk r211268. + 2014-06-05 Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_expand_prologue): Update stack + layout comment. + +2014-07-16 Yvan Roux + + Backport from trunk r211129. + 2014-06-02 Ramana Radhakrishnan + + PR target/61154 + * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define. + * config/arm/arm.md (mov64 splitter): Replace const_double_operand + with immediate_operand. + +2014-07-16 Yvan Roux + + Backport from trunk r211073. + 2014-05-30 Kyrylo Tkachov + + * config/arm/thumb2.md (*thumb2_movhi_insn): Set type of movw + to mov_imm. + * config/arm/vfp.md (*thumb2_movsi_vfp): Likewise. + +2014-07-16 Yvan Roux + + Backport from trunk r211050. + 2014-05-29 Richard Earnshaw + Richard Sandiford + + * arm/iterators.md (shiftable_ops): New code iterator. + (t2_binop0, arith_shift_insn): New code attributes. + * arm/predicates.md (shift_nomul_operator): New predicate. + * arm/arm.md (insn_enabled): Delete. + (enabled): Remove insn_enabled test. + (*arith_shiftsi): Delete. Replace with ... + (*_multsi): ... new pattern. + (*_shiftsi): ... new pattern. + * config/arm/arm.c (arm_print_operand): Handle operand format 'b'. + +2014-07-16 Yvan Roux + + Backport from trunk r210996. + 2014-05-27 Andrew Pinski + + * config/aarch64/aarch64.md (stack_protect_set_): + Use for the register in assembly template. + (stack_protect_test): Use the mode of operands[0] for the + result. + (stack_protect_test_): Use for the register + in assembly template. + +2014-07-16 Yvan Roux + + Backport from trunk r210967. + 2014-05-27 Kyrylo Tkachov + + * config/arm/neon.md (neon_bswap): New pattern. + * config/arm/arm.c (neon_itype): Add NEON_BSWAP. + (arm_init_neon_builtins): Handle NEON_BSWAP. + Define required type nodes. + (arm_expand_neon_builtin): Handle NEON_BSWAP. + (arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins. + * config/arm/arm_neon_builtins.def (bswap): Define builtins. + * config/arm/iterators.md (VDQHSD): New mode iterator. + +2014-07-16 Yvan Roux + + Backport from trunk r210471. + 2014-05-15 Kyrylo Tkachov + + * config/arm/arm.c (arm_option_override): Use the SCHED_PRESSURE_MODEL + enum name for PARAM_SCHED_PRESSURE_ALGORITHM. + +2014-07-16 Yvan Roux + + Backport from trunk r210369. + 2014-05-13 Kyrylo Tkachov + + * config/arm/arm.c (neon_itype): Remove NEON_RESULTPAIR. + (arm_init_neon_builtins): Remove handling of NEON_RESULTPAIR. + Remove associated type declarations and initialisations. + (arm_expand_neon_builtin): Likewise. + (neon_emit_pair_result_insn): Delete. + * config/arm/arm_neon_builtins (vtrn, vzip, vuzp): Delete. + * config/arm/neon.md (neon_vtrn): Delete. + (neon_vzip): Likewise. + (neon_vuzp): Likewise. + +2014-07-16 Yvan Roux + + Backport from trunk r211058, 211177. + 2014-05-29 Alan Lawrence + + * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers, + TYPES_BINOPV): New static data. + * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin. + * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi): + New patterns. + * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match + patterns for EXT. + (aarch64_evpc_ext): New function. + + * config/aarch64/iterators.md (UNSPEC_EXT): New enum element. 
+ + * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16, + vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32, + vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8, + vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32, + vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi. + + 2014-06-03 Alan Lawrence + + * config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle + location == 0. + +2014-07-16 Yvan Roux + + Backport from trunk r209797. + 2014-04-25 Kyrylo Tkachov + + * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): + Use HOST_WIDE_INT_C for mask literal. + (aarch_rev16_shleft_mask_imm_p): Likewise. + +2014-07-16 Yvan Roux + + Backport from trunk r211148. + 2014-06-02 Andrew Pinski + + * config/aarch64/aarch64-linux.h (GLIBC_DYNAMIC_LINKER): + /lib/ld-linux32-aarch64.so.1 is used for ILP32. + (LINUX_TARGET_LINK_SPEC): Update linker script for ILP32. + file whose name depends on -mabi= and -mbig-endian. + * config/aarch64/t-aarch64-linux (MULTILIB_OSDIRNAMES): Handle LP64 + better and handle ilp32 too. + (MULTILIB_OPTIONS): Delete. + (MULTILIB_DIRNAMES): Delete. + +2014-07-16 Yvan Roux + + Backport from trunk r210828, r211103. + 2014-05-31 Kugan Vivekanandarajah + + * config/arm/arm.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New define. + (arm_builtins) : Add ARM_BUILTIN_GET_FPSCR and ARM_BUILTIN_SET_FPSCR. + (bdesc_2arg) : Add description for builtins __builtins_arm_set_fpscr + and __builtins_arm_get_fpscr. + (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and + __builtins_arm_get_fpscr. + (arm_expand_builtin) : Expand builtins __builtins_arm_set_fpscr and + __builtins_arm_ldfpscr. + (arm_atomic_assign_expand_fenv): New function. + * config/arm/vfp.md (set_fpscr): New pattern. + (get_fpscr) : Likewise. + * config/arm/unspecs.md (unspecv): Add VUNSPEC_GET_FPSCR and + VUNSPEC_SET_FPSCR. + * doc/extend.texi (AARCH64 Built-in Functions) : Document + __builtins_arm_set_fpscr, __builtins_arm_get_fpscr. + + 2014-05-23 Kugan Vivekanandarajah + + * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New + define. + * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv): + New function declaration. + * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add + AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR. + AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR. + (aarch64_init_builtins) : Initialize builtins + __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. + __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. + (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr + __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr, + and __builtins_aarch64_set_fpsr. + (aarch64_atomic_assign_expand_fenv): New function. + * config/aarch64/aarch64.md (set_fpcr): New pattern. + (get_fpcr) : Likewise. + (set_fpsr) : Likewise. + (get_fpsr) : Likewise. + (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR + and UNSPECV_SET_FPSR. + * doc/extend.texi (AARCH64 Built-in Functions) : Document + __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. + __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. + +2014-07-16 Yvan Roux + + Backport from trunk r210355. + 2014-05-13 Ian Bolton + + * config/aarch64/aarch64-protos.h + (aarch64_hard_regno_caller_save_mode): New prototype. + * config/aarch64/aarch64.c (aarch64_hard_regno_caller_save_mode): + New function. 
+ * config/aarch64/aarch64.h (HARD_REGNO_CALLER_SAVE_MODE): New macro. + +2014-07-16 Yvan Roux + + Backport from trunk r209943. + 2014-04-30 Alan Lawrence + + * config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8, + vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32, + vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32, + vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32, + vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8, + vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16, + vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16, + vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle. + +2014-06-26 Yvan Roux + + * LINARO-VERSION: Bump version. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + * LINARO-VERSION: Update. + +2014-06-24 Yvan Roux + + Revert: + 2014-05-23 Yvan Roux + + Backport from trunk r209643. + 2014-04-22 Ramana Radhakrishnan + + * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. + +2014-06-13 Yvan Roux + + Backport from trunk r210493, 210494, 210495, 210496, 210497, 210498, + 210499, 210500, 210501, 210502, 210503, 210504, 210505, 210506, 210507, + 210508, 210509, 210510, 210512, 211205, 211206. + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64-protos.h (scale_addr_mode_cost): New. + (cpu_addrcost_table): Use it. + * config/aarch64/aarch64.c (generic_addrcost_table): Initialize it. + (aarch64_address_cost): Rewrite using aarch64_classify_address, + move it. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (cortexa57_addrcost_table): New. + (cortexa57_vector_cost): Likewise. + (cortexa57_tunings): Use them. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_costs_wrapper): New. + (TARGET_RTX_COSTS): Call it. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_build_constant): Conditionally + emit instructions, return number of instructions which would + be emitted. + (aarch64_add_constant): Update call to aarch64_build_constant. + (aarch64_output_mi_thunk): Likewise. + (aarch64_rtx_costs): Estimate cost of a CONST_INT, cost + a CONST_DOUBLE. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename + to... + (aarch64_strip_extend): ...this, don't strip shifts, check RTX is + well formed. + (aarch64_rtx_mult_cost): New. + (aarch64_rtx_costs): Use it, refactor as appropriate. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Set default costs. + + 2014-05-16 James Greenhalgh + Philip Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costing + for SET RTX. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Use address + costs when costing loads and stores to memory. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve cost for + logical operations. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost + ZERO_EXTEND and SIGN_EXTEND better. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for + rotates and shifts. + + 2014-03-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_arith_op_extract_p): New. + (aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT. 
+ + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for + DIV/MOD. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost comparison + operators. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost FMA, + FLOAT_EXTEND, FLOAT_TRUNCATE, ABS, SMAX, and SMIN. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost TRUNCATE. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost SYMBOL_REF, + HIGH, LO_SUM. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle the case + where we were unable to cost an RTX. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_mult_cost): Fix FNMUL case. + + 2014-06-03 Andrew Pinski + + * config/aarch64/aarch64.c (aarch64_if_then_else_costs): New function. + (aarch64_rtx_costs): Use aarch64_if_then_else_costs. + + 2014-06-03 Andrew Pinski + + * config/aarch64/aarch64.c (aarch64_if_then_else_costs): Allow non + comparisons for OP0. + +2014-06-13 Yvan Roux + + * LINARO-VERSION: Bump version. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + * LINARO-VERSION: Update. + +2014-06-04 Yvan Roux + + Backport from trunk r211211. + 2014-06-04 Bin Cheng + + * config/aarch64/aarch64.c (aarch64_classify_address) + (aarch64_legitimize_reload_address): Support full addressing modes + for vector modes. + * config/aarch64/aarch64.md (mov, movmisalign) + (*aarch64_simd_mov, *aarch64_simd_mov): Relax predicates. + +2014-05-25 Yvan Roux + + Backport from trunk r209906. + 2014-04-29 Alan Lawrence + + * config/aarch64/arm_neon.h (vzip1_f32, vzip1_p8, vzip1_p16, vzip1_s8, + vzip1_s16, vzip1_s32, vzip1_u8, vzip1_u16, vzip1_u32, vzip1q_f32, + vzip1q_f64, vzip1q_p8, vzip1q_p16, vzip1q_s8, vzip1q_s16, vzip1q_s32, + vzip1q_s64, vzip1q_u8, vzip1q_u16, vzip1q_u32, vzip1q_u64, vzip2_f32, + vzip2_p8, vzip2_p16, vzip2_s8, vzip2_s16, vzip2_s32, vzip2_u8, + vzip2_u16, vzip2_u32, vzip2q_f32, vzip2q_f64, vzip2q_p8, vzip2q_p16, + vzip2q_s8, vzip2q_s16, vzip2q_s32, vzip2q_s64, vzip2q_u8, vzip2q_u16, + vzip2q_u32, vzip2q_u64): Replace inline __asm__ with __builtin_shuffle. + +2014-05-25 Yvan Roux + + Backport from trunk r209897. + 2014-04-29 James Greenhalgh + + * calls.c (initialize_argument_information): Always treat + PUSH_ARGS_REVERSED as 1, simplify code accordingly. + (expand_call): Likewise. + (emit_library_call_calue_1): Likewise. + * expr.c (PUSH_ARGS_REVERSED): Do not define. + (emit_push_insn): Always treat PUSH_ARGS_REVERSED as 1, simplify + code accordingly. + +2014-05-25 Yvan Roux + + Backport from trunk r209880. + 2014-04-28 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_types_storestruct_lane_qualifiers): New. + (TYPES_STORESTRUCT_LANE): Likewise. + * config/aarch64/aarch64-simd-builtins.def (st2_lane): New. + (st3_lane): Likewise. + (st4_lane): Likewise. + * config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane): New. + (vec_store_lanesci_lane): Likewise. + (vec_store_lanesxi_lane): Likewise. + (aarch64_st2_lane): Likewise. + (aarch64_st3_lane): Likewise. + (aarch64_st4_lane): Likewise. + * config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE. + * config/aarch64/arm_neon.h + (__ST2_LANE_FUNC): Rewrite using builtins, update use points to + use new macro arguments. + (__ST3_LANE_FUNC): Likewise. + (__ST4_LANE_FUNC): Likewise. 
--- a/src/gcc/ChangeLog.linaro +++ b/src/gcc/ChangeLog.linaro +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-07-16 Yvan Roux + + Backport from trunk r211771. + 2014-06-18 Kyrylo Tkachov + + * genattrtab.c (n_bypassed): New variable. + (process_bypasses): Initialise n_bypassed. + Count number of bypassed reservations. + (make_automaton_attrs): Allocate space for bypassed reservations + rather than number of bypasses. + +2014-07-16 Yvan Roux + + Backport from trunk r210861. + 2014-05-23 Jiong Wang + + * config/aarch64/predicates.md (aarch64_call_insn_operand): New + predicate. + * config/aarch64/constraints.md ("Ucs", "Usf"): New constraints. + * config/aarch64/aarch64.md (*sibcall_insn, *sibcall_value_insn): + Adjust for tailcalling through registers. + * config/aarch64/aarch64.h (enum reg_class): New caller save + register class. + (REG_CLASS_NAMES): Likewise. + (REG_CLASS_CONTENTS): Likewise. + * config/aarch64/aarch64.c (aarch64_function_ok_for_sibcall): + Allow tailcalling without decls. + +2014-07-16 Yvan Roux + + Backport from trunk r211314. + 2014-06-06 James Greenhalgh + + * config/aarch64/aarch64-protos.h (aarch64_expand_movmem): New. + * config/aarch64/aarch64.c (aarch64_move_pointer): New. + (aarch64_progress_pointer): Likewise. + (aarch64_copy_one_part_and_move_pointers): Likewise. + (aarch64_expand_movmem): Likewise. + * config/aarch64/aarch64.h (MOVE_RATIO): Set low. + * config/aarch64/aarch64.md (movmem): New. + +2014-07-16 Yvan Roux + + Backport from trunk r211185, 211186. + 2014-06-03 Alan Lawrence + + * gcc/config/aarch64/aarch64-builtins.c + (aarch64_types_binop_uus_qualifiers, + aarch64_types_shift_to_unsigned_qualifiers, + aarch64_types_unsigned_shiftacc_qualifiers): Define. + * gcc/config/aarch64/aarch64-simd-builtins.def (uqshl, uqrshl, uqadd, + uqsub, usqadd, usra_n, ursra_n, uqshrn_n, uqrshrn_n, usri_n, usli_n, + sqshlu_n, uqshl_n): Update qualifiers.
+ * gcc/config/aarch64/arm_neon.h (vqadd_u8, vqadd_u16, vqadd_u32, + vqadd_u64, vqaddq_u8, vqaddq_u16, vqaddq_u32, vqaddq_u64, vqsub_u8, + vqsub_u16, vqsub_u32, vqsub_u64, vqsubq_u8, vqsubq_u16, vqsubq_u32, + vqsubq_u64, vqaddb_u8, vqaddh_u16, vqadds_u32, vqaddd_u64, vqrshl_u8, + vqrshl_u16, vqrshl_u32, vqrshl_u64, vqrshlq_u8, vqrshlq_u16, + vqrshlq_u32, vqrshlq_u64, vqrshlb_u8, vqrshlh_u16, vqrshls_u32, + vqrshld_u64, vqrshrn_n_u16, vqrshrn_n_u32, vqrshrn_n_u64, + vqrshrnh_n_u16, vqrshrns_n_u32, vqrshrnd_n_u64, vqshl_u8, vqshl_u16, + vqshl_u32, vqshl_u64, vqshlq_u8, vqshlq_u16, vqshlq_u32, vqshlq_u64, + vqshlb_u8, vqshlh_u16, vqshls_u32, vqshld_u64, vqshl_n_u8, vqshl_n_u16, + vqshl_n_u32, vqshl_n_u64, vqshlq_n_u8, vqshlq_n_u16, vqshlq_n_u32, + vqshlq_n_u64, vqshlb_n_u8, vqshlh_n_u16, vqshls_n_u32, vqshld_n_u64, + vqshlu_n_s8, vqshlu_n_s16, vqshlu_n_s32, vqshlu_n_s64, vqshluq_n_s8, + vqshluq_n_s16, vqshluq_n_s32, vqshluq_n_s64, vqshlub_n_s8, + vqshluh_n_s16, vqshlus_n_s32, vqshlud_n_s64, vqshrn_n_u16, + vqshrn_n_u32, vqshrn_n_u64, vqshrnh_n_u16, vqshrns_n_u32, + vqshrnd_n_u64, vqsubb_u8, vqsubh_u16, vqsubs_u32, vqsubd_u64, + vrsra_n_u8, vrsra_n_u16, vrsra_n_u32, vrsra_n_u64, vrsraq_n_u8, + vrsraq_n_u16, vrsraq_n_u32, vrsraq_n_u64, vrsrad_n_u64, vsli_n_u8, + vsli_n_u16, vsli_n_u32,vsli_n_u64, vsliq_n_u8, vsliq_n_u16, + vsliq_n_u32, vsliq_n_u64, vslid_n_u64, vsqadd_u8, vsqadd_u16, + vsqadd_u32, vsqadd_u64, vsqaddq_u8, vsqaddq_u16, vsqaddq_u32, + vsqaddq_u64, vsqaddb_u8, vsqaddh_u16, vsqadds_u32, vsqaddd_u64, + vsra_n_u8, vsra_n_u16, vsra_n_u32, vsra_n_u64, vsraq_n_u8, + vsraq_n_u16, vsraq_n_u32, vsraq_n_u64, vsrad_n_u64, vsri_n_u8, + vsri_n_u16, vsri_n_u32, vsri_n_u64, vsriq_n_u8, vsriq_n_u16, + vsriq_n_u32, vsriq_n_u64, vsrid_n_u64): Remove casts. + + 2014-06-03 Alan Lawrence + + * gcc/config/aarch64/aarch64-builtins.c + (aarch64_types_binop_ssu_qualifiers): New static data. + (TYPES_BINOP_SSU): Define. + * gcc/config/aarch64/aarch64-simd-builtins.def (suqadd, ushl, urshl, + urshr_n, ushll_n): Use appropriate unsigned qualifiers. + * gcc/config/aarch64/arm_neon.h (vrshl_u8, vrshl_u16, vrshl_u32, + vrshl_u64, vrshlq_u8, vrshlq_u16, vrshlq_u32, vrshlq_u64, vrshld_u64, + vrshr_n_u8, vrshr_n_u16, vrshr_n_u32, vrshr_n_u64, vrshrq_n_u8, + vrshrq_n_u16, vrshrq_n_u32, vrshrq_n_u64, vrshrd_n_u64, vshll_n_u8, + vshll_n_u16, vshll_n_u32, vuqadd_s8, vuqadd_s16, vuqadd_s32, + vuqadd_s64, vuqaddq_s8, vuqaddq_s16, vuqaddq_s32, vuqaddq_s64, + vuqaddb_s8, vuqaddh_s16, vuqadds_s32, vuqaddd_s64): Add signedness + suffix to builtin function name, remove cast. + (vshl_s8, vshl_s16, vshl_s32, vshl_s64, vshl_u8, vshl_u16, vshl_u32, + vshl_u64, vshlq_s8, vshlq_s16, vshlq_s32, vshlq_s64, vshlq_u8, + vshlq_u16, vshlq_u32, vshlq_u64, vshld_s64, vshld_u64): Remove cast. + +2014-07-16 Yvan Roux + + Backport from trunk r211408, 211416. + 2014-06-10 Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs): Fix + REG_CFA_RESTORE mode. + + 2014-06-10 Jiong Wang + + * config/aarch64/aarch64.c (aarch64_save_or_restore_fprs) + (aarch64_save_or_restore_callee_save_registers): Fix layout. + +2014-07-16 Yvan Roux + + Backport from trunk r211418. + 2014-06-10 Kyrylo Tkachov + + * config/aarch64/aarch64-simd.md (move_lo_quad_): + Change second alternative type to f_mcr. + * config/aarch64/aarch64.md (*movsi_aarch64): Change 11th + and 12th alternatives' types to f_mcr and f_mrc. + (*movdi_aarch64): Same for 12th and 13th alternatives.
+ (*movsf_aarch64): Change 9th alternatives' type to mov_reg. + (aarch64_movtilow_tilow): Change type to fmov. + +2014-07-16 Yvan Roux + + Backport from trunk r211371. + 2014-06-09 Ramana Radhakrishnan + + * config/arm/arm-modes.def: Remove XFmode. + +2014-07-16 Yvan Roux + + Backport from trunk r211268. + 2014-06-05 Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_expand_prologue): Update stack + layout comment. + +2014-07-16 Yvan Roux + + Backport from trunk r211129. + 2014-06-02 Ramana Radhakrishnan + + PR target/61154 + * config/arm/arm.h (TARGET_SUPPORTS_WIDE_INT): Define. + * config/arm/arm.md (mov64 splitter): Replace const_double_operand + with immediate_operand. + +2014-07-16 Yvan Roux + + Backport from trunk r211073. + 2014-05-30 Kyrylo Tkachov + + * config/arm/thumb2.md (*thumb2_movhi_insn): Set type of movw + to mov_imm. + * config/arm/vfp.md (*thumb2_movsi_vfp): Likewise. + +2014-07-16 Yvan Roux + + Backport from trunk r211050. + 2014-05-29 Richard Earnshaw + Richard Sandiford + + * arm/iterators.md (shiftable_ops): New code iterator. + (t2_binop0, arith_shift_insn): New code attributes. + * arm/predicates.md (shift_nomul_operator): New predicate. + * arm/arm.md (insn_enabled): Delete. + (enabled): Remove insn_enabled test. + (*arith_shiftsi): Delete. Replace with ... + (*_multsi): ... new pattern. + (*_shiftsi): ... new pattern. + * config/arm/arm.c (arm_print_operand): Handle operand format 'b'. + +2014-07-16 Yvan Roux + + Backport from trunk r210996. + 2014-05-27 Andrew Pinski + + * config/aarch64/aarch64.md (stack_protect_set_): + Use for the register in assembly template. + (stack_protect_test): Use the mode of operands[0] for the + result. + (stack_protect_test_): Use for the register + in assembly template. + +2014-07-16 Yvan Roux + + Backport from trunk r210967. + 2014-05-27 Kyrylo Tkachov + + * config/arm/neon.md (neon_bswap): New pattern. + * config/arm/arm.c (neon_itype): Add NEON_BSWAP. + (arm_init_neon_builtins): Handle NEON_BSWAP. + Define required type nodes. + (arm_expand_neon_builtin): Handle NEON_BSWAP. + (arm_builtin_vectorized_function): Handle BUILTIN_BSWAP builtins. + * config/arm/arm_neon_builtins.def (bswap): Define builtins. + * config/arm/iterators.md (VDQHSD): New mode iterator. + +2014-07-16 Yvan Roux + + Backport from trunk r210471. + 2014-05-15 Kyrylo Tkachov + + * config/arm/arm.c (arm_option_override): Use the SCHED_PRESSURE_MODEL + enum name for PARAM_SCHED_PRESSURE_ALGORITHM. + +2014-07-16 Yvan Roux + + Backport from trunk r210369. + 2014-05-13 Kyrylo Tkachov + + * config/arm/arm.c (neon_itype): Remove NEON_RESULTPAIR. + (arm_init_neon_builtins): Remove handling of NEON_RESULTPAIR. + Remove associated type declarations and initialisations. + (arm_expand_neon_builtin): Likewise. + (neon_emit_pair_result_insn): Delete. + * config/arm/arm_neon_builtins (vtrn, vzip, vuzp): Delete. + * config/arm/neon.md (neon_vtrn): Delete. + (neon_vzip): Likewise. + (neon_vuzp): Likewise. + +2014-07-16 Yvan Roux + + Backport from trunk r211058, 211177. + 2014-05-29 Alan Lawrence + + * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers, + TYPES_BINOPV): New static data. + * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin. + * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi): + New patterns. + * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match + patterns for EXT. + (aarch64_evpc_ext): New function. + + * config/aarch64/iterators.md (UNSPEC_EXT): New enum element. 
+ + * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16, + vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32, + vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8, + vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32, + vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi. + + 2014-06-03 Alan Lawrence + + * config/aarch64/aarch64.c (aarch64_evpc_ext): allow and handle + location == 0. + +2014-07-16 Yvan Roux + + Backport from trunk r209797. + 2014-04-25 Kyrylo Tkachov + + * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): + Use HOST_WIDE_INT_C for mask literal. + (aarch_rev16_shleft_mask_imm_p): Likewise. + +2014-07-16 Yvan Roux + + Backport from trunk r211148. + 2014-06-02 Andrew Pinski + + * config/aarch64/aarch64-linux.h (GLIBC_DYNAMIC_LINKER): + /lib/ld-linux32-aarch64.so.1 is used for ILP32. + (LINUX_TARGET_LINK_SPEC): Update linker script for ILP32. + file whose name depends on -mabi= and -mbig-endian. + * config/aarch64/t-aarch64-linux (MULTILIB_OSDIRNAMES): Handle LP64 + better and handle ilp32 too. + (MULTILIB_OPTIONS): Delete. + (MULTILIB_DIRNAMES): Delete. + +2014-07-16 Yvan Roux + + Backport from trunk r210828, r211103. + 2014-05-31 Kugan Vivekanandarajah + + * config/arm/arm.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New define. + (arm_builtins) : Add ARM_BUILTIN_GET_FPSCR and ARM_BUILTIN_SET_FPSCR. + (bdesc_2arg) : Add description for builtins __builtins_arm_set_fpscr + and __builtins_arm_get_fpscr. + (arm_init_builtins) : Initialize builtins __builtins_arm_set_fpscr and + __builtins_arm_get_fpscr. + (arm_expand_builtin) : Expand builtins __builtins_arm_set_fpscr and + __builtins_arm_ldfpscr. + (arm_atomic_assign_expand_fenv): New function. + * config/arm/vfp.md (set_fpscr): New pattern. + (get_fpscr) : Likewise. + * config/arm/unspecs.md (unspecv): Add VUNSPEC_GET_FPSCR and + VUNSPEC_SET_FPSCR. + * doc/extend.texi (AARCH64 Built-in Functions) : Document + __builtins_arm_set_fpscr, __builtins_arm_get_fpscr. + + 2014-05-23 Kugan Vivekanandarajah + + * config/aarch64/aarch64.c (TARGET_ATOMIC_ASSIGN_EXPAND_FENV): New + define. + * config/aarch64/aarch64-protos.h (aarch64_atomic_assign_expand_fenv): + New function declaration. + * config/aarch64/aarch64-builtins.c (aarch64_builtins) : Add + AARCH64_BUILTIN_GET_FPCR, AARCH64_BUILTIN_SET_FPCR. + AARCH64_BUILTIN_GET_FPSR and AARCH64_BUILTIN_SET_FPSR. + (aarch64_init_builtins) : Initialize builtins + __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. + __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. + (aarch64_expand_builtin) : Expand builtins __builtins_aarch64_set_fpcr + __builtins_aarch64_get_fpcr, __builtins_aarch64_get_fpsr, + and __builtins_aarch64_set_fpsr. + (aarch64_atomic_assign_expand_fenv): New function. + * config/aarch64/aarch64.md (set_fpcr): New pattern. + (get_fpcr) : Likewise. + (set_fpsr) : Likewise. + (get_fpsr) : Likewise. + (unspecv): Add UNSPECV_GET_FPCR and UNSPECV_SET_FPCR, UNSPECV_GET_FPSR + and UNSPECV_SET_FPSR. + * doc/extend.texi (AARCH64 Built-in Functions) : Document + __builtins_aarch64_set_fpcr, __builtins_aarch64_get_fpcr. + __builtins_aarch64_set_fpsr and __builtins_aarch64_get_fpsr. + +2014-07-16 Yvan Roux + + Backport from trunk r210355. + 2014-05-13 Ian Bolton + + * config/aarch64/aarch64-protos.h + (aarch64_hard_regno_caller_save_mode): New prototype. + * config/aarch64/aarch64.c (aarch64_hard_regno_caller_save_mode): + New function. 
+ * config/aarch64/aarch64.h (HARD_REGNO_CALLER_SAVE_MODE): New macro. + +2014-07-16 Yvan Roux + + Backport from trunk r209943. + 2014-04-30 Alan Lawrence + + * config/aarch64/arm_neon.h (vuzp1_f32, vuzp1_p8, vuzp1_p16, vuzp1_s8, + vuzp1_s16, vuzp1_s32, vuzp1_u8, vuzp1_u16, vuzp1_u32, vuzp1q_f32, + vuzp1q_f64, vuzp1q_p8, vuzp1q_p16, vuzp1q_s8, vuzp1q_s16, vuzp1q_s32, + vuzp1q_s64, vuzp1q_u8, vuzp1q_u16, vuzp1q_u32, vuzp1q_u64, vuzp2_f32, + vuzp2_p8, vuzp2_p16, vuzp2_s8, vuzp2_s16, vuzp2_s32, vuzp2_u8, + vuzp2_u16, vuzp2_u32, vuzp2q_f32, vuzp2q_f64, vuzp2q_p8, vuzp2q_p16, + vuzp2q_s8, vuzp2q_s16, vuzp2q_s32, vuzp2q_s64, vuzp2q_u8, vuzp2q_u16, + vuzp2q_u32, vuzp2q_u64): Replace temporary asm with __builtin_shuffle. + +2014-06-26 Yvan Roux + + * LINARO-VERSION: Bump version. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + * LINARO-VERSION: Update. + +2014-06-24 Yvan Roux + + Revert: + 2014-05-23 Yvan Roux + + Backport from trunk r209643. + 2014-04-22 Ramana Radhakrishnan + + * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. + +2014-06-13 Yvan Roux + + Backport from trunk r210493, 210494, 210495, 210496, 210497, 210498, + 210499, 210500, 210501, 210502, 210503, 210504, 210505, 210506, 210507, + 210508, 210509, 210510, 210512, 211205, 211206. + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64-protos.h (scale_addr_mode_cost): New. + (cpu_addrcost_table): Use it. + * config/aarch64/aarch64.c (generic_addrcost_table): Initialize it. + (aarch64_address_cost): Rewrite using aarch64_classify_address, + move it. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (cortexa57_addrcost_table): New. + (cortexa57_vector_cost): Likewise. + (cortexa57_tunings): Use them. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_costs_wrapper): New. + (TARGET_RTX_COSTS): Call it. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_build_constant): Conditionally + emit instructions, return number of instructions which would + be emitted. + (aarch64_add_constant): Update call to aarch64_build_constant. + (aarch64_output_mi_thunk): Likewise. + (aarch64_rtx_costs): Estimate cost of a CONST_INT, cost + a CONST_DOUBLE. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename + to... + (aarch64_strip_extend): ...this, don't strip shifts, check RTX is + well formed. + (aarch64_rtx_mult_cost): New. + (aarch64_rtx_costs): Use it, refactor as appropriate. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Set default costs. + + 2014-05-16 James Greenhalgh + Philip Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costing + for SET RTX. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Use address + costs when costing loads and stores to memory. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve cost for + logical operations. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost + ZERO_EXTEND and SIGN_EXTEND better. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for + rotates and shifts. + + 2014-03-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_arith_op_extract_p): New. + (aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT. 
+ + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for + DIV/MOD. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost comparison + operators. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost FMA, + FLOAT_EXTEND, FLOAT_TRUNCATE, ABS, SMAX, and SMIN. + + 2014-05-16 James Greenhalgh + Philipp Tomsich + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost TRUNCATE. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost SYMBOL_REF, + HIGH, LO_SUM. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle the case + where we were unable to cost an RTX. + + 2014-05-16 James Greenhalgh + + * config/aarch64/aarch64.c (aarch64_rtx_mult_cost): Fix FNMUL case. + + 2014-06-03 Andrew Pinski + + * config/aarch64/aarch64.c (aarch64_if_then_else_costs): New function. + (aarch64_rtx_costs): Use aarch64_if_then_else_costs. + + 2014-06-03 Andrew Pinski + + * config/aarch64/aarch64.c (aarch64_if_then_else_costs): Allow non + comparisons for OP0. + +2014-06-13 Yvan Roux + + * LINARO-VERSION: Bump version. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + * LINARO-VERSION: Update. + +2014-06-04 Yvan Roux + + Backport from trunk r211211. + 2014-06-04 Bin Cheng + + * config/aarch64/aarch64.c (aarch64_classify_address) + (aarch64_legitimize_reload_address): Support full addressing modes + for vector modes. + * config/aarch64/aarch64.md (mov, movmisalign) + (*aarch64_simd_mov, *aarch64_simd_mov): Relax predicates. + +2014-05-25 Yvan Roux + + Backport from trunk r209906. + 2014-04-29 Alan Lawrence + + * config/aarch64/arm_neon.h (vzip1_f32, vzip1_p8, vzip1_p16, vzip1_s8, + vzip1_s16, vzip1_s32, vzip1_u8, vzip1_u16, vzip1_u32, vzip1q_f32, + vzip1q_f64, vzip1q_p8, vzip1q_p16, vzip1q_s8, vzip1q_s16, vzip1q_s32, + vzip1q_s64, vzip1q_u8, vzip1q_u16, vzip1q_u32, vzip1q_u64, vzip2_f32, + vzip2_p8, vzip2_p16, vzip2_s8, vzip2_s16, vzip2_s32, vzip2_u8, + vzip2_u16, vzip2_u32, vzip2q_f32, vzip2q_f64, vzip2q_p8, vzip2q_p16, + vzip2q_s8, vzip2q_s16, vzip2q_s32, vzip2q_s64, vzip2q_u8, vzip2q_u16, + vzip2q_u32, vzip2q_u64): Replace inline __asm__ with __builtin_shuffle. + +2014-05-25 Yvan Roux + + Backport from trunk r209897. + 2014-04-29 James Greenhalgh + + * calls.c (initialize_argument_information): Always treat + PUSH_ARGS_REVERSED as 1, simplify code accordingly. + (expand_call): Likewise. + (emit_library_call_calue_1): Likewise. + * expr.c (PUSH_ARGS_REVERSED): Do not define. + (emit_push_insn): Always treat PUSH_ARGS_REVERSED as 1, simplify + code accordingly. + +2014-05-25 Yvan Roux + + Backport from trunk r209880. + 2014-04-28 James Greenhalgh + + * config/aarch64/aarch64-builtins.c + (aarch64_types_storestruct_lane_qualifiers): New. + (TYPES_STORESTRUCT_LANE): Likewise. + * config/aarch64/aarch64-simd-builtins.def (st2_lane): New. + (st3_lane): Likewise. + (st4_lane): Likewise. + * config/aarch64/aarch64-simd.md (vec_store_lanesoi_lane): New. + (vec_store_lanesci_lane): Likewise. + (vec_store_lanesxi_lane): Likewise. + (aarch64_st2_lane): Likewise. + (aarch64_st3_lane): Likewise. + (aarch64_st4_lane): Likewise. + * config/aarch64/aarch64.md (unspec): Add UNSPEC_ST{2,3,4}_LANE. + * config/aarch64/arm_neon.h + (__ST2_LANE_FUNC): Rewrite using builtins, update use points to + use new macro arguments. + (__ST3_LANE_FUNC): Likewise. + (__ST4_LANE_FUNC): Likewise. 
+ * config/aarch64/iterators.md (V_TWO_ELEM): New. + (V_THREE_ELEM): Likewise. + (V_FOUR_ELEM): Likewise. + +2014-05-25 Yvan Roux + + Backport from trunk r209878. + 2014-04-28 James Greenhalgh + + * config/aarch64/aarch64-protos.h (aarch64_modes_tieable_p): New. + * config/aarch64/aarch64.c + (aarch64_cannot_change_mode_class): Weaken conditions. + (aarch64_modes_tieable_p): New. + * config/aarch64/aarch64.h (MODES_TIEABLE_P): Use it. + +2014-05-25 Yvan Roux + + Backport from trunk r209808. + 2014-04-25 Jiong Wang + + * config/arm/predicates.md (call_insn_operand): Add long_call check. + * config/arm/arm.md (sibcall, sibcall_value): Force the address to + reg for long_call. + * config/arm/arm.c (arm_function_ok_for_sibcall): Remove long_call + restriction. + +2014-05-25 Yvan Roux + + Backport from trunk r209806. + 2014-04-25 Kyrylo Tkachov + + * config/arm/arm.c (arm_cortex_a8_tune): Initialise + T16-related fields. + +2014-05-25 Yvan Roux + + Backport from trunk r209742, 209749. + 2014-04-24 Alan Lawrence + + * config/aarch64/aarch64.c (aarch64_evpc_tbl): Enable for bigendian. + + 2014-04-24 Tejas Belagod + + * config/aarch64/aarch64.c (aarch64_evpc_tbl): Reverse order of elements + for big-endian. + +2014-05-23 Yvan Roux + + Backport from trunk r209736. + 2014-04-24 Kyrylo Tkachov + + * config/aarch64/aarch64-builtins.c + (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16, + BUILT_IN_BSWAP32, BUILT_IN_BSWAP64. + * config/aarch64/aarch64-simd.md (bswap): New pattern. + * config/aarch64/aarch64-simd-builtins.def: Define vector bswap + builtins. + * config/aarch64/iterator.md (VDQHSD): New mode iterator. + (Vrevsuff): New mode attribute. + +2014-05-23 Yvan Roux + + Backport from trunk r209712. + 2014-04-23 Venkataramanan Kumar + + * config/aarch64/aarch64.md (stack_protect_set, stack_protect_test) + (stack_protect_set_, stack_protect_test_): Add + machine descriptions for Stack Smashing Protector. + +2014-05-23 Yvan Roux + + Backport from trunk r209711. + 2014-04-23 Richard Earnshaw + + * aarch64.md (_rol3): New pattern. + (_rolsi3_uxtw): Likewise. + * aarch64.c (aarch64_strip_shift): Handle ROTATE and ROTATERT. + +2014-05-23 Yvan Roux + + Backport from trunk r209710. + 2014-04-23 James Greenhalgh + + * config/arm/arm.c (arm_cortex_a57_tune): Initialize all fields. + (arm_cortex_a12_tune): Likewise. + +2014-05-23 Yvan Roux + + Backport from trunk r209706. + 2014-04-23 Kyrylo Tkachov + + * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle BSWAP. + +2014-05-23 Yvan Roux + + Backport from trunk r209701, 209702, 209703, 209704, 209705. + 2014-04-23 Kyrylo Tkachov + + * config/arm/arm.md (arm_rev16si2): New pattern. + (arm_rev16si2_alt): Likewise. + * config/arm/arm.c (arm_new_rtx_costs): Handle rev16 case. + + 2014-04-23 Kyrylo Tkachov + * config/aarch64/aarch64.md (rev162): New pattern. + (rev162_alt): Likewise. + * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle rev16 case. + * config/arm/aarch-common.c (aarch_rev16_shright_mask_imm_p): New. + (aarch_rev16_shleft_mask_imm_p): Likewise. + (aarch_rev16_p_1): Likewise. + (aarch_rev16_p): Likewise. + * config/arm/aarch-common-protos.h (aarch_rev16_p): Declare extern. + (aarch_rev16_shright_mask_imm_p): Likewise. + (aarch_rev16_shleft_mask_imm_p): Likewise. + + 2014-04-23 Kyrylo Tkachov + + * config/arm/aarch-common-protos.h (alu_cost_table): Add rev field. + * config/arm/aarch-cost-tables.h (generic_extra_costs): Specify + rev cost. + (cortex_a53_extra_costs): Likewise. + (cortex_a57_extra_costs): Likewise. 
+ * config/arm/arm.c (cortexa9_extra_costs): Likewise. + (cortexa7_extra_costs): Likewise. + (cortexa8_extra_costs): Likewise. + (cortexa12_extra_costs): Likewise. + (cortexa15_extra_costs): Likewise. + (v7m_extra_costs): Likewise. + (arm_new_rtx_costs): Handle BSWAP. + + 2013-04-23 Kyrylo Tkachov + + * config/arm/arm.c (cortexa8_extra_costs): New table. + (arm_cortex_a8_tune): New tuning struct. + * config/arm/arm-cores.def (cortex-a8): Use cortex_a8 tuning struct. + + 2014-04-23 Kyrylo Tkachov + + * config/arm/arm.c (arm_new_rtx_costs): Handle FMA. + +2014-05-23 Yvan Roux + + Backport from trunk r209659. + 2014-04-22 Richard Henderson + + * config/aarch64/aarch64 (addti3, subti3): New expanders. + (add3_compare0): Remove leading * from name. + (add3_carryin): Likewise. + (sub3_compare0): Likewise. + (sub3_carryin): Likewise. + (mulditi3): New expander. + (multi3): New expander. + (madd): Remove leading * from name. + +2014-05-23 Yvan Roux + + Backport from trunk r209645. + 2014-04-22 Andrew Pinski + + * config/aarch64/aarch64.c (aarch64_load_symref_appropriately): + Handle TLS for ILP32. + * config/aarch64/aarch64.md (tlsie_small): Rename to ... + (tlsie_small_): this and handle PTR. + (tlsie_small_sidi): New pattern. + (tlsle_small): Change to an expand to handle ILP32. + (tlsle_small_): New pattern. + (tlsdesc_small): Rename to ... + (tlsdesc_small_): this and handle PTR. + +2014-05-23 Yvan Roux + + Backport from trunk r209643. + 2014-04-22 Ramana Radhakrishnan + + * config/aarch64/aarch64.c (TARGET_FLAGS_REGNUM): Define. + +2014-05-23 Yvan Roux + + Backport from trunk r209641, 209642. + 2014-04-22 Alex Velenko + + * config/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed. + (aarch64_types_signed_unsigned_qualifiers): Qualifier added. + (aarch64_types_signed_poly_qualifiers): Likewise. + (aarch64_types_unsigned_signed_qualifiers): Likewise. + (aarch64_types_poly_signed_qualifiers): Likewise. + (TYPES_REINTERP_SS): Type macro added. + (TYPES_REINTERP_SU): Likewise. + (TYPES_REINTERP_SP): Likewise. + (TYPES_REINTERP_US): Likewise. + (TYPES_REINTERP_PS): Likewise. + (aarch64_fold_builtin): New expression folding added. + * config/aarch64/aarch64-simd-builtins.def (REINTERP): + Declarations removed. + (REINTERP_SS): Declarations added. + (REINTERP_US): Likewise. + (REINTERP_PS): Likewise. + (REINTERP_SU): Likewise. + (REINTERP_SP): Likewise. + * config/aarch64/arm_neon.h (vreinterpret_p8_f64): Implemented. + (vreinterpretq_p8_f64): Likewise. + (vreinterpret_p16_f64): Likewise. + (vreinterpretq_p16_f64): Likewise. + (vreinterpret_f32_f64): Likewise. + (vreinterpretq_f32_f64): Likewise. + (vreinterpret_f64_f32): Likewise. + (vreinterpret_f64_p8): Likewise. + (vreinterpret_f64_p16): Likewise. + (vreinterpret_f64_s8): Likewise. + (vreinterpret_f64_s16): Likewise. + (vreinterpret_f64_s32): Likewise. + (vreinterpret_f64_s64): Likewise. + (vreinterpret_f64_u8): Likewise. + (vreinterpret_f64_u16): Likewise. + (vreinterpret_f64_u32): Likewise. + (vreinterpret_f64_u64): Likewise. + (vreinterpretq_f64_f32): Likewise. + (vreinterpretq_f64_p8): Likewise. + (vreinterpretq_f64_p16): Likewise. + (vreinterpretq_f64_s8): Likewise. + (vreinterpretq_f64_s16): Likewise. + (vreinterpretq_f64_s32): Likewise. + (vreinterpretq_f64_s64): Likewise. + (vreinterpretq_f64_u8): Likewise. + (vreinterpretq_f64_u16): Likewise. + (vreinterpretq_f64_u32): Likewise. + (vreinterpretq_f64_u64): Likewise. + (vreinterpret_s64_f64): Likewise. + (vreinterpretq_s64_f64): Likewise. + (vreinterpret_u64_f64): Likewise. 
+ (vreinterpretq_u64_f64): Likewise. + (vreinterpret_s8_f64): Likewise. + (vreinterpretq_s8_f64): Likewise. + (vreinterpret_s16_f64): Likewise. + (vreinterpretq_s16_f64): Likewise. + (vreinterpret_s32_f64): Likewise. + (vreinterpretq_s32_f64): Likewise. + (vreinterpret_u8_f64): Likewise. + (vreinterpretq_u8_f64): Likewise. + (vreinterpret_u16_f64): Likewise. + (vreinterpretq_u16_f64): Likewise. + (vreinterpret_u32_f64): Likewise. + (vreinterpretq_u32_f64): Likewise. + + 2014-04-22 Alex Velenko + + * config/aarch64/aarch64/aarch64-builtins.c (TYPES_REINTERP): Removed. + * config/aarch64/aarch64/aarch64-simd-builtins.def (REINTERP): Removed. + (vreinterpret_p8_s8): Likewise. + * config/aarch64/aarch64/arm_neon.h (vreinterpret_p8_s8): Uses cast. + (vreinterpret_p8_s16): Likewise. + (vreinterpret_p8_s32): Likewise. + (vreinterpret_p8_s64): Likewise. + (vreinterpret_p8_f32): Likewise. + (vreinterpret_p8_u8): Likewise. + (vreinterpret_p8_u16): Likewise. + (vreinterpret_p8_u32): Likewise. + (vreinterpret_p8_u64): Likewise. + (vreinterpret_p8_p16): Likewise. + (vreinterpretq_p8_s8): Likewise. + (vreinterpretq_p8_s16): Likewise. + (vreinterpretq_p8_s32): Likewise. + (vreinterpretq_p8_s64): Likewise. + (vreinterpretq_p8_f32): Likewise. + (vreinterpretq_p8_u8): Likewise. + (vreinterpretq_p8_u16): Likewise. + (vreinterpretq_p8_u32): Likewise. + (vreinterpretq_p8_u64): Likewise. + (vreinterpretq_p8_p16): Likewise. + (vreinterpret_p16_s8): Likewise. + (vreinterpret_p16_s16): Likewise. + (vreinterpret_p16_s32): Likewise. + (vreinterpret_p16_s64): Likewise. + (vreinterpret_p16_f32): Likewise. + (vreinterpret_p16_u8): Likewise. + (vreinterpret_p16_u16): Likewise. + (vreinterpret_p16_u32): Likewise. + (vreinterpret_p16_u64): Likewise. + (vreinterpret_p16_p8): Likewise. + (vreinterpretq_p16_s8): Likewise. + (vreinterpretq_p16_s16): Likewise. + (vreinterpretq_p16_s32): Likewise. + (vreinterpretq_p16_s64): Likewise. + (vreinterpretq_p16_f32): Likewise. + (vreinterpretq_p16_u8): Likewise. + (vreinterpretq_p16_u16): Likewise. + (vreinterpretq_p16_u32): Likewise. + (vreinterpretq_p16_u64): Likewise. + (vreinterpretq_p16_p8): Likewise. + (vreinterpret_f32_s8): Likewise. + (vreinterpret_f32_s16): Likewise. + (vreinterpret_f32_s32): Likewise. + (vreinterpret_f32_s64): Likewise. + (vreinterpret_f32_u8): Likewise. + (vreinterpret_f32_u16): Likewise. + (vreinterpret_f32_u32): Likewise. + (vreinterpret_f32_u64): Likewise. + (vreinterpret_f32_p8): Likewise. + (vreinterpret_f32_p16): Likewise. + (vreinterpretq_f32_s8): Likewise. + (vreinterpretq_f32_s16): Likewise. + (vreinterpretq_f32_s32): Likewise. + (vreinterpretq_f32_s64): Likewise. + (vreinterpretq_f32_u8): Likewise. + (vreinterpretq_f32_u16): Likewise. + (vreinterpretq_f32_u32): Likewise. + (vreinterpretq_f32_u64): Likewise. + (vreinterpretq_f32_p8): Likewise. + (vreinterpretq_f32_p16): Likewise. + (vreinterpret_s64_s8): Likewise. + (vreinterpret_s64_s16): Likewise. + (vreinterpret_s64_s32): Likewise. + (vreinterpret_s64_f32): Likewise. + (vreinterpret_s64_u8): Likewise. + (vreinterpret_s64_u16): Likewise. + (vreinterpret_s64_u32): Likewise. + (vreinterpret_s64_u64): Likewise. + (vreinterpret_s64_p8): Likewise. + (vreinterpret_s64_p16): Likewise. + (vreinterpretq_s64_s8): Likewise. + (vreinterpretq_s64_s16): Likewise. + (vreinterpretq_s64_s32): Likewise. + (vreinterpretq_s64_f32): Likewise. + (vreinterpretq_s64_u8): Likewise. + (vreinterpretq_s64_u16): Likewise. + (vreinterpretq_s64_u32): Likewise. + (vreinterpretq_s64_u64): Likewise. 
+ (vreinterpretq_s64_p8): Likewise. + (vreinterpretq_s64_p16): Likewise. + (vreinterpret_u64_s8): Likewise. + (vreinterpret_u64_s16): Likewise. + (vreinterpret_u64_s32): Likewise. + (vreinterpret_u64_s64): Likewise. + (vreinterpret_u64_f32): Likewise. + (vreinterpret_u64_u8): Likewise. + (vreinterpret_u64_u16): Likewise. + (vreinterpret_u64_u32): Likewise. + (vreinterpret_u64_p8): Likewise. + (vreinterpret_u64_p16): Likewise. + (vreinterpretq_u64_s8): Likewise. + (vreinterpretq_u64_s16): Likewise. + (vreinterpretq_u64_s32): Likewise. + (vreinterpretq_u64_s64): Likewise. + (vreinterpretq_u64_f32): Likewise. + (vreinterpretq_u64_u8): Likewise. + (vreinterpretq_u64_u16): Likewise. + (vreinterpretq_u64_u32): Likewise. + (vreinterpretq_u64_p8): Likewise. + (vreinterpretq_u64_p16): Likewise. + (vreinterpret_s8_s16): Likewise. + (vreinterpret_s8_s32): Likewise. + (vreinterpret_s8_s64): Likewise. + (vreinterpret_s8_f32): Likewise. + (vreinterpret_s8_u8): Likewise. + (vreinterpret_s8_u16): Likewise. + (vreinterpret_s8_u32): Likewise. + (vreinterpret_s8_u64): Likewise. + (vreinterpret_s8_p8): Likewise. + (vreinterpret_s8_p16): Likewise. + (vreinterpretq_s8_s16): Likewise. + (vreinterpretq_s8_s32): Likewise. + (vreinterpretq_s8_s64): Likewise. + (vreinterpretq_s8_f32): Likewise. + (vreinterpretq_s8_u8): Likewise. + (vreinterpretq_s8_u16): Likewise. + (vreinterpretq_s8_u32): Likewise. + (vreinterpretq_s8_u64): Likewise. + (vreinterpretq_s8_p8): Likewise. + (vreinterpretq_s8_p16): Likewise. + (vreinterpret_s16_s8): Likewise. + (vreinterpret_s16_s32): Likewise. + (vreinterpret_s16_s64): Likewise. + (vreinterpret_s16_f32): Likewise. + (vreinterpret_s16_u8): Likewise. + (vreinterpret_s16_u16): Likewise. + (vreinterpret_s16_u32): Likewise. + (vreinterpret_s16_u64): Likewise. + (vreinterpret_s16_p8): Likewise. + (vreinterpret_s16_p16): Likewise. + (vreinterpretq_s16_s8): Likewise. + (vreinterpretq_s16_s32): Likewise. + (vreinterpretq_s16_s64): Likewise. + (vreinterpretq_s16_f32): Likewise. + (vreinterpretq_s16_u8): Likewise. + (vreinterpretq_s16_u16): Likewise. + (vreinterpretq_s16_u32): Likewise. + (vreinterpretq_s16_u64): Likewise. + (vreinterpretq_s16_p8): Likewise. + (vreinterpretq_s16_p16): Likewise. + (vreinterpret_s32_s8): Likewise. + (vreinterpret_s32_s16): Likewise. + (vreinterpret_s32_s64): Likewise. + (vreinterpret_s32_f32): Likewise. + (vreinterpret_s32_u8): Likewise. + (vreinterpret_s32_u16): Likewise. + (vreinterpret_s32_u32): Likewise. + (vreinterpret_s32_u64): Likewise. + (vreinterpret_s32_p8): Likewise. + (vreinterpret_s32_p16): Likewise. + (vreinterpretq_s32_s8): Likewise. + (vreinterpretq_s32_s16): Likewise. + (vreinterpretq_s32_s64): Likewise. + (vreinterpretq_s32_f32): Likewise. + (vreinterpretq_s32_u8): Likewise. + (vreinterpretq_s32_u16): Likewise. + (vreinterpretq_s32_u32): Likewise. + (vreinterpretq_s32_u64): Likewise. + (vreinterpretq_s32_p8): Likewise. + (vreinterpretq_s32_p16): Likewise. + (vreinterpret_u8_s8): Likewise. + (vreinterpret_u8_s16): Likewise. + (vreinterpret_u8_s32): Likewise. + (vreinterpret_u8_s64): Likewise. + (vreinterpret_u8_f32): Likewise. + (vreinterpret_u8_u16): Likewise. + (vreinterpret_u8_u32): Likewise. + (vreinterpret_u8_u64): Likewise. + (vreinterpret_u8_p8): Likewise. + (vreinterpret_u8_p16): Likewise. + (vreinterpretq_u8_s8): Likewise. + (vreinterpretq_u8_s16): Likewise. + (vreinterpretq_u8_s32): Likewise. + (vreinterpretq_u8_s64): Likewise. + (vreinterpretq_u8_f32): Likewise. + (vreinterpretq_u8_u16): Likewise. + (vreinterpretq_u8_u32): Likewise. 
+ (vreinterpretq_u8_u64): Likewise. + (vreinterpretq_u8_p8): Likewise. + (vreinterpretq_u8_p16): Likewise. + (vreinterpret_u16_s8): Likewise. + (vreinterpret_u16_s16): Likewise. + (vreinterpret_u16_s32): Likewise. + (vreinterpret_u16_s64): Likewise. + (vreinterpret_u16_f32): Likewise. + (vreinterpret_u16_u8): Likewise. + (vreinterpret_u16_u32): Likewise. + (vreinterpret_u16_u64): Likewise. + (vreinterpret_u16_p8): Likewise. + (vreinterpret_u16_p16): Likewise. + (vreinterpretq_u16_s8): Likewise. + (vreinterpretq_u16_s16): Likewise. + (vreinterpretq_u16_s32): Likewise. + (vreinterpretq_u16_s64): Likewise. + (vreinterpretq_u16_f32): Likewise. + (vreinterpretq_u16_u8): Likewise. + (vreinterpretq_u16_u32): Likewise. + (vreinterpretq_u16_u64): Likewise. + (vreinterpretq_u16_p8): Likewise. + (vreinterpretq_u16_p16): Likewise. + (vreinterpret_u32_s8): Likewise. + (vreinterpret_u32_s16): Likewise. + (vreinterpret_u32_s32): Likewise. + (vreinterpret_u32_s64): Likewise. + (vreinterpret_u32_f32): Likewise. + (vreinterpret_u32_u8): Likewise. + (vreinterpret_u32_u16): Likewise. + (vreinterpret_u32_u64): Likewise. + (vreinterpret_u32_p8): Likewise. + (vreinterpret_u32_p16): Likewise. + (vreinterpretq_u32_s8): Likewise. + (vreinterpretq_u32_s16): Likewise. + (vreinterpretq_u32_s32): Likewise. + (vreinterpretq_u32_s64): Likewise. + (vreinterpretq_u32_f32): Likewise. + (vreinterpretq_u32_u8): Likewise. + (vreinterpretq_u32_u16): Likewise. + (vreinterpretq_u32_u64): Likewise. + (vreinterpretq_u32_p8): Likewise. + (vreinterpretq_u32_p16): Likewise. + +2014-05-23 Yvan Roux + + Backport from trunk r209640. + 2014-04-22 Alex Velenko + + * gcc/config/aarch64/aarch64-simd.md (aarch64_s): + Pattern extended. + * config/aarch64/aarch64-simd-builtins.def (sqneg): Iterator + extended. + (sqabs): Likewise. + * config/aarch64/arm_neon.h (vqneg_s64): New intrinsic. + (vqnegd_s64): Likewise. + (vqabs_s64): Likewise. + (vqabsd_s64): Likewise. + +2014-05-23 Yvan Roux + + Backport from trunk r209627, 209636. + 2014-04-22 Renlin + Jiong Wang + + * config/aarch64/aarch64.h (aarch64_frame): Delete "fp_lr_offset". + * config/aarch64/aarch64.c (aarch64_layout_frame) + (aarch64_initial_elimination_offset): Likewise. + + 2014-04-22 Marcus Shawcroft + + * config/aarch64/aarch64.c (aarch64_initial_elimination_offset): + Fix indentation. + +2014-05-23 Yvan Roux + + Backport from trunk r209618. + 2014-04-22 Renlin Li + + * config/aarch64/aarch64.c (aarch64_print_operand_address): Adjust + the output asm format. + +2014-05-23 Yvan Roux + + Backport from trunk r209617. + 2014-04-22 James Greenhalgh + + * config/aarch64/aarch64-simd.md + (aarch64_cmdi): Always split. + (*aarch64_cmdi): New. + (aarch64_cmtstdi): Always split. + (*aarch64_cmtstdi): New. + +2014-05-23 Yvan Roux + + Backport from trunk r209615. + 2014-04-22 Ramana Radhakrishnan + + * config/arm/arm.c (arm_hard_regno_mode_ok): Loosen + restrictions on core registers for DImode values in Thumb2. + +2014-05-23 Yvan Roux + + Backport from trunk r209613, r209614. + 2014-04-22 Ian Bolton + + * config/arm/arm.md (*anddi_notdi_zesidi): New pattern. + * config/arm/thumb2.md (*iordi_notdi_zesidi): New pattern. + + 2014-04-22 Ian Bolton + + * config/arm/thumb2.md (*iordi_notdi_di): New pattern. + (*iordi_notzesidi_di): Likewise. + (*iordi_notsesidi_di): Likewise. + +2014-05-23 Yvan Roux + + Backport from trunk r209561. + 2014-04-22 Ian Bolton + + * config/arm/arm-protos.h (tune_params): New struct members. + * config/arm/arm.c: Initialise tune_params per processor. 
+ (thumb2_reorg): Suppress conversion from t32 to t16 when optimizing + for speed, based on new tune_params. + +2014-05-23 Yvan Roux + + Backport from trunk r209559. + 2014-04-22 Alex Velenko + + * config/aarch64/aarch64-builtins.c (BUILTIN_VDQF_DF): Macro + added. + * config/aarch64/aarch64-simd-builtins.def (frintn): Use added + macro. + * config/aarch64/aarch64-simd.md (): Comment + corrected. + * config/aarch64/aarch64.md (): Likewise. + * config/aarch64/arm_neon.h (vrnd_f64): Added. + (vrnda_f64): Likewise. + (vrndi_f64): Likewise. + (vrndm_f64): Likewise. + (vrndn_f64): Likewise. + (vrndp_f64): Likewise. + (vrndx_f64): Likewise. + +2014-05-23 Yvan Roux + + Backport from trunk r209419. + 2014-04-15 Kyrylo Tkachov + + PR rtl-optimization/60663 + * config/arm/arm.c (arm_new_rtx_costs): Improve ASM_OPERANDS case, + avoid 0 cost. + +2014-05-23 Yvan Roux + + Backport from trunk r209457. + 2014-04-16 Andrew Pinski + + * config/host-linux.c (TRY_EMPTY_VM_SPACE): Change aarch64 ilp32 + definition. + +2014-05-19 Yvan Roux + + * LINARO-VERSION: Bump version. + +2014-05-14 Yvan Roux + GCC Linaro 4.9-2014.05 released. + * LINARO-VERSION: Update. + +2014-05-13 Yvan Roux + + Backport from trunk r209889. + 2014-04-29 Zhenqiang Chen + + * config/aarch64/aarch64.md (movcc): New for GPF. + +2014-05-13 Yvan Roux + + Backport from trunk r209556. + 2014-04-22 Zhenqiang Chen + + * config/arm/arm.c (arm_print_operand, thumb_exit): Make sure + GET_MODE_SIZE argument is enum machine_mode. + +2014-04-28 Yvan Roux + + * LINARO-VERSION: Bump version. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. + * LINARO-VERSION: New file. + * configure.ac: Add Linaro version string. --- a/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c +++ b/src/gcc/testsuite/gcc.target/arm/iordi_notdi-1.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-inline --save-temps" } */ + +extern void abort (void); + +typedef long long s64int; +typedef int s32int; +typedef unsigned long long u64int; +typedef unsigned int u32int; + +s64int +iordi_di_notdi (s64int a, s64int b) +{ + return (a | ~b); +} + +s64int +iordi_di_notzesidi (s64int a, u32int b) +{ + return (a | ~(u64int) b); +} + +s64int +iordi_notdi_zesidi (s64int a, u32int b) +{ + return (~a | (u64int) b); +} + +s64int +iordi_di_notsesidi (s64int a, s32int b) +{ + return (a | ~(s64int) b); +} + +int main () +{ + s64int a64 = 0xdeadbeef00000000ll; + s64int b64 = 0x000000004f4f0112ll; + s64int c64 = 0xdeadbeef000f0000ll; + + u32int c32 = 0x01124f4f; + s32int d32 = 0xabbaface; + + s64int z = iordi_di_notdi (a64, b64); + if (z != 0xffffffffb0b0feedll) + abort (); + + z = iordi_di_notzesidi (a64, c32); + if (z != 0xfffffffffeedb0b0ll) + abort (); + + z = iordi_notdi_zesidi (c64, c32); + if (z != 0x21524110fff2ffffll) + abort (); + + z = iordi_di_notsesidi (a64, d32); + if (z != 0xdeadbeef54450531ll) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "orn\t" 6 { target arm_thumb2 } } } */ + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips16_1.c @@ -0,0 +1,12 @@ +/* Test the `vzips16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzips16.x" + +/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts64_1.c @@ -0,0 +1,12 @@ +/* Test the `vexts64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_s64.x" + +/* Don't scan assembler for vext - it can be optimized into a move from r0. */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns16_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrns16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrns16.x" + +/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipu16.x" + +/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs8_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQs8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqs8.x" + +/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_u8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQf32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqf32.x" + +/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu64_1.c @@ -0,0 +1,12 @@ +/* Test the `vextu64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_u64.x" + +/* Don't scan assembler for vext - it can be optimized into a move from r0. */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs8_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQs8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqs8.x" + +/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnu16.x" + +/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqp8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_u8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs16_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQs16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqs16.x" + +/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs64_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQs64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_s64.x" + +/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns8_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrns8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrns8.x" + +/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqu16.x" + +/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu64_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQu64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_u64.x" + +/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqp16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQs32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqs32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_p16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts32_1.c @@ -0,0 +1,12 @@ +/* Test the `vexts32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_s32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzps8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzps8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqu32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextu32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_u32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqp8.x" + +/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqp8.x" + +/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/simd.exp +++ b/src/gcc/testsuite/gcc.target/arm/simd/simd.exp @@ -0,0 +1,35 @@ +# Copyright (C) 1997-2014 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# GCC testsuite that uses the `dg.exp' driver. + +# Exit immediately if this isn't an ARM target. +if ![istarget arm*-*-*] then { + return +} + +# Load support procs. +load_lib gcc-dg.exp + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ + "" "" + +# All done. +dg-finish --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpp16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzps32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzps32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpu32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_p16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQs32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_s32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqp16.x" + +/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs32_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQs32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqs32.x" + +/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnp8' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnp8.x" + +/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_u32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqu8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips8_1.c @@ -0,0 +1,12 @@ +/* Test the `vzips8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzips8.x" + +/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqu32.x" + +/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpp8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipp16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipp16.x" + +/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzips32_1.c @@ -0,0 +1,12 @@ +/* Test the `vzips32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzips32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnp16.x" + +/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp64_1.c @@ -0,0 +1,26 @@ +/* Test the `vextp64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (int argc, char **argv) +{ + int i; + poly64x1_t in1 = {0}; + poly64x1_t in2 = {1}; + poly64x1_t actual = vext_p64 (in1, in2, 0); + if (actual != in1) + abort (); + + return 0; +} + +/* Don't scan assembler for vext - it can be optimized into a move from r0. +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrns32_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrns32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrns32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipu32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipu32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQs8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_s8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnu32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqu8.x" + +/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqu8.x" + +/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqf32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipp8' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipp8.x" + +/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqp16_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQp16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqp16.x" + +/* { dg-final { scan-assembler-times "vtrn\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts8_1.c @@ -0,0 +1,12 @@ +/* Test the `vexts8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_s8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp64_1.c @@ -0,0 +1,33 @@ +/* Test the `vextQp64' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_crypto_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_crypto } */ + +#include "arm_neon.h" + +extern void abort (void); + +poly64x2_t +test_vextq_p64_1 (poly64x2_t a, poly64x2_t b) +{ + return vextq_p64(a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + poly64x2_t in1 = {0, 1}; + poly64x2_t in2 = {2, 3}; + poly64x2_t actual = test_vextq_p64_1 (in1, in2); + for (i = 0; i < 2; i++) + if (actual[i] != i + 1) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "vext\.64\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_f32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqs32_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQs32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqs32.x" + +/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnqu32_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnQu32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnqu32.x" + +/* { dg-final { scan-assembler-times "vtrn\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnu8.x" + +/* { dg-final { scan-assembler-times "vtrn\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_p8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpf32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQs16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqs16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vexts16_1.c @@ -0,0 +1,12 @@ +/* Test the `vexts16' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_s16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpu8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpu8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqu16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_f32.x" + +/* { dg-final { scan-assembler-times "vext\.32\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_u16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQf32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqf32.x" + +/* { dg-final { scan-assembler-times "vzip\.32\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzps16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzps16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzps16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextp8_1.c @@ -0,0 +1,12 @@ +/* Test the `vextp8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/ext_p8.x" + +/* { dg-final { scan-assembler-times "vext\.8\[ \t\]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpu16.x" + +/* { dg-final { scan-assembler-times "vuzp\.16\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vuzpqs8_1.c @@ -0,0 +1,12 @@ +/* Test the `vuzpQs8' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vuzpqs8.x" + +/* { dg-final { scan-assembler-times "vuzp\.8\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQs16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQs16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_s16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipf32' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipf32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vextQu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vextQu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/extq_u16.x" + +/* { dg-final { scan-assembler-times "vext\.16\[ \t\]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, #\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqs16_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQs16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqs16.x" + +/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vtrnf32_1.c @@ -0,0 +1,12 @@ +/* Test the `vtrnf32' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vtrnf32.x" + +/* { dg-final { scan-assembler-times "vuzp\.32\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipqu16_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipQu16' ARM Neon intrinsic. */ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipqu16.x" + +/* { dg-final { scan-assembler-times "vzip\.16\[ \t\]+\[qQ\]\[0-9\]+, ?\[qQ\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c +++ b/src/gcc/testsuite/gcc.target/arm/simd/vzipu8_1.c @@ -0,0 +1,12 @@ +/* Test the `vzipu8' ARM Neon intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_ok } */ +/* { dg-options "-save-temps -O1 -fno-inline" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include "../../aarch64/simd/vzipu8.x" + +/* { dg-final { scan-assembler-times "vzip\.8\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/arm/tail-long-call.c +++ b/src/gcc/testsuite/gcc.target/arm/tail-long-call.c @@ -0,0 +1,12 @@ +/* { dg-skip-if "need at least armv5te" { *-*-* } { "-march=armv[234]*" "-mthumb" } { "" } } */ +/* { dg-options "-O2 -march=armv5te -marm" } */ +/* { dg-final { scan-assembler "bx" } } */ +/* { dg-final { scan-assembler-not "blx" } } */ + +int lcal (int) __attribute__ ((long_call)); + +int +dec (int a) +{ + return lcal (a); +} --- a/src/gcc/testsuite/gcc.target/arm/rev16.c +++ b/src/gcc/testsuite/gcc.target/arm/rev16.c @@ -0,0 +1,35 @@ +/* { dg-options "-O2" } */ +/* { dg-do run } */ + +extern void abort (void); + +typedef unsigned int __u32; + +__u32 +__rev16_32_alt (__u32 x) +{ + return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) + | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); +} + +__u32 +__rev16_32 (__u32 x) +{ + return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) + | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); +} + +int +main (void) +{ + volatile __u32 in32 = 0x12345678; + volatile __u32 expected32 = 0x34127856; + + if (__rev16_32 (in32) != expected32) + abort (); + + if (__rev16_32_alt (in32) != expected32) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c +++ b/src/gcc/testsuite/gcc.target/arm/anddi_notdi-1.c @@ -0,0 +1,65 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -fno-inline --save-temps" } */ + +extern void abort (void); + +typedef long long s64int; +typedef int s32int; +typedef unsigned long long u64int; +typedef unsigned int u32int; + +s64int +anddi_di_notdi (s64int a, s64int b) +{ + return (a & ~b); +} + +s64int +anddi_di_notzesidi (s64int a, u32int b) +{ + return (a & ~(u64int) b); +} + +s64int +anddi_notdi_zesidi (s64int a, u32int b) +{ + return (~a & (u64int) b); +} + +s64int +anddi_di_notsesidi (s64int a, s32int b) +{ + return (a & ~(s64int) b); +} + +int main () +{ + s64int a64 = 0xdeadbeef0000ffffll; + s64int b64 = 0x000000005f470112ll; + s64int c64 = 0xdeadbeef300f0000ll; + + u32int c32 = 0x01124f4f; + s32int d32 = 0xabbaface; + + s64int z = anddi_di_notdi (c64, b64); + if (z != 0xdeadbeef20080000ll) + abort (); + + z = anddi_di_notzesidi (a64, c32); + if (z != 0xdeadbeef0000b0b0ll) + abort (); + + z = anddi_notdi_zesidi (c64, c32); + if (z != 0x0000000001104f4fll) + abort (); + + z = anddi_di_notsesidi (a64, d32); + if (z != 0x0000000000000531ll) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "bic\t" 6 } } */ + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vqabs_s64_1.c @@ -0,0 +1,54 @@ +/* Test vqabs_s64 intrinsics work correctly. 
*/ +/* { dg-do run } */ +/* { dg-options "--save-temps" } */ + +#include + +extern void abort (void); + +int __attribute__ ((noinline)) +test_vqabs_s64 (int64x1_t passed, int64_t expected) +{ + return vget_lane_s64 (vqabs_s64 (passed), 0) != expected; +} + +int __attribute__ ((noinline)) +test_vqabsd_s64 (int64_t passed, int64_t expected) +{ + return vqabsd_s64 (passed) != expected; +} + +/* { dg-final { scan-assembler-times "sqabs\\td\[0-9\]+, d\[0-9\]+" 2 } } */ + +int +main (int argc, char **argv) +{ + /* Basic test. */ + if (test_vqabs_s64 (vcreate_s64 (-1), 1)) + abort (); + if (test_vqabsd_s64 (-1, 1)) + abort (); + + /* Getting absolute value of min int64_t. + Note, exact result cannot be represented in int64_t, + so max int64_t is expected. */ + if (test_vqabs_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff)) + abort (); + if (test_vqabsd_s64 (0x8000000000000000, 0x7fffffffffffffff)) + abort (); + + /* Another input that gets max int64_t. */ + if (test_vqabs_s64 (vcreate_s64 (0x8000000000000001), 0x7fffffffffffffff)) + abort (); + if (test_vqabsd_s64 (0x8000000000000001, 0x7fffffffffffffff)) + abort (); + + /* Checking that large positive numbers stay the same. */ + if (test_vqabs_s64 (vcreate_s64 (0x7fffffffffffffff), 0x7fffffffffffffff)) + abort (); + if (test_vqabsd_s64 (0x7fffffffffffffff, 0x7fffffffffffffff)) + abort (); + + return 0; +} +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/acle.exp @@ -0,0 +1,35 @@ +# Copyright (C) 2014 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +# GCC testsuite that uses the `dg.exp' driver. + +# Exit immediately if this isn't an AArch64 target. +if ![istarget aarch64*-*-*] then { + return +} + +# Load support procs. +load_lib gcc-dg.exp + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ + "" "" + +# All done. +dg-finish --- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32b.c @@ -0,0 +1,15 @@ +/* Test the crc32b ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ + +#include "arm_acle.h" + +uint32_t +test_crc32b (uint32_t arg0, uint8_t arg1) +{ + return __crc32b (arg0, arg1); +} + +/* { dg-final { scan-assembler "crc32b\tw..?, w..?, w..?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32d.c @@ -0,0 +1,15 @@ +/* Test the crc32d ACLE intrinsic. 
*/ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ + +#include "arm_acle.h" + +uint32_t +test_crc32d (uint32_t arg0, uint64_t arg1) +{ + return __crc32d (arg0, arg1); +} + +/* { dg-final { scan-assembler "crc32x\tw..?, w..?, x..?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cb.c @@ -0,0 +1,15 @@ +/* Test the crc32cb ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ + +#include "arm_acle.h" + +uint32_t +test_crc32cb (uint32_t arg0, uint8_t arg1) +{ + return __crc32cb (arg0, arg1); +} + +/* { dg-final { scan-assembler "crc32cb\tw..?, w..?, w..?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cd.c @@ -0,0 +1,15 @@ +/* Test the crc32cd ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ + +#include "arm_acle.h" + +uint32_t +test_crc32cd (uint32_t arg0, uint64_t arg1) +{ + return __crc32cd (arg0, arg1); +} + +/* { dg-final { scan-assembler "crc32cx\tw..?, w..?, x..?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32w.c @@ -0,0 +1,15 @@ +/* Test the crc32w ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ + +#include "arm_acle.h" + +uint32_t +test_crc32w (uint32_t arg0, uint32_t arg1) +{ + return __crc32w (arg0, arg1); +} + +/* { dg-final { scan-assembler "crc32w\tw..?, w..?, w..?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32h.c @@ -0,0 +1,15 @@ +/* Test the crc32h ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ + +#include "arm_acle.h" + +uint32_t +test_crc32h (uint32_t arg0, uint16_t arg1) +{ + return __crc32h (arg0, arg1); +} + +/* { dg-final { scan-assembler "crc32h\tw..?, w..?, w..?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32cw.c @@ -0,0 +1,15 @@ +/* Test the crc32cw ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ + +#include "arm_acle.h" + +uint32_t +test_crc32cw (uint32_t arg0, uint32_t arg1) +{ + return __crc32cw (arg0, arg1); +} + +/* { dg-final { scan-assembler "crc32cw\tw..?, w..?, w..?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c +++ b/src/gcc/testsuite/gcc.target/aarch64/acle/crc32ch.c @@ -0,0 +1,15 @@ +/* Test the crc32ch ACLE intrinsic. */ + +/* { dg-do assemble } */ +/* { dg-options "-save-temps -O2 -march=armv8-a+crc" } */ + +#include "arm_acle.h" + +uint32_t +test_crc32ch (uint32_t arg0, uint16_t arg1) +{ + return __crc32ch (arg0, arg1); +} + +/* { dg-final { scan-assembler "crc32ch\tw..?, w..?, w..?\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vreinterpret_f64_1.c @@ -0,0 +1,596 @@ +/* Test vreinterpret_f64_* and vreinterpret_*_f64 intrinsics work correctly. 
*/ +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#include + +extern void abort (void); + +#define ABS(a) __builtin_fabs (a) +#define ISNAN(a) __builtin_isnan (a) + +#define DOUBLE_EQUALS(a, b, epsilon) \ +( \ + ((a) == (b)) \ + || (ISNAN (a) && ISNAN (b)) \ + || (ABS (a - b) < epsilon) \ +) + +/* Pi accurate up to 16 digits. + Further digits are a closest binary approximation. */ +#define PI_F64 3.14159265358979311599796346854 +/* Hex representation in Double (IEEE754 Double precision 64-bit) is: + 0x400921FB54442D18. */ + +/* E accurate up to 16 digits. + Further digits are a closest binary approximation. */ +#define E_F64 2.71828182845904509079559829843 +/* Hex representation in Double (IEEE754 Double precision 64-bit) is: + 0x4005BF0A8B145769. */ + +float32x2_t __attribute__ ((noinline)) +wrap_vreinterpret_f32_f64 (float64x1_t __a) +{ + return vreinterpret_f32_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_f32_f64 () +{ + float64x1_t a; + float32x2_t b; + float64_t c[1] = { PI_F64 }; + /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. */ + float32_t d[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; + float32_t e[2]; + int i; + + a = vld1_f64 (c); + b = wrap_vreinterpret_f32_f64 (a); + vst1_f32 (e, b); + for (i = 0; i < 2; i++) + if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) + return 1; + return 0; +}; + +int8x8_t __attribute__ ((noinline)) +wrap_vreinterpret_s8_f64 (float64x1_t __a) +{ + return vreinterpret_s8_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_s8_f64 () +{ + float64x1_t a; + int8x8_t b; + float64_t c[1] = { PI_F64 }; + int8_t d[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; + int8_t e[8]; + int i; + + a = vld1_f64 (c); + b = wrap_vreinterpret_s8_f64 (a); + vst1_s8 (e, b); + for (i = 0; i < 8; i++) + if (d[i] != e[i]) + return 1; + return 0; +}; + +int16x4_t __attribute__ ((noinline)) +wrap_vreinterpret_s16_f64 (float64x1_t __a) +{ + return vreinterpret_s16_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_s16_f64 () +{ + float64x1_t a; + int16x4_t b; + float64_t c[1] = { PI_F64 }; + int16_t d[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; + int16_t e[4]; + int i; + + a = vld1_f64 (c); + b = wrap_vreinterpret_s16_f64 (a); + vst1_s16 (e, b); + for (i = 0; i < 4; i++) + if (d[i] != e[i]) + return 1; + return 0; +}; + +int32x2_t __attribute__ ((noinline)) +wrap_vreinterpret_s32_f64 (float64x1_t __a) +{ + return vreinterpret_s32_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_s32_f64 () +{ + float64x1_t a; + int32x2_t b; + float64_t c[1] = { PI_F64 }; + int32_t d[2] = { 0x54442D18, 0x400921FB }; + int32_t e[2]; + int i; + + a = vld1_f64 (c); + b = wrap_vreinterpret_s32_f64 (a); + vst1_s32 (e, b); + for (i = 0; i < 2; i++) + if (d[i] != e[i]) + return 1; + return 0; +}; + +int64x1_t __attribute__ ((noinline)) +wrap_vreinterpret_s64_f64 (float64x1_t __a) +{ + return vreinterpret_s64_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_s64_f64 () +{ + float64x1_t a; + int64x1_t b; + float64_t c[1] = { PI_F64 }; + int64_t d[1] = { 0x400921FB54442D18 }; + int64_t e[1]; + int i; + + a = vld1_f64 (c); + b = wrap_vreinterpret_s64_f64 (a); + vst1_s64 (e, b); + if (d[0] != e[0]) + return 1; + return 0; +}; + +float32x4_t __attribute__ ((noinline)) +wrap_vreinterpretq_f32_f64 (float64x2_t __a) +{ + return vreinterpretq_f32_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_f32_f64 () +{ + float64x2_t a; + float32x4_t b; + float64_t c[2] = { PI_F64, E_F64 
}; + + /* Values corresponding to f32 reinterpret of + { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. */ + float32_t d[4] = { 3.3702805504E12, + 2.1426990032196044921875E0, + -2.8569523269651966444143014594E-32, + 2.089785099029541015625E0 }; + float32_t e[4]; + int i; + + a = vld1q_f64 (c); + b = wrap_vreinterpretq_f32_f64 (a); + vst1q_f32 (e, b); + for (i = 0; i < 4; i++) + { + if (!DOUBLE_EQUALS (d[i], e[i], __FLT_EPSILON__)) + return 1; + } + return 0; +}; + +int8x16_t __attribute__ ((noinline)) +wrap_vreinterpretq_s8_f64 (float64x2_t __a) +{ + return vreinterpretq_s8_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_s8_f64 () +{ + float64x2_t a; + int8x16_t b; + float64_t c[2] = { PI_F64, E_F64 }; + int8_t d[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, + 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; + int8_t e[16]; + int i; + + a = vld1q_f64 (c); + b = wrap_vreinterpretq_s8_f64 (a); + vst1q_s8 (e, b); + for (i = 0; i < 16; i++) + if (d[i] != e[i]) + return 1; + return 0; +}; + +int16x8_t __attribute__ ((noinline)) +wrap_vreinterpretq_s16_f64 (float64x2_t __a) +{ + return vreinterpretq_s16_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_s16_f64 () +{ + float64x2_t a; + int16x8_t b; + float64_t c[2] = { PI_F64, E_F64 }; + int16_t d[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, + 0x5769, 0x8B14, 0xBF0A, 0x4005 }; + int16_t e[8]; + int i; + + a = vld1q_f64 (c); + b = wrap_vreinterpretq_s16_f64 (a); + vst1q_s16 (e, b); + for (i = 0; i < 8; i++) + if (d[i] != e[i]) + return 1; + return 0; +}; + +int32x4_t __attribute__ ((noinline)) +wrap_vreinterpretq_s32_f64 (float64x2_t __a) +{ + return vreinterpretq_s32_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_s32_f64 () +{ + float64x2_t a; + int32x4_t b; + float64_t c[2] = { PI_F64, E_F64 }; + int32_t d[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; + int32_t e[4]; + int i; + + a = vld1q_f64 (c); + b = wrap_vreinterpretq_s32_f64 (a); + vst1q_s32 (e, b); + for (i = 0; i < 4; i++) + if (d[i] != e[i]) + return 1; + return 0; +}; + +int64x2_t __attribute__ ((noinline)) +wrap_vreinterpretq_s64_f64 (float64x2_t __a) +{ + return vreinterpretq_s64_f64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_s64_f64 () +{ + float64x2_t a; + int64x2_t b; + float64_t c[2] = { PI_F64, E_F64 }; + int64_t d[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; + int64_t e[2]; + int i; + + a = vld1q_f64 (c); + b = wrap_vreinterpretq_s64_f64 (a); + vst1q_s64 (e, b); + for (i = 0; i < 2; i++) + if (d[i] != e[i]) + return 1; + return 0; +}; + +float64x1_t __attribute__ ((noinline)) +wrap_vreinterpret_f64_f32 (float32x2_t __a) +{ + return vreinterpret_f64_f32 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_f64_f32 () +{ + float32x2_t a; + float64x1_t b; + /* Values { 0x54442D18, 0x400921FB } reinterpreted as f32. 
*/ + float32_t c[2] = { 3.3702805504E12, 2.1426990032196044921875E0 }; + float64_t d[1] = { PI_F64 }; + float64_t e[1]; + int i; + + a = vld1_f32 (c); + b = wrap_vreinterpret_f64_f32 (a); + vst1_f64 (e, b); + if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x1_t __attribute__ ((noinline)) +wrap_vreinterpret_f64_s8 (int8x8_t __a) +{ + return vreinterpret_f64_s8 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_f64_s8 () +{ + int8x8_t a; + float64x1_t b; + int8_t c[8] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40 }; + float64_t d[1] = { PI_F64 }; + float64_t e[1]; + int i; + + a = vld1_s8 (c); + b = wrap_vreinterpret_f64_s8 (a); + vst1_f64 (e, b); + if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x1_t __attribute__ ((noinline)) +wrap_vreinterpret_f64_s16 (int16x4_t __a) +{ + return vreinterpret_f64_s16 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_f64_s16 () +{ + int16x4_t a; + float64x1_t b; + int16_t c[4] = { 0x2D18, 0x5444, 0x21FB, 0x4009 }; + float64_t d[1] = { PI_F64 }; + float64_t e[1]; + int i; + + a = vld1_s16 (c); + b = wrap_vreinterpret_f64_s16 (a); + vst1_f64 (e, b); + if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x1_t __attribute__ ((noinline)) +wrap_vreinterpret_f64_s32 (int32x2_t __a) +{ + return vreinterpret_f64_s32 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_f64_s32 () +{ + int32x2_t a; + float64x1_t b; + int32_t c[2] = { 0x54442D18, 0x400921FB }; + float64_t d[1] = { PI_F64 }; + float64_t e[1]; + int i; + + a = vld1_s32 (c); + b = wrap_vreinterpret_f64_s32 (a); + vst1_f64 (e, b); + if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x1_t __attribute__ ((noinline)) +wrap_vreinterpret_f64_s64 (int64x1_t __a) +{ + return vreinterpret_f64_s64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpret_f64_s64 () +{ + int64x1_t a; + float64x1_t b; + int64_t c[1] = { 0x400921FB54442D18 }; + float64_t d[1] = { PI_F64 }; + float64_t e[1]; + + a = vld1_s64 (c); + b = wrap_vreinterpret_f64_s64 (a); + vst1_f64 (e, b); + if (!DOUBLE_EQUALS (d[0], e[0], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x2_t __attribute__ ((noinline)) +wrap_vreinterpretq_f64_f32 (float32x4_t __a) +{ + return vreinterpretq_f64_f32 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_f64_f32 () +{ + float32x4_t a; + float64x2_t b; + /* Values corresponding to f32 reinterpret of + { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }. 
*/ + float32_t c[4] = { 3.3702805504E12, + 2.1426990032196044921875E0, + -2.8569523269651966444143014594E-32, + 2.089785099029541015625E0 }; + + float64_t d[2] = { PI_F64, E_F64 }; + float64_t e[2]; + int i; + + a = vld1q_f32 (c); + b = wrap_vreinterpretq_f64_f32 (a); + vst1q_f64 (e, b); + for (i = 0; i < 2; i++) + if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x2_t __attribute__ ((noinline)) +wrap_vreinterpretq_f64_s8 (int8x16_t __a) +{ + return vreinterpretq_f64_s8 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_f64_s8 () +{ + int8x16_t a; + float64x2_t b; + int8_t c[16] = { 0x18, 0x2D, 0x44, 0x54, 0xFB, 0x21, 0x09, 0x40, + 0x69, 0x57, 0x14, 0x8B, 0x0A, 0xBF, 0x05, 0x40 }; + float64_t d[2] = { PI_F64, E_F64 }; + float64_t e[2]; + int i; + + a = vld1q_s8 (c); + b = wrap_vreinterpretq_f64_s8 (a); + vst1q_f64 (e, b); + for (i = 0; i < 2; i++) + if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x2_t __attribute__ ((noinline)) +wrap_vreinterpretq_f64_s16 (int16x8_t __a) +{ + return vreinterpretq_f64_s16 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_f64_s16 () +{ + int16x8_t a; + float64x2_t b; + int16_t c[8] = { 0x2D18, 0x5444, 0x21FB, 0x4009, + 0x5769, 0x8B14, 0xBF0A, 0x4005 }; + float64_t d[2] = { PI_F64, E_F64 }; + float64_t e[2]; + int i; + + a = vld1q_s16 (c); + b = wrap_vreinterpretq_f64_s16 (a); + vst1q_f64 (e, b); + for (i = 0; i < 2; i++) + if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x2_t __attribute__ ((noinline)) +wrap_vreinterpretq_f64_s32 (int32x4_t __a) +{ + return vreinterpretq_f64_s32 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_f64_s32 () +{ + int32x4_t a; + float64x2_t b; + int32_t c[4] = { 0x54442D18, 0x400921FB, 0x8B145769, 0x4005BF0A }; + float64_t d[2] = { PI_F64, E_F64 }; + float64_t e[2]; + int i; + + a = vld1q_s32 (c); + b = wrap_vreinterpretq_f64_s32 (a); + vst1q_f64 (e, b); + for (i = 0; i < 2; i++) + if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) + return 1; + return 0; +}; + +float64x2_t __attribute__ ((noinline)) +wrap_vreinterpretq_f64_s64 (int64x2_t __a) +{ + return vreinterpretq_f64_s64 (__a); +} + +int __attribute__ ((noinline)) +test_vreinterpretq_f64_s64 () +{ + int64x2_t a; + float64x2_t b; + int64_t c[2] = { 0x400921FB54442D18, 0x4005BF0A8B145769 }; + float64_t d[2] = { PI_F64, E_F64 }; + float64_t e[2]; + int i; + + a = vld1q_s64 (c); + b = wrap_vreinterpretq_f64_s64 (a); + vst1q_f64 (e, b); + for (i = 0; i < 2; i++) + if (!DOUBLE_EQUALS (d[i], e[i], __DBL_EPSILON__)) + return 1; + return 0; +}; + +int +main (int argc, char **argv) +{ + if (test_vreinterpret_f32_f64 ()) + abort (); + + if (test_vreinterpret_s8_f64 ()) + abort (); + if (test_vreinterpret_s16_f64 ()) + abort (); + if (test_vreinterpret_s32_f64 ()) + abort (); + if (test_vreinterpret_s64_f64 ()) + abort (); + + if (test_vreinterpretq_f32_f64 ()) + abort (); + + if (test_vreinterpretq_s8_f64 ()) + abort (); + if (test_vreinterpretq_s16_f64 ()) + abort (); + if (test_vreinterpretq_s32_f64 ()) + abort (); + if (test_vreinterpretq_s64_f64 ()) + abort (); + + if (test_vreinterpret_f64_f32 ()) + abort (); + + if (test_vreinterpret_f64_s8 ()) + abort (); + if (test_vreinterpret_f64_s16 ()) + abort (); + if (test_vreinterpret_f64_s32 ()) + abort (); + if (test_vreinterpret_f64_s64 ()) + abort (); + + if (test_vreinterpretq_f64_f32 ()) + abort (); + + if (test_vreinterpretq_f64_s8 ()) + abort (); + if 
(test_vreinterpretq_f64_s16 ()) + abort (); + if (test_vreinterpretq_f64_s32 ()) + abort (); + if (test_vreinterpretq_f64_s64 ()) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16.x @@ -0,0 +1,114 @@ +extern void abort (void); + +int16x8_t +test_vextq_s16_1 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 1); +} + +int16x8_t +test_vextq_s16_2 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 2); +} + +int16x8_t +test_vextq_s16_3 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 3); +} + +int16x8_t +test_vextq_s16_4 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 4); +} + +int16x8_t +test_vextq_s16_5 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 5); +} + +int16x8_t +test_vextq_s16_6 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 6); +} + +int16x8_t +test_vextq_s16_7 (int16x8_t a, int16x8_t b) +{ + return vextq_s16 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + int16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + int16x8_t in1 = vld1q_s16 (arr1); + int16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + int16x8_t in2 = vld1q_s16 (arr2); + int16_t exp[8]; + int16x8_t expected; + int16x8_t actual = test_vextq_s16_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s16_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1q_s16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8.x @@ -0,0 +1,114 @@ +extern void abort (void); + +uint8x8_t +test_vext_u8_1 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 1); +} + +uint8x8_t +test_vext_u8_2 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 2); +} + +uint8x8_t +test_vext_u8_3 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 3); +} + +uint8x8_t +test_vext_u8_4 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 4); +} + +uint8x8_t +test_vext_u8_5 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 5); +} + +uint8x8_t +test_vext_u8_6 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 6); +} + +uint8x8_t +test_vext_u8_7 (uint8x8_t a, uint8x8_t b) +{ + return vext_u8 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint8x8_t in1 = vld1_u8 (arr1); + uint8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + 
uint8x8_t in2 = vld1_u8 (arr2); + uint8_t exp[8]; + uint8x8_t expected; + uint8x8_t actual = test_vext_u8_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u8_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1_u8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16.x @@ -0,0 +1,114 @@ +extern void abort (void); + +uint16x8_t +test_vextq_u16_1 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 1); +} + +uint16x8_t +test_vextq_u16_2 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 2); +} + +uint16x8_t +test_vextq_u16_3 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 3); +} + +uint16x8_t +test_vextq_u16_4 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 4); +} + +uint16x8_t +test_vextq_u16_5 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 5); +} + +uint16x8_t +test_vextq_u16_6 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 6); +} + +uint16x8_t +test_vextq_u16_7 (uint16x8_t a, uint16x8_t b) +{ + return vextq_u16 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + uint16x8_t in1 = vld1q_u16 (arr1); + uint16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + uint16x8_t in2 = vld1q_u16 (arr2); + uint16_t exp[8]; + uint16x8_t expected; + uint16x8_t actual = test_vextq_u16_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1q_u16 (exp); + for (i = 
0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u16_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1q_u16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vzips16.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int16x8x2_t +test_vuzpqs16 (int16x8_t _a, int16x8_t _b) +{ + return vuzpq_s16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + int16x8x2_t result = test_vuzpqs16 (vld1q_s16 (first), vld1q_s16 (second)); + int16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + int16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + int16x8_t expect1 = vld1q_s16 (exp1); + int16x8_t expect2 = vld1q_s16 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vzipqs8.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev64qp8.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_u16' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrnu16.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint16x8x2_t +test_vuzpqu16 (uint16x8_t _a, uint16x8_t _b) +{ + return vuzpq_u16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + uint16x8x2_t result = test_vuzpqu16 (vld1q_u16 (first), vld1q_u16 (second)); + uint16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + uint16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + uint16x8_t expect1 = vld1q_u16 (exp1); + uint16x8_t expect2 = vld1q_u16 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint8x8x2_t +test_vuzpu8 (uint8x8_t _a, uint8x8_t _b) +{ + return vuzp_u8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + uint8x8x2_t result = test_vuzpu8 (vld1_u8 (first), vld1_u8 (second)); + uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + uint8x8_t expect1 = vld1_u8 (exp1); + uint8x8_t expect2 = vld1_u8 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextu16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_u16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQu8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_u8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint8x16_t +test_vrev64qu8 (uint8x16_t _arg) +{ + return vrev64q_u8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint8x16_t reversed = test_vrev64qu8 (inorder); + uint8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev32p8.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64.x @@ -0,0 +1,17 @@ +extern void abort (void); + +int +main (int argc, char **argv) +{ + int i, off; + int64_t arr1[] = {0}; + int64x1_t in1 = vld1_s64 (arr1); + int64_t arr2[] = {1}; + int64x1_t in2 = vld1_s64 (arr2); + int64x1_t actual = vext_s64 (in1, in2, 0); + if (actual != in1) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int32x2x2_t +test_vuzps32 (int32x2_t _a, int32x2_t _b) +{ + return vuzp_s32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int32_t first[] = {1, 2}; + int32_t second[] = {3, 4}; + int32x2x2_t result = test_vuzps32 (vld1_s32 (first), vld1_s32 (second)); + int32_t exp1[] = {1, 3}; + int32_t exp2[] = {2, 4}; + int32x2_t expect1 = vld1_s32 (exp1); + int32x2_t expect2 = vld1_s32 (exp2); + + for (i = 0; i < 2; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint32x2x2_t +test_vuzpu32 (uint32x2_t _a, uint32x2_t _b) +{ + return vuzp_u32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint32_t first[] = {1, 2}; + uint32_t second[] = {3, 4}; + uint32x2x2_t result = test_vuzpu32 (vld1_u32 (first), vld1_u32 (second)); + uint32_t exp1[] = {1, 3}; + uint32_t exp2[] = {2, 4}; + uint32x2_t expect1 = vld1_u32 (exp1); + uint32x2_t expect2 = vld1_u32 (exp2); + + for (i = 0; i < 2; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64.x @@ -0,0 +1,17 @@ +extern void abort (void); + +int +main (int argc, char **argv) +{ + int i, off; + uint64_t arr1[] = {0}; + uint64x1_t in1 = vld1_u64 (arr1); + uint64_t 
arr2[] = {1}; + uint64x1_t in2 = vld1_u64 (arr2); + uint64x1_t actual = vext_u64 (in1, in2, 0); + if (actual != in1) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vtrns8.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vtrnqs16.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev64qs32.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_s8' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64s8.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int16x8x2_t +test_vzipqs16 (int16x8_t _a, int16x8_t _b) +{ + return vzipq_s16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + int16x8x2_t result = test_vzipqs16 (vld1q_s16 (first), vld1q_s16 (second)); + int16x8_t res1 = result.val[0], res2 = result.val[1]; + int16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; + int16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; + int16x8_t expected1 = vld1q_s16 (exp1); + int16x8_t expected2 = vld1q_s16 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +float32x2x2_t +test_vzipf32 (float32x2_t _a, float32x2_t _b) +{ + return vzip_f32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + float32_t first[] = {1, 2}; + float32_t second[] = {3, 4}; + float32x2x2_t result = test_vzipf32 (vld1_f32 (first), vld1_f32 (second)); + float32x2_t res1 = result.val[0], res2 = result.val[1]; + float32_t exp1[] = {1, 3}; + float32_t exp2[] = {2, 4}; + float32x2_t expected1 = vld1_f32 (exp1); + float32x2_t expected2 = vld1_f32 (exp2); + + for (i = 0; i < 2; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint8x8x2_t +test_vzipu8 (uint8x8_t _a, uint8x8_t _b) +{ + return vzip_u8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + uint8x8x2_t result = test_vzipu8 (vld1_u8 (first), vld1_u8 (second)); + uint8x8_t res1 = result.val[0], res2 = result.val[1]; + uint8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; + uint8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; + uint8x8_t expected1 = vld1_u8 (exp1); + uint8x8_t expected2 = vld1_u8 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint16x8x2_t +test_vzipqu16 (uint16x8_t _a, uint16x8_t _b) +{ + return vzipq_u16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + uint16x8x2_t result = test_vzipqu16 (vld1q_u16 (first), vld1q_u16 (second)); + uint16x8_t res1 = result.val[0], res2 = result.val[1]; + uint16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; + uint16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; + uint16x8_t expected1 = vld1q_u16 (exp1); + uint16x8_t expected2 = vld1q_u16 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; 
+} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQs16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_s16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_p16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpqp16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8.x @@ -0,0 +1,114 @@ +extern void abort (void); + +poly8x8_t +test_vext_p8_1 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 1); +} + +poly8x8_t +test_vext_p8_2 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 2); +} + +poly8x8_t +test_vext_p8_3 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 3); +} + +poly8x8_t +test_vext_p8_4 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 4); +} + +poly8x8_t +test_vext_p8_5 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 5); +} + +poly8x8_t +test_vext_p8_6 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 6); +} + +poly8x8_t +test_vext_p8_7 (poly8x8_t a, poly8x8_t b) +{ + return vext_p8 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + poly8x8_t in1 = vld1_p8 (arr1); + poly8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + poly8x8_t in2 = vld1_p8 (arr2); + poly8_t exp[8]; + poly8x8_t expected; + poly8x8_t actual = test_vext_p8_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p8_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1_p8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- 
a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_u32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqu32.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8.x @@ -0,0 +1,26 @@ +extern void abort (void); + +poly8x8x2_t +test_vuzpp8 (poly8x8_t _a, poly8x8_t _b) +{ + return vuzp_p8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + poly8x8x2_t result = test_vuzpp8 (vld1_p8 (first), vld1_p8 (second)); + poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + poly8x8_t expect1 = vld1_p8 (exp1); + poly8x8_t expect2 = vld1_p8 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev32s16.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vzipqp8.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32q_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev32qs8.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_s32' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev64s32.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/simd.exp @@ -0,0 +1,45 @@ +# Specific regression driver for AArch64 SIMD instructions. +# Copyright (C) 2014 Free Software Foundation, Inc. +# Contributed by ARM Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. */ + +# GCC testsuite that uses the `dg.exp' driver. + +# Exit immediately if this isn't an AArch64 target. +if {![istarget aarch64*-*-*] } then { + return +} + +# Load support procs. +load_lib gcc-dg.exp + +# If a testcase doesn't have special options, use these. +global DEFAULT_CFLAGS +if ![info exists DEFAULT_CFLAGS] then { + set DEFAULT_CFLAGS " -ansi -pedantic-errors" +} + +# Initialize `dg'. +dg-init + +# Main loop. +dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \ + "" $DEFAULT_CFLAGS + +# All done. +dg-finish --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int16x4x2_t +test_vtrns16 (int16x4_t _a, int16x4_t _b) +{ + return vtrn_s16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int16_t first[] = {1, 2, 3, 4}; + int16_t second[] = {5, 6, 7, 8}; + int16x4x2_t result = test_vtrns16 (vld1_s16 (first), vld1_s16 (second)); + int16x4_t res1 = result.val[0], res2 = result.val[1]; + int16_t exp1[] = {1, 5, 3, 7}; + int16_t exp2[] = {2, 6, 4, 8}; + int16x4_t expected1 = vld1_s16 (exp1); + int16x4_t expected2 = vld1_s16 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_u8' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64qu8.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly8x16_t +test_vrev64qp8 (poly8x16_t _arg) +{ + return vrev64q_p8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + poly8x16_t reversed = test_vrev64qp8 (inorder); + poly8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint16x4x2_t +test_vtrnu16 (uint16x4_t _a, uint16x4_t _b) +{ + return vtrn_u16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint16_t first[] = {1, 2, 3, 4}; + uint16_t second[] = {5, 6, 7, 8}; + uint16x4x2_t result = test_vtrnu16 (vld1_u16 (first), vld1_u16 (second)); + uint16x4_t res1 = result.val[0], res2 = result.val[1]; + uint16_t exp1[] = {1, 5, 3, 7}; + uint16_t exp2[] = {2, 6, 4, 8}; + uint16x4_t expected1 = vld1_u16 (exp1); + uint16x4_t expected2 = vld1_u16 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16.x @@ -0,0 +1,58 @@ +extern void abort (void); + +poly16x4_t +test_vext_p16_1 (poly16x4_t a, poly16x4_t b) +{ + return vext_p16 (a, b, 1); +} + +poly16x4_t +test_vext_p16_2 (poly16x4_t a, poly16x4_t b) +{ + return vext_p16 (a, b, 2); +} + +poly16x4_t +test_vext_p16_3 (poly16x4_t a, poly16x4_t b) +{ + return vext_p16 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + poly16_t arr1[] = {0, 1, 2, 3}; + poly16x4_t in1 = vld1_p16 (arr1); + poly16_t arr2[] = {4, 5, 6, 7}; + poly16x4_t in2 = vld1_p16 (arr2); + poly16_t exp[4]; + poly16x4_t expected; + poly16x4_t actual = test_vext_p16_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1_p16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p16_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1_p16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_p16_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1_p16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_p16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpp16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8.x @@ -0,0 +1,29 @@ +extern void abort (void); + +uint8x16x2_t +test_vzipqu8 (uint8x16_t _a, uint8x16_t _b) +{ + return vzipq_u8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + uint8x16x2_t result = test_vzipqu8 (vld1q_u8 (first), vld1q_u8 (second)); + uint8x16_t res1 = result.val[0], res2 = result.val[1]; + uint8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; + uint8_t exp2[] = + {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; + uint8x16_t expected1 = vld1q_u8 (exp1); + uint8x16_t expected2 = vld1q_u8 (exp2); + + for (i = 0; i < 16; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u64_1.c @@ -0,0 +1,11 @@ +/* Test the `vextu64' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_u64.x" + +/* Do not scan-assembler. An EXT instruction could be emitted, but would merely + return its first argument, so it is legitimate to optimize it out. */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_u32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpu32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32q_p16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev32qp16.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32.x @@ -0,0 +1,58 @@ +extern void abort (void); + +float32x4_t +test_vextq_f32_1 (float32x4_t a, float32x4_t b) +{ + return vextq_f32 (a, b, 1); +} + +float32x4_t +test_vextq_f32_2 (float32x4_t a, float32x4_t b) +{ + return vextq_f32 (a, b, 2); +} + +float32x4_t +test_vextq_f32_3 (float32x4_t a, float32x4_t b) +{ + return vextq_f32 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + float32_t arr1[] = {0, 1, 2, 3}; + float32x4_t in1 = vld1q_f32 (arr1); + float32_t arr2[] = {4, 5, 6, 7}; + float32x4_t in2 = vld1q_f32 (arr2); + float32_t exp[4]; + float32x4_t expected; + float32x4_t actual = test_vextq_f32_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1q_f32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_f32_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1q_f32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_f32_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1q_f32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_p16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vzipqp16.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_p8' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrnp8.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u8.x @@ -0,0 +1,227 @@ +extern void abort (void); + +uint8x16_t +test_vextq_u8_1 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 1); +} + +uint8x16_t +test_vextq_u8_2 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 2); +} + +uint8x16_t +test_vextq_u8_3 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 3); +} + +uint8x16_t +test_vextq_u8_4 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 4); +} + +uint8x16_t +test_vextq_u8_5 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 5); +} + +uint8x16_t +test_vextq_u8_6 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 6); +} + +uint8x16_t +test_vextq_u8_7 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 7); +} + +uint8x16_t +test_vextq_u8_8 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 8); +} + +uint8x16_t +test_vextq_u8_9 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 9); +} + +uint8x16_t +test_vextq_u8_10 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 10); +} + +uint8x16_t +test_vextq_u8_11 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 11); +} + +uint8x16_t +test_vextq_u8_12 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 12); +} + +uint8x16_t +test_vextq_u8_13 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 13); +} + +uint8x16_t +test_vextq_u8_14 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 14); +} + +uint8x16_t +test_vextq_u8_15 (uint8x16_t a, uint8x16_t b) +{ + return vextq_u8 (a, b, 15); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + uint8x16_t in1 = vld1q_u8 (arr1); + uint8_t arr2[] = + {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + uint8x16_t in2 = vld1q_u8 (arr2); + uint8_t exp[16]; + uint8x16_t expected; + uint8x16_t actual = test_vextq_u8_1 (in1, in2); + + for (i = 0; i < 16; i++) + exp[i] = i + 1; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_2 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 2; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_3 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 3; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_4 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 4; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_5 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 5; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_6 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 6; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_7 (in1, in2); + for (i = 0; i < 16; i++) 
+ exp[i] = i + 7; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_8 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 8; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_9 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 9; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_10 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 10; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_11 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 11; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_12 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 12; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_13 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 13; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_14 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 14; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u8_15 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 15; + expected = vld1q_u8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_u32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipqu32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64p8.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32_u8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev32u8.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev16_s8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev16s8.x" + +/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +float32x4x2_t +test_vuzpqf32 (float32x4_t _a, float32x4_t _b) +{ + return vuzpq_f32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + float32_t first[] = {1, 2, 3, 4}; + float32_t second[] = {5, 6, 7, 8}; + float32x4x2_t result = test_vuzpqf32 (vld1q_f32 (first), vld1q_f32 (second)); + float32_t exp1[] = {1, 3, 5, 7}; + float32_t exp2[] = {2, 4, 6, 8}; + float32x4_t expect1 = vld1q_f32 (exp1); + float32x4_t expect2 = vld1q_f32 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +poly8x8x2_t +test_vzipp8 (poly8x8_t _a, poly8x8_t _b) +{ + return vzip_p8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + poly8x8x2_t result = test_vzipp8 (vld1_p8 (first), vld1_p8 (second)); + poly8x8_t res1 = result.val[0], res2 = result.val[1]; + poly8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; + poly8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; + poly8x8_t expected1 = vld1_p8 (exp1); + poly8x8_t expected2 = vld1_p8 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int32x4x2_t +test_vtrnqs32 (int32x4_t _a, int32x4_t _b) +{ + return vtrnq_s32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int32_t first[] = {1, 2, 3, 4}; + int32_t second[] = {5, 6, 7, 8}; + int32x4x2_t result = test_vtrnqs32 (vld1q_s32 (first), vld1q_s32 (second)); + int32x4_t res1 = result.val[0], res2 = result.val[1]; + int32_t exp1[] = {1, 5, 3, 7}; + int32_t exp2[] = {2, 6, 4, 8}; + int32x4_t expected1 = vld1q_s32 (exp1); + int32x4_t expected2 = vld1q_s32 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint32x4x2_t +test_vtrnqu32 (uint32x4_t _a, uint32x4_t _b) +{ + return vtrnq_u32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint32_t first[] = {1, 2, 3, 4}; + uint32_t second[] = {5, 6, 7, 8}; + uint32x4x2_t result = test_vtrnqu32 (vld1q_u32 (first), vld1q_u32 (second)); + uint32x4_t res1 = result.val[0], res2 = result.val[1]; + uint32_t exp1[] = {1, 5, 3, 7}; + uint32_t exp2[] = {2, 6, 4, 8}; + uint32x4_t expected1 = vld1q_u32 (exp1); + uint32x4_t expected2 = vld1q_u32 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs32.x @@ -0,0 +1,22 
@@ +extern void abort (void); + +int32x4_t +test_vrev64qs32 (int32x4_t _arg) +{ + return vrev64q_s32 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int32x4_t inorder = {1, 2, 3, 4}; + int32x4_t reversed = test_vrev64qs32 (inorder); + int32x4_t expected = {2, 1, 4, 3}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint8x8x2_t +test_vtrnu8 (uint8x8_t _a, uint8x8_t _b) +{ + return vtrn_u8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + uint8x8x2_t result = test_vtrnu8 (vld1_u8 (first), vld1_u8 (second)); + uint8x8_t res1 = result.val[0], res2 = result.val[1]; + uint8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; + uint8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; + uint8x8_t expected1 = vld1_u8 (exp1); + uint8x8_t expected2 = vld1_u8 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint32x4_t +test_vrev64qu32 (uint32x4_t _arg) +{ + return vrev64q_u32 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint32x4_t inorder = {1, 2, 3, 4}; + uint32x4_t reversed = test_vrev64qu32 (inorder); + uint32x4_t expected = {2, 1, 4, 3}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQs64' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_s64.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8.x @@ -0,0 +1,114 @@ +extern void abort (void); + +int8x8_t +test_vext_s8_1 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 1); +} + +int8x8_t +test_vext_s8_2 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 2); +} + +int8x8_t +test_vext_s8_3 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 3); +} + +int8x8_t +test_vext_s8_4 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 4); +} + +int8x8_t +test_vext_s8_5 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 5); +} + +int8x8_t +test_vext_s8_6 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 6); +} + +int8x8_t +test_vext_s8_7 (int8x8_t a, int8x8_t b) +{ + return vext_s8 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + int8x8_t in1 = vld1_s8 (arr1); + int8_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + int8x8_t in2 = vld1_s8 (arr2); + int8_t exp[8]; + int8x8_t expected; + int8x8_t actual = test_vext_s8_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s8_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1_s8 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzips32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32q_p8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev32qp8.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_p16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vtrnp16.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_u32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vtrnu32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int8x8x2_t +test_vuzps8 (int8x8_t _a, int8x8_t _b) +{ + return vuzp_s8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + int8x8x2_t result = test_vuzps8 (vld1_s8 (first), vld1_s8 (second)); + int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + int8x8_t expect1 = vld1_s8 (exp1); + int8x8_t expect2 = vld1_s8 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu8_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_u8' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipqu8.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp8.x @@ -0,0 +1,29 @@ +extern void abort (void); + +poly8x16x2_t +test_vzipqp8 (poly8x16_t _a, poly8x16_t _b) +{ + return vzipq_p8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + poly8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + poly8x16x2_t result = test_vzipqp8 (vld1q_p8 (first), vld1q_p8 (second)); + poly8x16_t res1 = result.val[0], res2 = result.val[1]; + poly8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; + poly8_t exp2[] = + {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; + poly8x16_t expected1 = vld1q_p8 (exp1); + poly8x16_t expected2 = vld1q_p8 (exp2); + + for (i = 0; i < 16; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextp16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_p16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int16x4_t +test_vrev32s16 (int16x4_t _arg) +{ + return vrev32_s16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int16x4_t inorder = {1, 2, 3, 4}; + int16x4_t reversed = test_vrev32s16 (inorder); + int16x4_t expected = {2, 1, 4, 3}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint16x4_t +test_vrev32u16 (uint16x4_t _arg) +{ + return vrev32_u16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint16x4_t inorder = {1, 2, 3, 4}; + uint16x4_t reversed = test_vrev32u16 (inorder); + uint16x4_t expected = {2, 1, 4, 3}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly16x4_t +test_vrev64p16 (poly16x4_t _arg) +{ + return vrev64_p16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly16x4_t inorder = {1, 2, 3, 4}; + poly16x4_t reversed = test_vrev64p16 (inorder); + poly16x4_t expected = {4, 3, 2, 1}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c +++ 
b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_f32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64qf32.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +float32x4x2_t +test_vzipqf32 (float32x4_t _a, float32x4_t _b) +{ + return vzipq_f32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + float32_t first[] = {1, 2, 3, 4}; + float32_t second[] = {5, 6, 7, 8}; + float32x4x2_t result = test_vzipqf32 (vld1q_f32 (first), vld1q_f32 (second)); + float32x4_t res1 = result.val[0], res2 = result.val[1]; + float32_t exp1[] = {1, 5, 2, 6}; + float32_t exp2[] = {3, 7, 4, 8}; + float32x4_t expected1 = vld1q_f32 (exp1); + float32x4_t expected2 = vld1q_f32 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextu32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_u32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8.x @@ -0,0 +1,227 @@ +extern void abort (void); + +poly8x16_t +test_vextq_p8_1 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 1); +} + +poly8x16_t +test_vextq_p8_2 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 2); +} + +poly8x16_t +test_vextq_p8_3 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 3); +} + +poly8x16_t +test_vextq_p8_4 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 4); +} + +poly8x16_t +test_vextq_p8_5 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 5); +} + +poly8x16_t +test_vextq_p8_6 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 6); +} + +poly8x16_t +test_vextq_p8_7 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 7); +} + +poly8x16_t +test_vextq_p8_8 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 8); +} + +poly8x16_t +test_vextq_p8_9 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 9); +} + +poly8x16_t +test_vextq_p8_10 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 10); +} + +poly8x16_t +test_vextq_p8_11 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 11); +} + +poly8x16_t +test_vextq_p8_12 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 12); +} + +poly8x16_t +test_vextq_p8_13 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 13); +} + +poly8x16_t +test_vextq_p8_14 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 14); +} + +poly8x16_t +test_vextq_p8_15 (poly8x16_t a, poly8x16_t b) +{ + return vextq_p8 (a, b, 15); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + poly8x16_t in1 = vld1q_p8 (arr1); + poly8_t arr2[] = + {16, 17, 18, 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + poly8x16_t in2 = vld1q_p8 (arr2); + poly8_t exp[16]; + poly8x16_t expected; + poly8x16_t actual = test_vextq_p8_1 (in1, in2); + + for (i = 0; i < 16; i++) + exp[i] = i + 1; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_2 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 2; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_3 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 3; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_4 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 4; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_5 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 5; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_6 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 6; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_7 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 7; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_8 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 8; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_9 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 9; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_10 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 10; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_11 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 11; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_12 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 12; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_13 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 13; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_14 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 14; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p8_15 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 15; + expected = vld1q_p8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int8x16_t +test_vrev64qs8 (int8x16_t _arg) +{ + return vrev64q_s8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + int8x16_t reversed = test_vrev64qs8 (inorder); + int8x16_t expected = {8, 7, 6, 5, 4, 3, 2, 1, 16, 15, 14, 13, 12, 11, 10, 9}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- 
a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev16_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev16p8.x" + +/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs32_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrnqs32.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int16x4x2_t +test_vuzps16 (int16x4_t _a, int16x4_t _b) +{ + return vuzp_s16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int16_t first[] = {1, 2, 3, 4}; + int16_t second[] = {5, 6, 7, 8}; + int16x4x2_t result = test_vuzps16 (vld1_s16 (first), vld1_s16 (second)); + int16_t exp1[] = {1, 3, 5, 7}; + int16_t exp2[] = {2, 4, 6, 8}; + int16x4_t expect1 = vld1_s16 (exp1); + int16x4_t expect2 = vld1_s16 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint16x4x2_t +test_vuzpu16 (uint16x4_t _a, uint16x4_t _b) +{ + return vuzp_u16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint16_t first[] = {1, 2, 3, 4}; + uint16_t second[] = {5, 6, 7, 8}; + uint16x4x2_t result = test_vuzpu16 (vld1_u16 (first), vld1_u16 (second)); + uint16_t exp1[] = {1, 3, 5, 7}; + uint16_t exp2[] = {2, 4, 6, 8}; + uint16x4_t expect1 = vld1_u16 (exp1); + uint16x4_t expect2 = vld1_u16 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu8_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_u8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrnu8.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +poly8x8x2_t +test_vtrnp8 (poly8x8_t _a, poly8x8_t _b) +{ + return vtrn_p8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + poly8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + poly8x8x2_t result = test_vtrnp8 (vld1_p8 (first), vld1_p8 (second)); + poly8x8_t res1 = result.val[0], res2 = result.val[1]; + poly8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; + poly8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; + poly8x8_t expected1 = vld1_p8 (exp1); + poly8x8_t expected2 = vld1_p8 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int16x8_t +test_vrev32qs16 (int16x8_t _arg) +{ + return vrev32q_s16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + int16x8_t reversed = test_vrev32qs16 (inorder); + int16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_f32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64f32.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int8x8x2_t +test_vzips8 (int8x8_t _a, int8x8_t _b) +{ + return vzip_s8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + int8x8x2_t result = test_vzips8 (vld1_s8 (first), vld1_s8 (second)); + int8x8_t res1 = result.val[0], res2 = result.val[1]; + int8_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; + int8_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; + int8x8_t expected1 = vld1_s8 (exp1); + int8x8_t expected2 = vld1_s8 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQs32' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_s32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint16x8_t +test_vrev32qu16 (uint16x8_t _arg) +{ + return vrev32q_u16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + uint16x8_t reversed = test_vrev32qu16 (inorder); + uint16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64qu16.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_u8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64u8.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +float32x2x2_t +test_vtrnf32 (float32x2_t _a, float32x2_t _b) +{ + return vtrn_f32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + float32_t first[] = {1, 2}; + float32_t second[] = {3, 4}; + float32x2x2_t result = test_vtrnf32 (vld1_f32 (first), vld1_f32 (second)); + float32x2_t res1 = result.val[0], res2 = result.val[1]; + float32_t exp1[] = {1, 3}; + float32_t exp2[] = {2, 4}; + float32x2_t expected1 = vld1_f32 (exp1); + float32x2_t expected2 = vld1_f32 (exp2); + + for (i = 0; i < 2; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s8_1.c @@ -0,0 +1,10 @@ +/* Test the `vexts8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_s8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint8x8_t +test_vrev16u8 (uint8x8_t _arg) +{ + return vrev16_u8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8x8_t reversed = test_vrev16u8 (inorder); + uint8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpqs16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s64.x @@ -0,0 +1,30 @@ +extern void abort (void); + +int64x2_t +test_vextq_s64_1 (int64x2_t a, int64x2_t b) +{ + return vextq_s64 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + int64_t arr1[] = {0, 1}; + int64x2_t in1 = vld1q_s64 (arr1); + int64_t arr2[] = {2, 3}; + int64x2_t in2 = vld1q_s64 (arr2); + int64_t exp[2]; + int64x2_t expected; + int64x2_t actual = test_vextq_s64_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i + 1; + expected = vld1q_s64 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +poly16x4x2_t +test_vzipp16 (poly16x4_t _a, poly16x4_t _b) +{ + return vzip_p16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly16_t first[] = {1, 2, 3, 4}; + poly16_t second[] = {5, 6, 7, 8}; + poly16x4x2_t result = test_vzipp16 (vld1_p16 (first), vld1_p16 (second)); + poly16x4_t res1 = result.val[0], res2 = result.val[1]; + poly16_t exp1[] = {1, 5, 2, 6}; + poly16_t exp2[] = {3, 7, 4, 8}; + poly16x4_t expected1 = vld1_p16 (exp1); + poly16x4_t expected2 = vld1_p16 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64.x @@ -0,0 +1,30 @@ +extern void abort (void); + +uint64x2_t +test_vextq_u64_1 (uint64x2_t a, uint64x2_t b) +{ + return vextq_u64 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint64_t arr1[] = {0, 1}; + uint64x2_t in1 = vld1q_u64 (arr1); + uint64_t arr2[] = {2, 3}; + uint64x2_t in2 = vld1q_u64 (arr2); + uint64_t exp[2]; + uint64x2_t expected; + uint64x2_t actual = test_vextq_u64_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i + 
1; + expected = vld1q_u64 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32q_u8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev32qu8.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64u16.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs8.x @@ -0,0 +1,29 @@ +extern void abort (void); + +int8x16x2_t +test_vzipqs8 (int8x16_t _a, int8x16_t _b) +{ + return vzipq_s8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + int8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + int8x16x2_t result = test_vzipqs8 (vld1q_s8 (first), vld1q_s8 (second)); + int8x16_t res1 = result.val[0], res2 = result.val[1]; + int8_t exp1[] = {1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24}; + int8_t exp2[] = + {9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, 16, 32}; + int8x16_t expected1 = vld1q_s8 (exp1); + int8x16_t expected2 = vld1q_s8 (exp2); + + for (i = 0; i < 16; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8.x @@ -0,0 +1,28 @@ +extern void abort (void); + +uint8x16x2_t +test_vtrnqu8 (uint8x16_t _a, uint8x16_t _b) +{ + return vtrnq_u8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + uint8x16x2_t result = test_vtrnqu8 (vld1q_u8 (first), vld1q_u8 (second)); + uint8x16_t res1 = result.val[0], res2 = result.val[1]; + uint8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; + uint8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; + uint8x16_t expected1 = vld1q_u8 (exp1); + uint8x16_t expected2 = vld1q_u8 (exp2); + + for (i = 0; i < 16; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32.x @@ -0,0 +1,30 @@ +extern void abort (void); + +int32x2_t +test_vext_s32_1 (int32x2_t a, int32x2_t b) +{ + return vext_s32 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + int32_t arr1[] = {0, 1}; + int32x2_t in1 = vld1_s32 (arr1); + int32_t arr2[] = {2, 3}; + int32x2_t in2 = vld1_s32 (arr2); + int32_t exp[2]; + int32x2_t expected; + int32x2_t actual = test_vext_s32_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i 
+ 1; + expected = vld1_s32 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzps16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u32.x @@ -0,0 +1,30 @@ +extern void abort (void); + +uint32x2_t +test_vext_u32_1 (uint32x2_t a, uint32x2_t b) +{ + return vext_u32 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint32_t arr1[] = {0, 1}; + uint32x2_t in1 = vld1_u32 (arr1); + uint32_t arr2[] = {2, 3}; + uint32x2_t in2 = vld1_u32 (arr2); + uint32_t exp[2]; + uint32x2_t expected; + uint32x2_t actual = test_vext_u32_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i + 1; + expected = vld1_u32 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpqs8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8.x @@ -0,0 +1,227 @@ +extern void abort (void); + +int8x16_t +test_vextq_s8_1 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 1); +} + +int8x16_t +test_vextq_s8_2 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 2); +} + +int8x16_t +test_vextq_s8_3 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 3); +} + +int8x16_t +test_vextq_s8_4 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 4); +} + +int8x16_t +test_vextq_s8_5 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 5); +} + +int8x16_t +test_vextq_s8_6 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 6); +} + +int8x16_t +test_vextq_s8_7 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 7); +} + +int8x16_t +test_vextq_s8_8 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 8); +} + +int8x16_t +test_vextq_s8_9 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 9); +} + +int8x16_t +test_vextq_s8_10 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 10); +} + +int8x16_t +test_vextq_s8_11 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 11); +} + +int8x16_t +test_vextq_s8_12 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 12); +} + +int8x16_t +test_vextq_s8_13 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 13); +} + +int8x16_t +test_vextq_s8_14 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 14); +} + +int8x16_t 
+test_vextq_s8_15 (int8x16_t a, int8x16_t b) +{ + return vextq_s8 (a, b, 15); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + int8x16_t in1 = vld1q_s8 (arr1); + int8_t arr2[] = + {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}; + int8x16_t in2 = vld1q_s8 (arr2); + int8_t exp[16]; + int8x16_t expected; + int8x16_t actual = test_vextq_s8_1 (in1, in2); + + for (i = 0; i < 16; i++) + exp[i] = i + 1; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_2 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 2; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_3 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 3; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_4 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 4; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_5 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 5; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_6 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 6; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_7 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 7; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_8 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 8; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_9 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 9; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_10 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 10; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_11 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 11; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_12 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 12; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_13 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 13; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_14 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 14; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s8_15 (in1, in2); + for (i = 0; i < 16; i++) + exp[i] = i + 15; + expected = vld1q_s8 (exp); + for (i = 0; i < 16; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f64_1.c @@ -0,0 +1,36 @@ +/* Test the `vextq_f64' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +extern void abort (void); +#include + +float64x2_t +test_vextq_f64_1 (float64x2_t a, float64x2_t b) +{ + return vextq_f64 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + float64_t arr1[] = {0, 1}; + float64x2_t in1 = vld1q_f64 (arr1); + float64_t arr2[] = {2, 3}; + float64x2_t in2 = vld1q_f64 (arr2); + float64_t exp[] = {1, 2}; + float64x2_t expected = vld1q_f64 (exp); + float64x2_t actual = test_vextq_f64_1 (in1, in2); + + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32q_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev32qs16.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs16_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipqs16.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipf32_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_f32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipf32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16u8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev16_u8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev16u8.x" + +/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16p8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly8x8_t +test_vrev16p8 (poly8x8_t _arg) +{ + return vrev16_p8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + poly8x8_t reversed = test_vrev16p8 (inorder); + poly8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_p8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextp8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_p8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int8x8x2_t +test_vtrns8 (int8x8_t _a, int8x8_t _b) +{ + return vtrn_s8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int8_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + int8x8x2_t result = test_vtrns8 (vld1_s8 (first), vld1_s8 (second)); + int8x8_t res1 = result.val[0], res2 = result.val[1]; + int8_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; + int8_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; + int8x8_t expected1 = vld1_s8 (exp1); + int8x8_t expected2 = vld1_s8 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int16x8x2_t +test_vtrnqs16 (int16x8_t _a, int16x8_t _b) +{ + return vtrnq_s16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + int16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + int16x8x2_t result = test_vtrnqs16 (vld1q_s16 (first), vld1q_s16 (second)); + int16x8_t res1 = result.val[0], res2 = result.val[1]; + int16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; + int16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; + int16x8_t expected1 = vld1q_s16 (exp1); + int16x8_t expected2 = vld1q_s16 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint16x8x2_t +test_vtrnqu16 (uint16x8_t _a, uint16x8_t _b) +{ + return vtrnq_u16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + uint16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + uint16x8x2_t result = test_vtrnqu16 (vld1q_u16 (first), vld1q_u16 (second)); + uint16x8_t res1 = result.val[0], res2 = result.val[1]; + uint16_t exp1[] = 
{1, 9, 3, 11, 5, 13, 7, 15}; + uint16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; + uint16x8_t expected1 = vld1q_u16 (exp1); + uint16x8_t expected2 = vld1q_u16 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16.x @@ -0,0 +1,114 @@ +extern void abort (void); + +poly16x8_t +test_vextq_p16_1 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 1); +} + +poly16x8_t +test_vextq_p16_2 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 2); +} + +poly16x8_t +test_vextq_p16_3 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 3); +} + +poly16x8_t +test_vextq_p16_4 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 4); +} + +poly16x8_t +test_vextq_p16_5 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 5); +} + +poly16x8_t +test_vextq_p16_6 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 6); +} + +poly16x8_t +test_vextq_p16_7 (poly16x8_t a, poly16x8_t b) +{ + return vextq_p16 (a, b, 7); +} + +int +main (int argc, char **argv) +{ + int i, off; + poly16_t arr1[] = {0, 1, 2, 3, 4, 5, 6, 7}; + poly16x8_t in1 = vld1q_p16 (arr1); + poly16_t arr2[] = {8, 9, 10, 11, 12, 13, 14, 15}; + poly16x8_t in2 = vld1q_p16 (arr2); + poly16_t exp[8]; + poly16x8_t expected; + poly16x8_t actual = test_vextq_p16_1 (in1, in2); + + for (i = 0; i < 8; i++) + exp[i] = i + 1; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_2 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 2; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_3 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 3; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_4 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 4; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_5 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 5; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_6 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 6; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_p16_7 (in1, in2); + for (i = 0; i < 8; i++) + exp[i] = i + 7; + expected = vld1q_p16 (exp); + for (i = 0; i < 8; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int16x8_t +test_vrev64qs16 (int16x8_t _arg) +{ + return vrev64q_s16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + int16x8_t reversed = test_vrev64qs16 (inorder); + int16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint16x8_t +test_vrev64qu16 (uint16x8_t _arg) +{ + return vrev64q_u16 (_arg); +} + +int +main (int argc, char **argv) +{ + 
int i; + uint16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + uint16x8_t reversed = test_vrev64qu16 (inorder); + uint16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint8x8_t +test_vrev64u8 (uint8x8_t _arg) +{ + return vrev64_u8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8x8_t reversed = test_vrev64u8 (inorder); + uint8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +poly16x8x2_t +test_vuzpqp16 (poly16x8_t _a, poly16x8_t _b) +{ + return vuzpq_p16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + poly16x8x2_t result = test_vuzpqp16 (vld1q_p16 (first), vld1q_p16 (second)); + poly16_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15}; + poly16_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16}; + poly16x8_t expect1 = vld1q_p16 (exp1); + poly16x8_t expect2 = vld1q_p16 (exp2); + + for (i = 0; i < 8; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns16_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrns16.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_u16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vzipu16.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +float32x2x2_t +test_vuzpf32 (float32x2_t _a, float32x2_t _b) +{ + return vuzp_f32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + float32_t first[] = {1, 2}; + float32_t second[] = {3, 4}; + float32x2x2_t result = test_vuzpf32 (vld1_f32 (first), vld1_f32 (second)); + float32_t exp1[] = {1, 3}; + float32_t exp2[] = {2, 4}; + float32x2_t expect1 = vld1_f32 (exp1); + float32x2_t expect2 = vld1_f32 (exp2); + + for (i = 0; i < 2; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vtrnqs8.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_f32' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrnqf32.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8.x @@ -0,0 +1,28 @@ +extern void abort (void); + +poly8x16x2_t +test_vtrnqp8 (poly8x16_t _a, poly8x16_t _b) +{ + return vtrnq_p8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + poly8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + poly8x16x2_t result = test_vtrnqp8 (vld1q_p8 (first), vld1q_p8 (second)); + poly8x16_t res1 = result.val[0], res2 = result.val[1]; + poly8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; + poly8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; + poly8x16_t expected1 = vld1q_p8 (exp1); + poly8x16_t expected2 = vld1q_p8 (exp2); + + for (i = 0; i < 16; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s32.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int32x2_t +test_vrev64s32 (int32x2_t _arg) +{ + return vrev64_s32 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int32x2_t inorder = {1, 2}; + int32x2_t reversed = test_vrev64s32 (inorder); + int32x2_t expected = {2, 1}; + + for (i = 0; i < 2; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16_1.c @@ -0,0 +1,10 @@ +/* Test the `vexts16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_s16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint8x16_t +test_vrev32qu8 (uint8x16_t _arg) +{ + return vrev32q_u8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint8x16_t reversed = test_vrev32qu8 (inorder); + uint8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint32x2_t +test_vrev64u32 (uint32x2_t _arg) +{ + return vrev64_u32 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint32x2_t inorder = {1, 2}; + uint32x2_t reversed = test_vrev64u32 (inorder); + uint32x2_t expected = {2, 1}; + + for (i = 0; i < 2; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_f32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQf32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_f32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint8x16_t +test_vrev16qu8 (uint8x16_t _arg) +{ + return vrev16q_u8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint8x16_t reversed = test_vrev16qu8 (inorder); + uint8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpqp8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_p16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev64qp16.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqp16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +poly16x8x2_t +test_vzipqp16 (poly16x8_t _a, poly16x8_t _b) +{ + return vzipq_p16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + poly16x8x2_t result = test_vzipqp16 (vld1q_p16 (first), vld1q_p16 (second)); + poly16x8_t res1 = result.val[0], res2 = result.val[1]; + poly16_t exp1[] = {1, 9, 2, 10, 3, 11, 4, 12}; + poly16_t exp2[] = {5, 13, 6, 14, 7, 15, 8, 16}; + poly16x8_t expected1 = vld1q_p16 (exp1); + poly16x8_t expected2 = vld1q_p16 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu16_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vtrnqu16.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qu32_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_u32' AArch64 SIMD intrinsic.
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64qu32.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4s, ?v\[0-9\]+.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint8x16x2_t +test_vuzpqu8 (uint8x16_t _a, uint8x16_t _b) +{ + return vuzpq_u8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + uint8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + uint8x16x2_t result = test_vuzpqu8 (vld1q_u8 (first), vld1q_u8 (second)); + uint8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; + uint8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + uint8x16_t expect1 = vld1q_u8 (exp1); + uint8x16_t expect2 = vld1q_u8 (exp2); + + for (i = 0; i < 16; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly8x8_t +test_vrev64p8 (poly8x8_t _arg) +{ + return vrev64_p8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + poly8x8_t reversed = test_vrev64p8 (inorder); + poly8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint8x8_t +test_vrev32u8 (uint8x8_t _arg) +{ + return vrev32_u8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + uint8x8_t reversed = test_vrev32u8 (inorder); + uint8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16s8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int8x8_t +test_vrev16s8 (int8x8_t _arg) +{ + return vrev16_s8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + int8x8_t reversed = test_vrev16s8 (inorder); + int8x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextu8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_u8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#?\[0-9\]+\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQu16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_u16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpqs32.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzps8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp8_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vtrnqp8.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64p16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_p16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev64p16.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32u16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32_u16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev32u16.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnp16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +poly16x4x2_t +test_vtrnp16 (poly16x4_t _a, poly16x4_t _b) +{ + return vtrn_p16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly16_t first[] = {1, 2, 3, 4}; + poly16_t second[] = {5, 6, 7, 8}; + poly16x4x2_t result = test_vtrnp16 (vld1_p16 (first), vld1_p16 (second)); + poly16x4_t res1 = result.val[0], res2 = result.val[1]; + poly16_t exp1[] = {1, 5, 3, 7}; + poly16_t exp2[] = {2, 6, 4, 8}; + poly16x4_t expected1 = vld1_p16 (exp1); + poly16x4_t expected2 = vld1_p16 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly8x16_t +test_vrev32qp8 (poly8x16_t _arg) +{ + return vrev32q_p8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + poly8x16_t reversed = test_vrev32qp8 (inorder); + poly8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev16q_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev16qs8.x" + +/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int32x2x2_t +test_vzips32 (int32x2_t _a, int32x2_t _b) +{ + return vzip_s32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int32_t first[] = {1, 2}; + int32_t second[] = {3, 4}; + int32x2x2_t result = test_vzips32 (vld1_s32 (first), vld1_s32 (second)); + int32x2_t res1 = result.val[0], res2 = result.val[1]; + int32_t exp1[] = {1, 3}; + int32_t exp2[] = {2, 4}; + int32x2_t expected1 = vld1_s32 (exp1); + int32x2_t expected2 = vld1_s32 (exp2); + + for (i = 0; i < 2; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u32_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_u32' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64u32.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.2s, ?v\[0-9\]+.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly8x16_t +test_vrev16qp8 (poly8x16_t _arg) +{ + return vrev16q_p8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + poly8x16_t reversed = test_vrev16qp8 (inorder); + poly8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint32x2x2_t +test_vzipu32 (uint32x2_t _a, uint32x2_t _b) +{ + return vzip_u32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint32_t first[] = {1, 2}; + uint32_t second[] = {3, 4}; + uint32x2x2_t result = test_vzipu32 (vld1_u32 (first), vld1_u32 (second)); + uint32x2_t res1 = result.val[0], res2 = result.val[1]; + uint32_t exp1[] = {1, 3}; + uint32_t exp2[] = {2, 4}; + uint32x2_t expected1 = vld1_u32 (exp1); + uint32x2_t expected2 = vld1_u32 (exp2); + + for (i = 0; i < 2; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqf32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +float32x4x2_t +test_vtrnqf32 (float32x4_t _a, float32x4_t _b) +{ + return vtrnq_f32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + float32_t first[] = {1, 2, 3, 4}; + float32_t second[] = {5, 6, 7, 8}; + float32x4x2_t result = test_vtrnqf32 (vld1q_f32 (first), vld1q_f32 (second)); + float32x4_t res1 = result.val[0], res2 = result.val[1]; + float32_t exp1[] = {1, 5, 3, 7}; + float32_t exp2[] = {2, 6, 4, 8}; + float32x4_t expected1 = vld1q_f32 (exp1); + float32x4_t expected2 = vld1q_f32 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqs8.x @@ -0,0 +1,28 @@ +extern void abort (void); + +int8x16x2_t +test_vtrnqs8 (int8x16_t _a, int8x16_t _b) +{ + return vtrnq_s8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + int8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + int8x16x2_t result = test_vtrnqs8 (vld1q_s8 (first), vld1q_s8 (second)); + int8x16_t res1 = result.val[0], res2 = result.val[1]; + int8_t exp1[] = {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}; + int8_t exp2[] = {2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30, 16, 32}; + int8x16_t expected1 = vld1q_s8 (exp1); + int8x16_t expected2 = vld1q_s8 (exp2); + + for (i = 0; i < 16; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s64_1.c @@ -0,0 +1,11 @@ +/* Test the `vexts64' 
AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_s64.x" + +/* Do not scan-assembler. An EXT instruction could be emitted, but would merely + return its first argument, so it is legitimate to optimize it out. */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzps32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzps32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qf32.x @@ -0,0 +1,22 @@ +extern void abort (void); + +float32x4_t +test_vrev64qf32 (float32x4_t _arg) +{ + return vrev64q_f32 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + float32x4_t inorder = {1, 2, 3, 4}; + float32x4_t reversed = test_vrev64qf32 (inorder); + float32x4_t expected = {2, 1, 4, 3}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s16.x @@ -0,0 +1,58 @@ +extern void abort (void); + +int16x4_t +test_vext_s16_1 (int16x4_t a, int16x4_t b) +{ + return vext_s16 (a, b, 1); +} + +int16x4_t +test_vext_s16_2 (int16x4_t a, int16x4_t b) +{ + return vext_s16 (a, b, 2); +} + +int16x4_t +test_vext_s16_3 (int16x4_t a, int16x4_t b) +{ + return vext_s16 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + int16_t arr1[] = {0, 1, 2, 3}; + int16x4_t in1 = vld1_s16 (arr1); + int16_t arr2[] = {4, 5, 6, 7}; + int16x4_t in2 = vld1_s16 (arr2); + int16_t exp[4]; + int16x4_t expected; + int16x4_t actual = test_vext_s16_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1_s16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s16_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1_s16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_s16_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1_s16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_u16.x @@ -0,0 +1,58 @@ +extern void abort (void); + +uint16x4_t +test_vext_u16_1 (uint16x4_t a, uint16x4_t b) +{ + return vext_u16 (a, b, 1); +} + +uint16x4_t +test_vext_u16_2 (uint16x4_t a, uint16x4_t b) +{ + return vext_u16 (a, b, 2); +} + +uint16x4_t +test_vext_u16_3 (uint16x4_t a, uint16x4_t b) +{ + return vext_u16 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint16_t arr1[] = {0, 1, 2, 3}; + uint16x4_t in1 = vld1_u16 (arr1); + uint16_t arr2[] = {4, 5, 6, 7}; + uint16x4_t in2 = vld1_u16 (arr2); + uint16_t exp[4]; + uint16x4_t expected; + uint16x4_t actual = test_vext_u16_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1_u16 (exp); + for (i 
= 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u16_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1_u16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vext_u16_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1_u16 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipqs32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqp8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +poly8x16x2_t +test_vuzpqp8 (poly8x16_t _a, poly8x16_t _b) +{ + return vuzpq_p8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + poly8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + poly8x16x2_t result = test_vuzpqp8 (vld1q_p8 (first), vld1q_p8 (second)); + poly8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; + poly8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + poly8x16_t expect1 = vld1q_p8 (exp1); + poly8x16_t expect2 = vld1q_p8 (exp2); + + for (i = 0; i < 16; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_u8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpqu8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips8_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_s8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzips8.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly8x8_t +test_vrev32p8 (poly8x8_t _arg) +{ + return vrev32_p8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + poly8x8_t reversed = test_vrev32p8 (inorder); + poly8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int8x8_t +test_vrev64s8 (int8x8_t _arg) +{ + return vrev64_s8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + int8x8_t reversed = test_vrev64s8 (inorder); + int8x8_t expected = {8, 7, 6, 5, 4, 3, 2, 1}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpp8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s32.x @@ -0,0 +1,58 @@ +extern void abort (void); + +int32x4_t +test_vextq_s32_1 (int32x4_t a, int32x4_t b) +{ + return vextq_s32 (a, b, 1); +} + +int32x4_t +test_vextq_s32_2 (int32x4_t a, int32x4_t b) +{ + return vextq_s32 (a, b, 2); +} + +int32x4_t +test_vextq_s32_3 (int32x4_t a, int32x4_t b) +{ + return vextq_s32 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + int32_t arr1[] = {0, 1, 2, 3}; + int32x4_t in1 = vld1q_s32 (arr1); + int32_t arr2[] = {4, 5, 6, 7}; + int32x4_t in2 = vld1q_s32 (arr2); + int32_t exp[4]; + int32x4_t expected; + int32x4_t actual = test_vextq_s32_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1q_s32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s32_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1q_s32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_s32_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1q_s32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32.x @@ -0,0 +1,58 @@ +extern void abort (void); + +uint32x4_t 
+test_vextq_u32_1 (uint32x4_t a, uint32x4_t b) +{ + return vextq_u32 (a, b, 1); +} + +uint32x4_t +test_vextq_u32_2 (uint32x4_t a, uint32x4_t b) +{ + return vextq_u32 (a, b, 2); +} + +uint32x4_t +test_vextq_u32_3 (uint32x4_t a, uint32x4_t b) +{ + return vextq_u32 (a, b, 3); +} + +int +main (int argc, char **argv) +{ + int i, off; + uint32_t arr1[] = {0, 1, 2, 3}; + uint32x4_t in1 = vld1q_u32 (arr1); + uint32_t arr2[] = {4, 5, 6, 7}; + uint32x4_t in2 = vld1q_u32 (arr2); + uint32_t exp[4]; + uint32x4_t expected; + uint32x4_t actual = test_vextq_u32_1 (in1, in2); + + for (i = 0; i < 4; i++) + exp[i] = i + 1; + expected = vld1q_u32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u32_2 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 2; + expected = vld1q_u32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + actual = test_vextq_u32_3 (in1, in2); + for (i = 0; i < 4; i++) + exp[i] = i + 3; + expected = vld1q_u32 (exp); + for (i = 0; i < 4; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u64_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQu64' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_u64.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.8\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp16_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_p16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipp16.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_s32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrns32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qp8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev16q_p8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev16qp8.x" + +/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +int32x4x2_t +test_vuzpqs32 (int32x4_t _a, int32x4_t _b) +{ + return vuzpq_s32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int32_t first[] = {1, 2, 3, 4}; + int32_t second[] = {5, 6, 7, 8}; + int32x4x2_t result = test_vuzpqs32 (vld1q_s32 (first), vld1q_s32 (second)); + int32_t exp1[] = {1, 3, 5, 7}; + int32_t exp2[] = {2, 4, 6, 8}; + int32x4_t expect1 = vld1q_s32 (exp1); + int32x4_t expect2 = vld1q_s32 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu32_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_u32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipu32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly16x4_t +test_vrev32p16 (poly16x4_t _arg) +{ + return vrev32_p16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly16x4_t inorder = {1, 2, 3, 4}; + poly16x4_t reversed = test_vrev32p16 (inorder); + poly16x4_t expected = {2, 1, 4, 3}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu32.x @@ -0,0 +1,26 @@ +extern void abort (void); + +uint32x4x2_t +test_vuzpqu32 (uint32x4_t _a, uint32x4_t _b) +{ + return vuzpq_u32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint32_t first[] = {1, 2, 3, 4}; + uint32_t second[] = {5, 6, 7, 8}; + uint32x4x2_t result = test_vuzpqu32 (vld1q_u32 (first), vld1q_u32 (second)); + uint32_t exp1[] = {1, 3, 5, 7}; + uint32_t exp2[] = {2, 4, 6, 8}; + uint32x4_t expect1 = vld1q_u32 (exp1); + uint32x4_t expect2 = vld1q_u32 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_s32_1.c @@ -0,0 +1,10 @@ +/* Test the `vexts32' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_s32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu8_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_u8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrnqu8.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.16b, ?v\[0-9\]+\.16b, ?v\[0-9\]+\.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qs8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int8x16_t +test_vrev32qs8 (int8x16_t _arg) +{ + return vrev32q_s8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + int8x16_t reversed = test_vrev32qs8 (inorder); + int8x16_t expected = {4, 3, 2, 1, 8, 7, 6, 5, 12, 11, 10, 9, 16, 15, 14, 13}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qs8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int8x16_t +test_vrev16qs8 (int8x16_t _arg) +{ + return vrev16q_s8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int8x16_t inorder = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + int8x16_t reversed = test_vrev16qs8 (inorder); + int8x16_t expected = {2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15}; + + for (i = 0; i < 16; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int16x4_t +test_vrev64s16 (int16x4_t _arg) +{ + return vrev64_s16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int16x4_t inorder = {1, 2, 3, 4}; + int16x4_t reversed = test_vrev64s16 (inorder); + int16x4_t expected = {4, 3, 2, 1}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_s8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQs8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_s8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64u16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +uint16x4_t +test_vrev64u16 (uint16x4_t _arg) +{ + return vrev64_u16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + uint16x4_t inorder = {1, 2, 3, 4}; + uint16x4_t reversed = test_vrev64u16 (inorder); + uint16x4_t expected = {4, 3, 2, 1}; + + for (i = 0; i < 4; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpp16.x @@ -0,0 +1,26 @@ +extern void abort (void); + +poly16x4x2_t +test_vuzpp16 (poly16x4_t _a, poly16x4_t _b) +{ + return vuzp_p16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly16_t first[] = {1, 2, 3, 4}; + poly16_t second[] = {5, 6, 7, 8}; + poly16x4x2_t result = test_vuzpp16 (vld1_p16 (first), vld1_p16 (second)); + poly16_t exp1[] = {1, 3, 5, 7}; + poly16_t exp2[] = {2, 4, 6, 8}; + poly16x4_t expect1 = vld1_p16 (exp1); + poly16x4_t expect2 = vld1_p16 (exp2); + + for (i = 0; i < 4; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqf32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_f32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpqf32.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipp8_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_p8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipp8.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_p16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrnqp16.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qp16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly16x8_t +test_vrev32qp16 (poly16x8_t _arg) +{ + return vrev32q_p16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + poly16x8_t reversed = test_vrev32qp16 (inorder); + poly16x8_t expected = {2, 1, 4, 3, 6, 5, 8, 7}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqu32_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrnq_u32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vtrnqu32.x" + +/* { dg-final { scan-assembler-times "trn1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "trn2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqs8.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int8x16x2_t +test_vuzpqs8 (int8x16_t _a, int8x16_t _b) +{ + return vuzpq_s8 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int8_t first[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; + int8_t second[] = + {17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}; + int8x16x2_t result = test_vuzpqs8 (vld1q_s8 (first), vld1q_s8 (second)); + int8_t exp1[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}; + int8_t exp2[] = {2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32}; + int8x16_t expect1 = vld1q_s8 (exp1); + int8x16_t expect2 = vld1q_s8 (exp2); + + for (i = 0; i < 16; i++) + if ((result.val[0][i] != expect1[i]) || (result.val[1][i] != expect2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqs32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int32x4x2_t +test_vzipqs32 (int32x4_t _a, int32x4_t _b) +{ + return vzipq_s32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int32_t first[] = {1, 2, 3, 4}; + int32_t second[] = {5, 6, 7, 8}; + int32x4x2_t result = test_vzipqs32 (vld1q_s32 (first), vld1q_s32 (second)); + int32x4_t res1 = result.val[0], res2 = result.val[1]; + int32_t exp1[] = {1, 5, 2, 6}; + int32_t exp2[] = {3, 7, 4, 8}; + int32x4_t expected1 = vld1q_s32 (exp1); + int32x4_t expected2 = vld1q_s32 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_s16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64qs16.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8.x @@ -0,0 +1,22 @@ +extern void abort (void); + +int8x8_t +test_vrev32s8 (int8x8_t _arg) +{ + return vrev32_s8 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + int8x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + int8x8_t reversed = test_vrev32s8 (inorder); + int8x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p16_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQp16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_p16.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 7 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint32x4x2_t +test_vzipqu32 (uint32x4_t _a, uint32x4_t _b) +{ + return vzipq_u32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint32_t first[] = {1, 2, 3, 4}; + uint32_t second[] = {5, 6, 7, 8}; + uint32x4x2_t result = test_vzipqu32 (vld1q_u32 (first), vld1q_u32 (second)); + uint32x4_t res1 = result.val[0], res2 = result.val[1]; + uint32_t exp1[] = {1, 5, 2, 6}; + uint32_t exp2[] = {3, 7, 4, 8}; + uint32x4_t expected1 = vld1q_u32 (exp1); + uint32x4_t expected2 = vld1q_u32 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_u32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQu32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_u32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 3 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32p16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32_p16' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev32p16.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32.x @@ -0,0 +1,30 @@ +extern void abort (void); + +float32x2_t +test_vext_f32_1 (float32x2_t a, float32x2_t b) +{ + return vext_f32 (a, b, 1); +} + +int +main (int argc, char **argv) +{ + int i, off; + float32_t arr1[] = {0, 1}; + float32x2_t in1 = vld1_f32 (arr1); + float32_t arr2[] = {2, 3}; + float32x2_t in2 = vld1_f32 (arr2); + float32_t exp[2]; + float32x2_t expected; + float32x2_t actual = test_vext_f32_1 (in1, in2); + + for (i = 0; i < 2; i++) + exp[i] = i + 1; + expected = vld1_f32 (exp); + for (i = 0; i < 2; i++) + if (actual[i] != expected[i]) + abort (); + + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f64_1.c @@ -0,0 +1,25 @@ +/* Test the `vextf64' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (int argc, char **argv) +{ + int i, off; + float64x1_t in1 = {0}; + float64x1_t in2 = {1}; + float64x1_t actual = vext_f64 (in1, in2, 0); + if (actual != in1) + abort (); + + return 0; +} + +/* Do not scan-assembler. An EXT instruction could be emitted, but would merely + return its first argument, so it is legitimate to optimize it out. */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpf32_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_f32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpf32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpqu16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzpq_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vuzpqu16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu8_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_u8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpu8.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqf32_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_f32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vzipqf32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.4s, ?v\[0-9\]+\.4s, ?v\[0-9\]+\.4s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64s16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64_s16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev64s16.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.4h, ?v\[0-9\]+.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrns32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int32x2x2_t +test_vtrns32 (int32x2_t _a, int32x2_t _b) +{ + return vtrn_s32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int32_t first[] = {1, 2}; + int32_t second[] = {3, 4}; + int32x2x2_t result = test_vtrns32 (vld1_s32 (first), vld1_s32 (second)); + int32x2_t res1 = result.val[0], res2 = result.val[1]; + int32_t exp1[] = {1, 3}; + int32_t exp2[] = {2, 4}; + int32x2_t expected1 = vld1_s32 (exp1); + int32x2_t expected2 = vld1_s32 (exp2); + + for (i = 0; i < 2; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev16qu8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev16q_u8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev16qu8.x" + +/* { dg-final { scan-assembler-times "rev16\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzips16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +int16x4x2_t +test_vzips16 (int16x4_t _a, int16x4_t _b) +{ + return vzip_s16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + int16_t first[] = {1, 2, 3, 4}; + int16_t second[] = {5, 6, 7, 8}; + int16x4x2_t result = test_vzips16 (vld1_s16 (first), vld1_s16 (second)); + int16x4_t res1 = result.val[0], res2 = result.val[1]; + int16_t exp1[] = {1, 5, 2, 6}; + int16_t exp2[] = {3, 7, 4, 8}; + int16x4_t expected1 = vld1_s16 (exp1); + int16x4_t expected2 = vld1_s16 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qs8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev64q_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vrev64qs8.x" + +/* { dg-final { scan-assembler-times "rev64\[ \t\]+v\[0-9\]+.16b, ?v\[0-9\]+.16b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/extq_p8_1.c @@ -0,0 +1,10 @@ +/* Test the `vextQp8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "extq_p8.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\], ?#?\[0-9\]+\(?:.2\)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 15 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnu32.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint32x2x2_t +test_vtrnu32 (uint32x2_t _a, uint32x2_t _b) +{ + return vtrn_u32 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint32_t first[] = {1, 2}; + uint32_t second[] = {3, 4}; + uint32x2x2_t result = test_vtrnu32 (vld1_u32 (first), vld1_u32 (second)); + uint32x2_t res1 = result.val[0], res2 = result.val[1]; + uint32_t exp1[] = {1, 3}; + uint32_t exp2[] = {2, 4}; + uint32x2_t expected1 = vld1_u32 (exp1); + uint32x2_t expected2 = vld1_u32 (exp2); + + for (i = 0; i < 2; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +uint16x4x2_t +test_vzipu16 (uint16x4_t _a, uint16x4_t _b) +{ + return vzip_u16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + uint16_t first[] = {1, 2, 3, 4}; + uint16_t second[] = {5, 6, 7, 8}; + uint16x4x2_t result = test_vzipu16 (vld1_u16 (first), vld1_u16 (second)); + uint16x4_t res1 = result.val[0], res2 = result.val[1]; + uint16_t exp1[] = {1, 5, 2, 6}; + uint16_t exp2[] = {3, 7, 4, 8}; + uint16x4_t expected1 = vld1_u16 (exp1); + uint16x4_t expected2 = vld1_u16 (exp2); + + for (i = 0; i < 4; i++) + if ((res1[i] != expected1[i]) || 
(res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vuzpu16_1.c @@ -0,0 +1,11 @@ +/* Test the `vuzp_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vuzpu16.x" + +/* { dg-final { scan-assembler-times "uzp1\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "uzp2\[ \t\]+v\[0-9\]+\.4h, ?v\[0-9\]+\.4h, ?v\[0-9\]+\.4h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32s8_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32_s8' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev32s8.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8b, ?v\[0-9\]+.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnf32_1.c @@ -0,0 +1,11 @@ +/* Test the `vtrn_f32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vtrnf32.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.2s, ?v\[0-9\]+\.2s, ?v\[0-9\]+\.2s!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev32qu16_1.c @@ -0,0 +1,10 @@ +/* Test the `vrev32q_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vrev32qu16.x" + +/* { dg-final { scan-assembler-times "rev32\[ \t\]+v\[0-9\]+.8h, ?v\[0-9\]+.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipqu16_1.c @@ -0,0 +1,11 @@ +/* Test the `vzipq_u16' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include <arm_neon.h> +#include "vzipqu16.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8h, ?v\[0-9\]+\.8h, ?v\[0-9\]+\.8h!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vzipu8_1.c @@ -0,0 +1,11 @@ +/* Test the `vzip_u8' AArch64 SIMD intrinsic. 
*/ + +/* { dg-do run } */ +/* { dg-options "-save-temps -fno-inline" } */ + +#include +#include "vzipu8.x" + +/* { dg-final { scan-assembler-times "zip1\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { scan-assembler-times "zip2\[ \t\]+v\[0-9\]+\.8b, ?v\[0-9\]+\.8b, ?v\[0-9\]+\.8b!?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vtrnqp16.x @@ -0,0 +1,27 @@ +extern void abort (void); + +poly16x8x2_t +test_vtrnqp16 (poly16x8_t _a, poly16x8_t _b) +{ + return vtrnq_p16 (_a, _b); +} + +int +main (int argc, char **argv) +{ + int i; + poly16_t first[] = {1, 2, 3, 4, 5, 6, 7, 8}; + poly16_t second[] = {9, 10, 11, 12, 13, 14, 15, 16}; + poly16x8x2_t result = test_vtrnqp16 (vld1q_p16 (first), vld1q_p16 (second)); + poly16x8_t res1 = result.val[0], res2 = result.val[1]; + poly16_t exp1[] = {1, 9, 3, 11, 5, 13, 7, 15}; + poly16_t exp2[] = {2, 10, 4, 12, 6, 14, 8, 16}; + poly16x8_t expected1 = vld1q_p16 (exp1); + poly16x8_t expected2 = vld1q_p16 (exp2); + + for (i = 0; i < 8; i++) + if ((res1[i] != expected1[i]) || (res2[i] != expected2[i])) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64qp16.x @@ -0,0 +1,22 @@ +extern void abort (void); + +poly16x8_t +test_vrev64qp16 (poly16x8_t _arg) +{ + return vrev64q_p16 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + poly16x8_t inorder = {1, 2, 3, 4, 5, 6, 7, 8}; + poly16x8_t reversed = test_vrev64qp16 (inorder); + poly16x8_t expected = {4, 3, 2, 1, 8, 7, 6, 5}; + + for (i = 0; i < 8; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/ext_f32_1.c @@ -0,0 +1,10 @@ +/* Test the `vextf32' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3 -fno-inline" } */ + +#include "arm_neon.h" +#include "ext_f32.x" + +/* { dg-final { scan-assembler-times "ext\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\], ?#\[0-9\]+\(?:.4)?\(?:\[ \t\]+@\[a-zA-Z0-9 \]+\)?\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x +++ b/src/gcc/testsuite/gcc.target/aarch64/simd/vrev64f32.x @@ -0,0 +1,22 @@ +extern void abort (void); + +float32x2_t +test_vrev64f32 (float32x2_t _arg) +{ + return vrev64_f32 (_arg); +} + +int +main (int argc, char **argv) +{ + int i; + float32x2_t inorder = {1, 2}; + float32x2_t reversed = test_vrev64f32 (inorder); + float32x2_t expected = {2, 1}; + + for (i = 0; i < 2; i++) + if (reversed[i] != expected[i]) + abort (); + return 0; +} + --- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_1.c @@ -0,0 +1,430 @@ +/* Test vdup_lane intrinsics work correctly. 
*/ +/* { dg-do run } */ +/* { dg-options "--save-temps -O1" } */ + +#include + +extern void abort (void); + +float32x2_t __attribute__ ((noinline)) +wrap_vdup_lane_f32_0 (float32x2_t a) +{ + return vdup_lane_f32 (a, 0); +} + +float32x2_t __attribute__ ((noinline)) +wrap_vdup_lane_f32_1 (float32x2_t a) +{ + return vdup_lane_f32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_f32 () +{ + float32x2_t a; + float32x2_t b; + int i; + float32_t c[2] = { 0.0 , 3.14 }; + float32_t d[2]; + + a = vld1_f32 (c); + b = wrap_vdup_lane_f32_0 (a); + vst1_f32 (d, b); + for (i = 0; i < 2; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdup_lane_f32_1 (a); + vst1_f32 (d, b); + for (i = 0; i < 2; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +float32x4_t __attribute__ ((noinline)) +wrap_vdupq_lane_f32_0 (float32x2_t a) +{ + return vdupq_lane_f32 (a, 0); +} + +float32x4_t __attribute__ ((noinline)) +wrap_vdupq_lane_f32_1 (float32x2_t a) +{ + return vdupq_lane_f32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_f32 () +{ + float32x2_t a; + float32x4_t b; + int i; + float32_t c[2] = { 0.0 , 3.14 }; + float32_t d[4]; + + a = vld1_f32 (c); + b = wrap_vdupq_lane_f32_0 (a); + vst1q_f32 (d, b); + for (i = 0; i < 4; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdupq_lane_f32_1 (a); + vst1q_f32 (d, b); + for (i = 0; i < 4; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int8x8_t __attribute__ ((noinline)) +wrap_vdup_lane_s8_0 (int8x8_t a) +{ + return vdup_lane_s8 (a, 0); +} + +int8x8_t __attribute__ ((noinline)) +wrap_vdup_lane_s8_1 (int8x8_t a) +{ + return vdup_lane_s8 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_s8 () +{ + int8x8_t a; + int8x8_t b; + int i; + /* Only two first cases are interesting. */ + int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + int8_t d[8]; + + a = vld1_s8 (c); + b = wrap_vdup_lane_s8_0 (a); + vst1_s8 (d, b); + for (i = 0; i < 8; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdup_lane_s8_1 (a); + vst1_s8 (d, b); + for (i = 0; i < 8; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int8x16_t __attribute__ ((noinline)) +wrap_vdupq_lane_s8_0 (int8x8_t a) +{ + return vdupq_lane_s8 (a, 0); +} + +int8x16_t __attribute__ ((noinline)) +wrap_vdupq_lane_s8_1 (int8x8_t a) +{ + return vdupq_lane_s8 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_s8 () +{ + int8x8_t a; + int8x16_t b; + int i; + /* Only two first cases are interesting. */ + int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + int8_t d[16]; + + a = vld1_s8 (c); + b = wrap_vdupq_lane_s8_0 (a); + vst1q_s8 (d, b); + for (i = 0; i < 16; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdupq_lane_s8_1 (a); + vst1q_s8 (d, b); + for (i = 0; i < 16; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int16x4_t __attribute__ ((noinline)) +wrap_vdup_lane_s16_0 (int16x4_t a) +{ + return vdup_lane_s16 (a, 0); +} + +int16x4_t __attribute__ ((noinline)) +wrap_vdup_lane_s16_1 (int16x4_t a) +{ + return vdup_lane_s16 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_s16 () +{ + int16x4_t a; + int16x4_t b; + int i; + /* Only two first cases are interesting. 
*/ + int16_t c[4] = { 0, 1, 2, 3 }; + int16_t d[4]; + + a = vld1_s16 (c); + b = wrap_vdup_lane_s16_0 (a); + vst1_s16 (d, b); + for (i = 0; i < 4; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdup_lane_s16_1 (a); + vst1_s16 (d, b); + for (i = 0; i < 4; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int16x8_t __attribute__ ((noinline)) +wrap_vdupq_lane_s16_0 (int16x4_t a) +{ + return vdupq_lane_s16 (a, 0); +} + +int16x8_t __attribute__ ((noinline)) +wrap_vdupq_lane_s16_1 (int16x4_t a) +{ + return vdupq_lane_s16 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_s16 () +{ + int16x4_t a; + int16x8_t b; + int i; + /* Only two first cases are interesting. */ + int16_t c[4] = { 0, 1, 2, 3 }; + int16_t d[8]; + + a = vld1_s16 (c); + b = wrap_vdupq_lane_s16_0 (a); + vst1q_s16 (d, b); + for (i = 0; i < 8; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdupq_lane_s16_1 (a); + vst1q_s16 (d, b); + for (i = 0; i < 8; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int32x2_t __attribute__ ((noinline)) +wrap_vdup_lane_s32_0 (int32x2_t a) +{ + return vdup_lane_s32 (a, 0); +} + +int32x2_t __attribute__ ((noinline)) +wrap_vdup_lane_s32_1 (int32x2_t a) +{ + return vdup_lane_s32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_s32 () +{ + int32x2_t a; + int32x2_t b; + int i; + int32_t c[2] = { 0, 1 }; + int32_t d[2]; + + a = vld1_s32 (c); + b = wrap_vdup_lane_s32_0 (a); + vst1_s32 (d, b); + for (i = 0; i < 2; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdup_lane_s32_1 (a); + vst1_s32 (d, b); + for (i = 0; i < 2; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int32x4_t __attribute__ ((noinline)) +wrap_vdupq_lane_s32_0 (int32x2_t a) +{ + return vdupq_lane_s32 (a, 0); +} + +int32x4_t __attribute__ ((noinline)) +wrap_vdupq_lane_s32_1 (int32x2_t a) +{ + return vdupq_lane_s32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_s32 () +{ + int32x2_t a; + int32x4_t b; + int i; + int32_t c[2] = { 0, 1 }; + int32_t d[4]; + + a = vld1_s32 (c); + b = wrap_vdupq_lane_s32_0 (a); + vst1q_s32 (d, b); + for (i = 0; i < 4; i++) + if (c[0] != d[i]) + return 1; + + b = wrap_vdupq_lane_s32_1 (a); + vst1q_s32 (d, b); + for (i = 0; i < 4; i++) + if (c[1] != d[i]) + return 1; + return 0; +} + +int64x1_t __attribute__ ((noinline)) +wrap_vdup_lane_s64_0 (int64x1_t a) +{ + return vdup_lane_s64 (a, 0); +} + +int64x1_t __attribute__ ((noinline)) +wrap_vdup_lane_s64_1 (int64x1_t a) +{ + return vdup_lane_s64 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdup_lane_s64 () +{ + int64x1_t a; + int64x1_t b; + int64_t c[1]; + int64_t d[1]; + + c[0] = 0; + a = vld1_s64 (c); + b = wrap_vdup_lane_s64_0 (a); + vst1_s64 (d, b); + if (c[0] != d[0]) + return 1; + + c[0] = 1; + a = vld1_s64 (c); + b = wrap_vdup_lane_s64_1 (a); + vst1_s64 (d, b); + if (c[0] != d[0]) + return 1; + return 0; +} + +int64x2_t __attribute__ ((noinline)) +wrap_vdupq_lane_s64_0 (int64x1_t a) +{ + return vdupq_lane_s64 (a, 0); +} + +int64x2_t __attribute__ ((noinline)) +wrap_vdupq_lane_s64_1 (int64x1_t a) +{ + return vdupq_lane_s64 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdupq_lane_s64 () +{ + int64x1_t a; + int64x2_t b; + int i; + int64_t c[1]; + int64_t d[2]; + + c[0] = 0; + a = vld1_s64 (c); + b = wrap_vdupq_lane_s64_0 (a); + vst1q_s64 (d, b); + for (i = 0; i < 2; i++) + if (c[0] != d[i]) + return 1; + + c[0] = 1; + a = vld1_s64 (c); + b = wrap_vdupq_lane_s64_1 (a); + vst1q_s64 (d, b); + for (i = 0; i < 2; i++) + if (c[0] != d[i]) + return 1; + return 0; +} + +int +main () +{ + + if 
(test_vdup_lane_f32 ()) + abort (); + if (test_vdup_lane_s8 ()) + abort (); + if (test_vdup_lane_s16 ()) + abort (); + if (test_vdup_lane_s32 ()) + abort (); + if (test_vdup_lane_s64 ()) + abort (); + if (test_vdupq_lane_f32 ()) + abort (); + if (test_vdupq_lane_s8 ()) + abort (); + if (test_vdupq_lane_s16 ()) + abort (); + if (test_vdupq_lane_s32 ()) + abort (); + if (test_vdupq_lane_s64 ()) + abort (); + + return 0; +} + +/* Asm check for test_vdup_lane_s8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ + +/* Asm check for test_vdupq_lane_s8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[0\\\]" 1 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, v\[0-9\]+\.b\\\[1\\\]" 1 } } */ + +/* Asm check for test_vdup_lane_s16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ +/* Asm check for test_vdup_lane_s16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ + +/* Asm check for test_vdupq_lane_s16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[0\\\]" 1 } } */ +/* Asm check for test_vdupq_lane_s16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, v\[0-9\]+\.h\\\[1\\\]" 1 } } */ + +/* Asm check for test_vdup_lane_f32 and test_vdup_lane_s32. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ + +/* Asm check for test_vdupq_lane_f32 and test_vdupq_lane_s32. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[0\\\]" 2 } } */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, v\[0-9\]+\.s\\\[1\\\]" 2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_n_1.c @@ -0,0 +1,619 @@ +/* Test vdup_lane intrinsics work correctly. 
*/ +/* { dg-do run } */ +/* { dg-options "-O1 --save-temps" } */ + +#include + +extern void abort (void); + +float32x2_t __attribute__ ((noinline)) +wrap_vdup_n_f32 (float32_t a) +{ + return vdup_n_f32 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_f32 () +{ + float32_t a = 1.0; + float32x2_t b; + float32_t c[2]; + int i; + + b = wrap_vdup_n_f32 (a); + vst1_f32 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +float32x4_t __attribute__ ((noinline)) +wrap_vdupq_n_f32 (float32_t a) +{ + return vdupq_n_f32 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_f32 () +{ + float32_t a = 1.0; + float32x4_t b; + float32_t c[4]; + int i; + + b = wrap_vdupq_n_f32 (a); + vst1q_f32 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +float64x1_t __attribute__ ((noinline)) +wrap_vdup_n_f64 (float64_t a) +{ + return vdup_n_f64 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_f64 () +{ + float64_t a = 1.0; + float64x1_t b; + float64_t c[1]; + int i; + + b = wrap_vdup_n_f64 (a); + vst1_f64 (c, b); + for (i = 0; i < 1; i++) + if (a != c[i]) + return 1; + return 0; +} + +float64x2_t __attribute__ ((noinline)) +wrap_vdupq_n_f64 (float64_t a) +{ + return vdupq_n_f64 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_f64 () +{ + float64_t a = 1.0; + float64x2_t b; + float64_t c[2]; + int i; + + b = wrap_vdupq_n_f64 (a); + vst1q_f64 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +poly8x8_t __attribute__ ((noinline)) +wrap_vdup_n_p8 (poly8_t a) +{ + return vdup_n_p8 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_p8 () +{ + poly8_t a = 1; + poly8x8_t b; + poly8_t c[8]; + int i; + + b = wrap_vdup_n_p8 (a); + vst1_p8 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +poly8x16_t __attribute__ ((noinline)) +wrap_vdupq_n_p8 (poly8_t a) +{ + return vdupq_n_p8 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_p8 () +{ + poly8_t a = 1; + poly8x16_t b; + poly8_t c[16]; + int i; + + b = wrap_vdupq_n_p8 (a); + vst1q_p8 (c, b); + for (i = 0; i < 16; i++) + if (a != c[i]) + return 1; + return 0; +} + +int8x8_t __attribute__ ((noinline)) +wrap_vdup_n_s8 (int8_t a) +{ + return vdup_n_s8 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_s8 () +{ + int8_t a = 1; + int8x8_t b; + int8_t c[8]; + int i; + + b = wrap_vdup_n_s8 (a); + vst1_s8 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +int8x16_t __attribute__ ((noinline)) +wrap_vdupq_n_s8 (int8_t a) +{ + return vdupq_n_s8 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_s8 () +{ + int8_t a = 1; + int8x16_t b; + int8_t c[16]; + int i; + + b = wrap_vdupq_n_s8 (a); + vst1q_s8 (c, b); + for (i = 0; i < 16; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint8x8_t __attribute__ ((noinline)) +wrap_vdup_n_u8 (uint8_t a) +{ + return vdup_n_u8 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_u8 () +{ + uint8_t a = 1; + uint8x8_t b; + uint8_t c[8]; + int i; + + b = wrap_vdup_n_u8 (a); + vst1_u8 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint8x16_t __attribute__ ((noinline)) +wrap_vdupq_n_u8 (uint8_t a) +{ + return vdupq_n_u8 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_u8 () +{ + uint8_t a = 1; + uint8x16_t b; + uint8_t c[16]; + int i; + + b = wrap_vdupq_n_u8 (a); + vst1q_u8 (c, b); + for (i = 0; i < 16; i++) + if (a != c[i]) + return 1; + return 0; +} + +poly16x4_t __attribute__ ((noinline)) +wrap_vdup_n_p16 (poly16_t a) +{ + return 
vdup_n_p16 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_p16 () +{ + poly16_t a = 1; + poly16x4_t b; + poly16_t c[4]; + int i; + + b = wrap_vdup_n_p16 (a); + vst1_p16 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +poly16x8_t __attribute__ ((noinline)) +wrap_vdupq_n_p16 (poly16_t a) +{ + return vdupq_n_p16 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_p16 () +{ + poly16_t a = 1; + poly16x8_t b; + poly16_t c[8]; + int i; + + b = wrap_vdupq_n_p16 (a); + vst1q_p16 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +int16x4_t __attribute__ ((noinline)) +wrap_vdup_n_s16 (int16_t a) +{ + return vdup_n_s16 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_s16 () +{ + int16_t a = 1; + int16x4_t b; + int16_t c[4]; + int i; + + b = wrap_vdup_n_s16 (a); + vst1_s16 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +int16x8_t __attribute__ ((noinline)) +wrap_vdupq_n_s16 (int16_t a) +{ + return vdupq_n_s16 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_s16 () +{ + int16_t a = 1; + int16x8_t b; + int16_t c[8]; + int i; + + b = wrap_vdupq_n_s16 (a); + vst1q_s16 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint16x4_t __attribute__ ((noinline)) +wrap_vdup_n_u16 (uint16_t a) +{ + return vdup_n_u16 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_u16 () +{ + uint16_t a = 1; + uint16x4_t b; + uint16_t c[4]; + int i; + + b = wrap_vdup_n_u16 (a); + vst1_u16 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint16x8_t __attribute__ ((noinline)) +wrap_vdupq_n_u16 (uint16_t a) +{ + return vdupq_n_u16 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_u16 () +{ + uint16_t a = 1; + uint16x8_t b; + uint16_t c[8]; + int i; + + b = wrap_vdupq_n_u16 (a); + vst1q_u16 (c, b); + for (i = 0; i < 8; i++) + if (a != c[i]) + return 1; + return 0; +} + +int32x2_t __attribute__ ((noinline)) +wrap_vdup_n_s32 (int32_t a) +{ + return vdup_n_s32 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_s32 () +{ + int32_t a = 1; + int32x2_t b; + int32_t c[2]; + int i; + + b = wrap_vdup_n_s32 (a); + vst1_s32 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +int32x4_t __attribute__ ((noinline)) +wrap_vdupq_n_s32 (int32_t a) +{ + return vdupq_n_s32 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_s32 () +{ + int32_t a = 1; + int32x4_t b; + int32_t c[4]; + int i; + + b = wrap_vdupq_n_s32 (a); + vst1q_s32 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint32x2_t __attribute__ ((noinline)) +wrap_vdup_n_u32 (uint32_t a) +{ + return vdup_n_u32 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_u32 () +{ + uint32_t a = 1; + uint32x2_t b; + uint32_t c[2]; + int i; + + b = wrap_vdup_n_u32 (a); + vst1_u32 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint32x4_t __attribute__ ((noinline)) +wrap_vdupq_n_u32 (uint32_t a) +{ + return vdupq_n_u32 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_u32 () +{ + uint32_t a = 1; + uint32x4_t b; + uint32_t c[4]; + int i; + + b = wrap_vdupq_n_u32 (a); + vst1q_u32 (c, b); + for (i = 0; i < 4; i++) + if (a != c[i]) + return 1; + return 0; +} + +int64x1_t __attribute__ ((noinline)) +wrap_vdup_n_s64 (int64_t a) +{ + return vdup_n_s64 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_s64 () +{ + int64_t a = 1; + int64x1_t b; + int64_t c[1]; + int i; + + b = wrap_vdup_n_s64 (a); + vst1_s64 (c, b); 
+ for (i = 0; i < 1; i++) + if (a != c[i]) + return 1; + return 0; +} + +int64x2_t __attribute__ ((noinline)) +wrap_vdupq_n_s64 (int64_t a) +{ + return vdupq_n_s64 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_s64 () +{ + int64_t a = 1; + int64x2_t b; + int64_t c[2]; + int i; + + b = wrap_vdupq_n_s64 (a); + vst1q_s64 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint64x1_t __attribute__ ((noinline)) +wrap_vdup_n_u64 (uint64_t a) +{ + return vdup_n_u64 (a); +} + +int __attribute__ ((noinline)) +test_vdup_n_u64 () +{ + uint64_t a = 1; + uint64x1_t b; + uint64_t c[1]; + int i; + + b = wrap_vdup_n_u64 (a); + vst1_u64 (c, b); + for (i = 0; i < 1; i++) + if (a != c[i]) + return 1; + return 0; +} + +uint64x2_t __attribute__ ((noinline)) +wrap_vdupq_n_u64 (uint64_t a) +{ + return vdupq_n_u64 (a); +} + +int __attribute__ ((noinline)) +test_vdupq_n_u64 () +{ + uint64_t a = 1; + uint64x2_t b; + uint64_t c[2]; + int i; + + b = wrap_vdupq_n_u64 (a); + vst1q_u64 (c, b); + for (i = 0; i < 2; i++) + if (a != c[i]) + return 1; + return 0; +} + +int +main () +{ + if (test_vdup_n_f32 ()) + abort (); + if (test_vdup_n_f64 ()) + abort (); + if (test_vdup_n_p8 ()) + abort (); + if (test_vdup_n_u8 ()) + abort (); + if (test_vdup_n_s8 ()) + abort (); + if (test_vdup_n_p16 ()) + abort (); + if (test_vdup_n_s16 ()) + abort (); + if (test_vdup_n_u16 ()) + abort (); + if (test_vdup_n_s32 ()) + abort (); + if (test_vdup_n_u32 ()) + abort (); + if (test_vdup_n_s64 ()) + abort (); + if (test_vdup_n_u64 ()) + abort (); + if (test_vdupq_n_f32 ()) + abort (); + if (test_vdupq_n_f64 ()) + abort (); + if (test_vdupq_n_p8 ()) + abort (); + if (test_vdupq_n_u8 ()) + abort (); + if (test_vdupq_n_s8 ()) + abort (); + if (test_vdupq_n_p16 ()) + abort (); + if (test_vdupq_n_s16 ()) + abort (); + if (test_vdupq_n_u16 ()) + abort (); + if (test_vdupq_n_s32 ()) + abort (); + if (test_vdupq_n_u32 ()) + abort (); + if (test_vdupq_n_s64 ()) + abort (); + if (test_vdupq_n_u64 ()) + abort (); + return 0; +} + +/* No asm checks for vdup_n_f32, vdupq_n_f32, vdup_n_f64 and vdupq_n_f64. + Cannot force floating point value in general purpose regester. */ + +/* Asm check for test_vdup_n_p8, test_vdup_n_s8, test_vdup_n_u8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8b, w\[0-9\]+" 3 } } */ + +/* Asm check for test_vdupq_n_p8, test_vdupq_n_s8, test_vdupq_n_u8. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.16b, w\[0-9\]+" 3 } } */ + +/* Asm check for test_vdup_n_p16, test_vdup_n_s16, test_vdup_n_u16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4h, w\[0-9\]+" 3 } } */ + +/* Asm check for test_vdupq_n_p16, test_vdupq_n_s16, test_vdupq_n_u16. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.8h, w\[0-9\]+" 3 } } */ + +/* Asm check for test_vdup_n_s32, test_vdup_n_u32. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2s, w\[0-9\]+" 2 } } */ + +/* Asm check for test_vdupq_n_s32, test_vdupq_n_u32. */ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.4s, w\[0-9\]+" 2 } } */ + +/* Asm check for test_vdup_n_s64, test_vdup_n_u64 are left out. + Attempts to make the compiler generate "dup\\td\[0-9\]+, x\[0-9\]+" + are not practical. */ + +/* Asm check for test_vdupq_n_s64, test_vdupq_n_u64. 
*/ +/* { dg-final { scan-assembler-times "dup\\tv\[0-9\]+\.2d, x\[0-9\]+" 2 } } */ + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/fcsel_1.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options " -O2 " } */ + +float +f_1 (float a, float b, float c, float d) +{ + if (a > 0.0) + return c; + else + return 2.0; +} + +double +f_2 (double a, double b, double c, double d) +{ + if (a > b) + return c; + else + return d; +} + +/* { dg-final { scan-assembler-times "\tfcsel" 2 } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/rev16_1.c @@ -0,0 +1,59 @@ +/* { dg-options "-O2" } */ +/* { dg-do run } */ + +extern void abort (void); + +typedef unsigned int __u32; + +__u32 +__rev16_32_alt (__u32 x) +{ + return (((__u32)(x) & (__u32)0xff00ff00UL) >> 8) + | (((__u32)(x) & (__u32)0x00ff00ffUL) << 8); +} + +__u32 +__rev16_32 (__u32 x) +{ + return (((__u32)(x) & (__u32)0x00ff00ffUL) << 8) + | (((__u32)(x) & (__u32)0xff00ff00UL) >> 8); +} + +typedef unsigned long long __u64; + +__u64 +__rev16_64_alt (__u64 x) +{ + return (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8) + | (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8); +} + +__u64 +__rev16_64 (__u64 x) +{ + return (((__u64)(x) & (__u64)0x00ff00ff00ff00ffUL) << 8) + | (((__u64)(x) & (__u64)0xff00ff00ff00ff00UL) >> 8); +} + +int +main (void) +{ + volatile __u32 in32 = 0x12345678; + volatile __u32 expected32 = 0x34127856; + volatile __u64 in64 = 0x1234567890abcdefUL; + volatile __u64 expected64 = 0x34127856ab90efcdUL; + + if (__rev16_32 (in32) != expected32) + abort (); + + if (__rev16_32_alt (in32) != expected32) + abort (); + + if (__rev16_64 (in64) != expected64) + abort (); + + if (__rev16_64_alt (in64) != expected64) + abort (); + + return 0; +} --- a/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vdup_lane_2.c @@ -0,0 +1,343 @@ +/* Test vdup_lane intrinsics work correctly. 
*/ +/* { dg-do run } */ +/* { dg-options "-O1 --save-temps" } */ + +#include + +#define force_simd(V1) asm volatile ("" \ + : "=w"(V1) \ + : "w"(V1) \ + : /* No clobbers */) + +extern void abort (void); + +float32_t __attribute__ ((noinline)) +wrap_vdups_lane_f32_0 (float32x2_t dummy, float32x2_t a) +{ + return vdups_lane_f32 (a, 0); +} + +float32_t __attribute__ ((noinline)) +wrap_vdups_lane_f32_1 (float32x2_t a) +{ + return vdups_lane_f32 (a, 1); +} + +int __attribute__ ((noinline)) +test_vdups_lane_f32 () +{ + float32x2_t a; + float32_t b; + float32_t c[2] = { 0.0, 1.0 }; + + a = vld1_f32 (c); + b = wrap_vdups_lane_f32_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vdups_lane_f32_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +float64_t __attribute__ ((noinline)) +wrap_vdupd_lane_f64_0 (float64x1_t dummy, float64x1_t a) +{ + return vdupd_lane_f64 (a, 0); +} + +int __attribute__ ((noinline)) +test_vdupd_lane_f64 () +{ + float64x1_t a; + float64_t b; + float64_t c[1] = { 0.0 }; + a = vld1_f64 (c); + b = wrap_vdupd_lane_f64_0 (a, a); + if (c[0] != b) + return 1; + return 0; +} + +int8_t __attribute__ ((noinline)) +wrap_vdupb_lane_s8_0 (int8x8_t dummy, int8x8_t a) +{ + int8_t result = vdupb_lane_s8 (a, 0); + force_simd (result); + return result; +} + +int8_t __attribute__ ((noinline)) +wrap_vdupb_lane_s8_1 (int8x8_t a) +{ + int8_t result = vdupb_lane_s8 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vdupb_lane_s8 () +{ + int8x8_t a; + int8_t b; + int8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + + a = vld1_s8 (c); + b = wrap_vdupb_lane_s8_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vdupb_lane_s8_1 (a); + if (c[1] != b) + return 1; + + return 0; +} + +uint8_t __attribute__ ((noinline)) +wrap_vdupb_lane_u8_0 (uint8x8_t dummy, uint8x8_t a) +{ + uint8_t result = vdupb_lane_u8 (a, 0); + force_simd (result); + return result; +} + +uint8_t __attribute__ ((noinline)) +wrap_vdupb_lane_u8_1 (uint8x8_t a) +{ + uint8_t result = vdupb_lane_u8 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vdupb_lane_u8 () +{ + uint8x8_t a; + uint8_t b; + uint8_t c[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + + a = vld1_u8 (c); + b = wrap_vdupb_lane_u8_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vdupb_lane_u8_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +int16_t __attribute__ ((noinline)) +wrap_vduph_lane_s16_0 (int16x4_t dummy, int16x4_t a) +{ + int16_t result = vduph_lane_s16 (a, 0); + force_simd (result); + return result; +} + +int16_t __attribute__ ((noinline)) +wrap_vduph_lane_s16_1 (int16x4_t a) +{ + int16_t result = vduph_lane_s16 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vduph_lane_s16 () +{ + int16x4_t a; + int16_t b; + int16_t c[4] = { 0, 1, 2, 3 }; + + a = vld1_s16 (c); + b = wrap_vduph_lane_s16_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vduph_lane_s16_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +uint16_t __attribute__ ((noinline)) +wrap_vduph_lane_u16_0 (uint16x4_t dummy, uint16x4_t a) +{ + uint16_t result = vduph_lane_u16 (a, 0); + force_simd (result); + return result; +} + +uint16_t __attribute__ ((noinline)) +wrap_vduph_lane_u16_1 (uint16x4_t a) +{ + uint16_t result = vduph_lane_u16 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vduph_lane_u16 () +{ + uint16x4_t a; + uint16_t b; + uint16_t c[4] = { 0, 1, 2, 3 }; + + a = vld1_u16 (c); + b = wrap_vduph_lane_u16_0 (a, a); + if (c[0] != b) + return 1; + 
b = wrap_vduph_lane_u16_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +int32_t __attribute__ ((noinline)) +wrap_vdups_lane_s32_0 (int32x2_t dummy, int32x2_t a) +{ + int32_t result = vdups_lane_s32 (a, 0); + force_simd (result); + return result; +} + +int32_t __attribute__ ((noinline)) +wrap_vdups_lane_s32_1 (int32x2_t a) +{ + int32_t result = vdups_lane_s32 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vdups_lane_s32 () +{ + int32x2_t a; + int32_t b; + int32_t c[2] = { 0, 1 }; + + a = vld1_s32 (c); + b = wrap_vdups_lane_s32_0 (vcreate_s32 (0), a); + if (c[0] != b) + return 1; + b = wrap_vdups_lane_s32_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +uint32_t __attribute__ ((noinline)) +wrap_vdups_lane_u32_0 (uint32x2_t dummy, uint32x2_t a) +{ + uint32_t result = vdups_lane_u32 (a, 0); + force_simd (result); + return result; +} + +uint32_t __attribute__ ((noinline)) +wrap_vdups_lane_u32_1 (uint32x2_t a) +{ + uint32_t result = vdups_lane_u32 (a, 1); + force_simd (result); + return result; +} + +int __attribute__ ((noinline)) +test_vdups_lane_u32 () +{ + uint32x2_t a; + uint32_t b; + uint32_t c[2] = { 0, 1 }; + a = vld1_u32 (c); + b = wrap_vdups_lane_u32_0 (a, a); + if (c[0] != b) + return 1; + b = wrap_vdups_lane_u32_1 (a); + if (c[1] != b) + return 1; + return 0; +} + +uint64_t __attribute__ ((noinline)) +wrap_vdupd_lane_u64_0 (uint64x1_t dummy, uint64x1_t a) +{ + return vdupd_lane_u64 (a, 0);; +} + +int __attribute__ ((noinline)) +test_vdupd_lane_u64 () +{ + uint64x1_t a; + uint64_t b; + uint64_t c[1] = { 0 }; + + a = vld1_u64 (c); + b = wrap_vdupd_lane_u64_0 (a, a); + if (c[0] != b) + return 1; + return 0; +} + +int64_t __attribute__ ((noinline)) +wrap_vdupd_lane_s64_0 (uint64x1_t dummy, int64x1_t a) +{ + return vdupd_lane_u64 (a, 0); +} + +int __attribute__ ((noinline)) +test_vdupd_lane_s64 () +{ + int64x1_t a; + int64_t b; + int64_t c[1] = { 0 }; + + a = vld1_s64 (c); + b = wrap_vdupd_lane_s64_0 (a, a); + if (c[0] != b) + return 1; + return 0; +} + +int +main () +{ + if (test_vdups_lane_f32 ()) + abort (); + if (test_vdupd_lane_f64 ()) + abort (); + if (test_vdupb_lane_s8 ()) + abort (); + if (test_vdupb_lane_u8 ()) + abort (); + if (test_vduph_lane_s16 ()) + abort (); + if (test_vduph_lane_u16 ()) + abort (); + if (test_vdups_lane_s32 ()) + abort (); + if (test_vdups_lane_u32 ()) + abort (); + if (test_vdupd_lane_s64 ()) + abort (); + if (test_vdupd_lane_u64 ()) + abort (); + return 0; +} + +/* Asm check for vdupb_lane_s8, vdupb_lane_u8. */ +/* { dg-final { scan-assembler-not "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[0\\\]" } } */ +/* { dg-final { scan-assembler-times "dup\\tb\[0-9\]+, v\[0-9\]+\.b\\\[1\\\]" 2 } } */ + +/* Asm check for vduph_lane_h16, vduph_lane_h16. */ +/* { dg-final { scan-assembler-not "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[0\\\]" } } */ +/* { dg-final { scan-assembler-times "dup\\th\[0-9\]+, v\[0-9\]+\.h\\\[1\\\]" 2 } } */ + +/* Asm check for vdups_lane_f32, vdups_lane_s32, vdups_lane_u32. */ +/* Can't generate "dup s, v[0]" for vdups_lane_s32 and vdups_lane_u32. */ +/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[0\\\]" 1} } */ +/* { dg-final { scan-assembler-times "dup\\ts\[0-9\]+, v\[0-9\]+\.s\\\[1\\\]" 3 } } */ + +/* Asm check for vdupd_lane_f64, vdupd_lane_s64, vdupd_lane_u64. */ +/* Attempts to make the compiler generate vdupd are not practical. 
*/ +/* { dg-final { scan-assembler-not "dup\\td\[0-9\]+, v\[0-9\]+\.d\\\[0\\\]" } } + +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/tail_indirect_call_1.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +typedef void FP (int); + +/* { dg-final { scan-assembler "br" } } */ +/* { dg-final { scan-assembler-not "blr" } } */ +void +f1 (FP fp, int n) +{ + (fp) (n); +} + +void +f2 (int n, FP fp) +{ + (fp) (n); +} --- a/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c @@ -193,7 +193,6 @@ return b; } /* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 63" } } */ -/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 1" } } */ Int32x1 test_corners_sisd_si (Int32x1 b) @@ -207,7 +206,6 @@ return b; } /* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 31" } } */ -/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 1" } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vrnd_f64_1.c @@ -0,0 +1,105 @@ +/* Test vrnd_f64 works correctly. */ +/* { dg-do run } */ +/* { dg-options "--save-temps" } */ + +#include "arm_neon.h" + +extern void abort (void); + +/* Bit offset to round mode field in FPCR. */ +#define RMODE_START 22 + +#define FPROUNDING_ZERO 3 + +/* Set RMODE field of FPCR control register + to rounding mode passed. */ +void __inline __attribute__ ((__always_inline__)) +set_rounding_mode (uint32_t mode) +{ + uint32_t r; + + /* Read current FPCR. */ + asm volatile ("mrs %[r], fpcr" : [r] "=r" (r) : :); + + /* Clear rmode. */ + r &= ~(3 << RMODE_START); + /* Calculate desired FPCR. */ + r |= mode << RMODE_START; + + /* Write desired FPCR back. */ + asm volatile ("msr fpcr, %[r]" : : [r] "r" (r) :); +} + +float64x1_t __attribute__ ((noinline)) +compare_f64 (float64x1_t passed, float64_t expected) +{ + return (__builtin_fabs (vget_lane_f64 (passed, 0) - expected) + > __DBL_EPSILON__); +} + +void __attribute__ ((noinline)) +run_round_tests (float64x1_t *tests, + float64_t expectations[][6]) +{ + int i; + + for (i = 0; i < 6; i++) + { + if (compare_f64 (vrnd_f64 (tests[i]), expectations[0][i])) + abort (); + if (compare_f64 (vrndx_f64 (tests[i]), expectations[1][i])) + abort (); + if (compare_f64 (vrndp_f64 (tests[i]), expectations[2][i])) + abort (); + if (compare_f64 (vrndn_f64 (tests[i]), expectations[3][i])) + abort (); + if (compare_f64 (vrndm_f64 (tests[i]), expectations[4][i])) + abort (); + if (compare_f64 (vrndi_f64 (tests[i]), expectations[5][i])) + abort (); + if (compare_f64 (vrnda_f64 (tests[i]), expectations[6][i])) + abort (); + } +} + +int +main (int argc, char **argv) +{ + float64x1_t tests[6] = + { + vcreate_f64 (0x3FE0000000000000), /* Hex for: 0.5. */ + vcreate_f64 (0x3FD999999999999A), /* Hex for: 0.4. */ + vcreate_f64 (0x3FE3333333333333), /* Hex for: 0.6. */ + vcreate_f64 (0xBFE0000000000000), /* Hex for: -0.5. */ + vcreate_f64 (0xBFD999999999999A), /* Hex for: -0.4. */ + vcreate_f64 (0xBFE3333333333333), /* Hex for: -0.6. */ + }; + + float64_t expectations[7][6] = + { + { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrnd - round towards zero. */ + { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndx - round using FPCR mode. */ + { 1.0, 1.0, 1.0, 0.0, 0.0, 0.0 }, /* vrndp - round to plus infinity. */ + { 0.0, 0.0, 1.0, 0.0, 0.0, -1.0 }, /* vrndn - round ties to even. 
*/ + { 0.0, 0.0, 0.0, -1.0, -1.0, -1.0 }, /* vrndm - round to minus infinity. */ + { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, /* vrndi - round using FPCR mode. */ + { 1.0, 0.0, 1.0, -1.0, 0.0, -1.0 }, /* vrnda - round ties away from 0. */ + }; + + /* Set floating point control register + to have predictable vrndx and vrndi behaviour. */ + set_rounding_mode (FPROUNDING_ZERO); + + run_round_tests (tests, expectations); + + return 0; +} + +/* { dg-final { scan-assembler-times "frintz\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frintx\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frintp\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frintn\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frintm\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frinti\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "frinta\\td\[0-9\]+, d\[0-9\]+" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c +++ b/src/gcc/testsuite/gcc.target/aarch64/vqneg_s64_1.c @@ -0,0 +1,47 @@ +/* Test vqneg_s64 intrinsics work correctly. */ +/* { dg-do run } */ +/* { dg-options "--save-temps" } */ + +#include <arm_neon.h> + +extern void abort (void); + +int __attribute__ ((noinline)) +test_vqneg_s64 (int64x1_t passed, int64_t expected) +{ + return vget_lane_s64 (vqneg_s64 (passed), 0) != expected; +} + +int __attribute__ ((noinline)) +test_vqnegd_s64 (int64_t passed, int64_t expected) +{ + return vqnegd_s64 (passed) != expected; +} + +/* { dg-final { scan-assembler-times "sqneg\\td\[0-9\]+, d\[0-9\]+" 2 } } */ + +int +main (int argc, char **argv) +{ + /* Basic test. */ + if (test_vqneg_s64 (vcreate_s64 (-1), 1)) + abort (); + if (test_vqnegd_s64 (-1, 1)) + abort (); + + /* Negating max int64_t. */ + if (test_vqneg_s64 (vcreate_s64 (0x7fffffffffffffff), 0x8000000000000001)) + abort (); + if (test_vqnegd_s64 (0x7fffffffffffffff, 0x8000000000000001)) + abort (); + + /* Negating min int64_t. + Note, exact negation cannot be represented as int64_t. */ + if (test_vqneg_s64 (vcreate_s64 (0x8000000000000000), 0x7fffffffffffffff)) + abort (); + if (test_vqnegd_s64 (0x8000000000000000, 0x7fffffffffffffff)) + abort (); + + return 0; +} +/* { dg-final { cleanup-saved-temps } } */ --- a/src/gcc/testsuite/lib/target-supports.exp +++ b/src/gcc/testsuite/lib/target-supports.exp @@ -3306,6 +3306,27 @@ return $et_vect_shift_saved } +# Return 1 if the target supports vector bswap operations. + +proc check_effective_target_vect_bswap { } { + global et_vect_bswap_saved + + if [info exists et_vect_bswap_saved] { + verbose "check_effective_target_vect_bswap: using cached result" 2 + } else { + set et_vect_bswap_saved 0 + if { [istarget aarch64*-*-*] + || ([istarget arm*-*-*] + && [check_effective_target_arm_neon]) + } { + set et_vect_bswap_saved 1 + } + } + + verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2 + return $et_vect_bswap_saved +} + # Return 1 if the target supports hardware vector shift operation for char.
proc check_effective_target_vect_shift_char { } { @@ -3504,8 +3525,7 @@ } else { set et_vect_perm_saved 0 if { [is-effective-target arm_neon_ok] - || ([istarget aarch64*-*-*] - && [is-effective-target aarch64_little_endian]) + || [istarget aarch64*-*-*] || [istarget powerpc*-*-*] || [istarget spu-*-*] || [istarget i?86-*-*] --- a/src/gcc/testsuite/ChangeLog.linaro +++ b/src/gcc/testsuite/ChangeLog.linaro @@ -0,0 +1,527 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-07-17 Yvan Roux + + Backport from trunk r211887. + 2014-06-23 James Greenhalgh + + * gcc.target/aarch64/scalar_shift_1.c: Fix expected assembler. + +2014-07-17 Yvan Roux + + Backport from trunk r211441. + 2014-06-11 Kyrylo Tkachov + + * gcc.target/aarch64/acle/acle.exp: New. + * gcc.target/aarch64/acle/crc32b.c: New test. + * gcc.target/aarch64/acle/crc32cb.c: Likewise. + * gcc.target/aarch64/acle/crc32cd.c: Likewise. + * gcc.target/aarch64/acle/crc32ch.c: Likewise. + * gcc.target/aarch64/acle/crc32cw.c: Likewise. + * gcc.target/aarch64/acle/crc32d.c: Likewise. + * gcc.target/aarch64/acle/crc32h.c: Likewise. + * gcc.target/aarch64/acle/crc32w.c: Likewise. + +2014-07-17 Yvan Roux + + Backport from trunk r210153. + 2014-05-07 Alan Lawrence + + * gcc.target/aarch64/simd/vrev16p8_1.c: New file. + * gcc.target/aarch64/simd/vrev16p8.x: New file. + * gcc.target/aarch64/simd/vrev16qp8_1.c: New file. + * gcc.target/aarch64/simd/vrev16qp8.x: New file. + * gcc.target/aarch64/simd/vrev16qs8_1.c: New file. + * gcc.target/aarch64/simd/vrev16qs8.x: New file. + * gcc.target/aarch64/simd/vrev16qu8_1.c: New file. + * gcc.target/aarch64/simd/vrev16qu8.x: New file. + * gcc.target/aarch64/simd/vrev16s8_1.c: New file. + * gcc.target/aarch64/simd/vrev16s8.x: New file. + * gcc.target/aarch64/simd/vrev16u8_1.c: New file. + * gcc.target/aarch64/simd/vrev16u8.x: New file. + * gcc.target/aarch64/simd/vrev32p16_1.c: New file. + * gcc.target/aarch64/simd/vrev32p16.x: New file. + * gcc.target/aarch64/simd/vrev32p8_1.c: New file. + * gcc.target/aarch64/simd/vrev32p8.x: New file. + * gcc.target/aarch64/simd/vrev32qp16_1.c: New file. + * gcc.target/aarch64/simd/vrev32qp16.x: New file. + * gcc.target/aarch64/simd/vrev32qp8_1.c: New file. + * gcc.target/aarch64/simd/vrev32qp8.x: New file. + * gcc.target/aarch64/simd/vrev32qs16_1.c: New file. + * gcc.target/aarch64/simd/vrev32qs16.x: New file. + * gcc.target/aarch64/simd/vrev32qs8_1.c: New file. + * gcc.target/aarch64/simd/vrev32qs8.x: New file. + * gcc.target/aarch64/simd/vrev32qu16_1.c: New file. + * gcc.target/aarch64/simd/vrev32qu16.x: New file. + * gcc.target/aarch64/simd/vrev32qu8_1.c: New file. + * gcc.target/aarch64/simd/vrev32qu8.x: New file. + * gcc.target/aarch64/simd/vrev32s16_1.c: New file. + * gcc.target/aarch64/simd/vrev32s16.x: New file. + * gcc.target/aarch64/simd/vrev32s8_1.c: New file. + * gcc.target/aarch64/simd/vrev32s8.x: New file. + * gcc.target/aarch64/simd/vrev32u16_1.c: New file. + * gcc.target/aarch64/simd/vrev32u16.x: New file. + * gcc.target/aarch64/simd/vrev32u8_1.c: New file. + * gcc.target/aarch64/simd/vrev32u8.x: New file. + * gcc.target/aarch64/simd/vrev64f32_1.c: New file. + * gcc.target/aarch64/simd/vrev64f32.x: New file. + * gcc.target/aarch64/simd/vrev64p16_1.c: New file. + * gcc.target/aarch64/simd/vrev64p16.x: New file. + * gcc.target/aarch64/simd/vrev64p8_1.c: New file. + * gcc.target/aarch64/simd/vrev64p8.x: New file. + * gcc.target/aarch64/simd/vrev64qf32_1.c: New file. + * gcc.target/aarch64/simd/vrev64qf32.x: New file. 
+ * gcc.target/aarch64/simd/vrev64qp16_1.c: New file. + * gcc.target/aarch64/simd/vrev64qp16.x: New file. + * gcc.target/aarch64/simd/vrev64qp8_1.c: New file. + * gcc.target/aarch64/simd/vrev64qp8.x: New file. + * gcc.target/aarch64/simd/vrev64qs16_1.c: New file. + * gcc.target/aarch64/simd/vrev64qs16.x: New file. + * gcc.target/aarch64/simd/vrev64qs32_1.c: New file. + * gcc.target/aarch64/simd/vrev64qs32.x: New file. + * gcc.target/aarch64/simd/vrev64qs8_1.c: New file. + * gcc.target/aarch64/simd/vrev64qs8.x: New file. + * gcc.target/aarch64/simd/vrev64qu16_1.c: New file. + * gcc.target/aarch64/simd/vrev64qu16.x: New file. + * gcc.target/aarch64/simd/vrev64qu32_1.c: New file. + * gcc.target/aarch64/simd/vrev64qu32.x: New file. + * gcc.target/aarch64/simd/vrev64qu8_1.c: New file. + * gcc.target/aarch64/simd/vrev64qu8.x: New file. + * gcc.target/aarch64/simd/vrev64s16_1.c: New file. + * gcc.target/aarch64/simd/vrev64s16.x: New file. + * gcc.target/aarch64/simd/vrev64s32_1.c: New file. + * gcc.target/aarch64/simd/vrev64s32.x: New file. + * gcc.target/aarch64/simd/vrev64s8_1.c: New file. + * gcc.target/aarch64/simd/vrev64s8.x: New file. + * gcc.target/aarch64/simd/vrev64u16_1.c: New file. + * gcc.target/aarch64/simd/vrev64u16.x: New file. + * gcc.target/aarch64/simd/vrev64u32_1.c: New file. + * gcc.target/aarch64/simd/vrev64u32.x: New file. + * gcc.target/aarch64/simd/vrev64u8_1.c: New file. + * gcc.target/aarch64/simd/vrev64u8.x: New file. + +2014-07-16 Yvan Roux + + Backport from trunk r210148, r210151, r210422. + 2014-05-14 Alan Lawrence + + * gcc.target/arm/simd/vtrnqf32_1.c: New file. + * gcc.target/arm/simd/vtrnqp16_1.c: New file. + * gcc.target/arm/simd/vtrnqp8_1.c: New file. + * gcc.target/arm/simd/vtrnqs16_1.c: New file. + * gcc.target/arm/simd/vtrnqs32_1.c: New file. + * gcc.target/arm/simd/vtrnqs8_1.c: New file. + * gcc.target/arm/simd/vtrnqu16_1.c: New file. + * gcc.target/arm/simd/vtrnqu32_1.c: New file. + * gcc.target/arm/simd/vtrnqu8_1.c: New file. + * gcc.target/arm/simd/vtrnf32_1.c: New file. + * gcc.target/arm/simd/vtrnp16_1.c: New file. + * gcc.target/arm/simd/vtrnp8_1.c: New file. + * gcc.target/arm/simd/vtrns16_1.c: New file. + * gcc.target/arm/simd/vtrns32_1.c: New file. + * gcc.target/arm/simd/vtrns8_1.c: New file. + * gcc.target/arm/simd/vtrnu16_1.c: New file. + * gcc.target/arm/simd/vtrnu32_1.c: New file. + * gcc.target/arm/simd/vtrnu8_1.c: New file. + + 2014-05-07 Alan Lawrence + + * gcc.target/aarch64/vtrns32.c: Expect zip[12] insn rather than trn[12]. + * gcc.target/aarch64/vtrnu32.c: Likewise. + * gcc.target/aarch64/vtrnf32.c: Likewise. + + 2014-05-07 Alan Lawrence + + * gcc.target/aarch64/simd/vtrnf32_1.c: New file. + * gcc.target/aarch64/simd/vtrnf32.x: New file. + * gcc.target/aarch64/simd/vtrnp16_1.c: New file. + * gcc.target/aarch64/simd/vtrnp16.x: New file. + * gcc.target/aarch64/simd/vtrnp8_1.c: New file. + * gcc.target/aarch64/simd/vtrnp8.x: New file. + * gcc.target/aarch64/simd/vtrnqf32_1.c: New file. + * gcc.target/aarch64/simd/vtrnqf32.x: New file. + * gcc.target/aarch64/simd/vtrnqp16_1.c: New file. + * gcc.target/aarch64/simd/vtrnqp16.x: New file. + * gcc.target/aarch64/simd/vtrnqp8_1.c: New file. + * gcc.target/aarch64/simd/vtrnqp8.x: New file. + * gcc.target/aarch64/simd/vtrnqs16_1.c: New file. + * gcc.target/aarch64/simd/vtrnqs16.x: New file. + * gcc.target/aarch64/simd/vtrnqs32_1.c: New file. + * gcc.target/aarch64/simd/vtrnqs32.x: New file. + * gcc.target/aarch64/simd/vtrnqs8_1.c: New file. + * gcc.target/aarch64/simd/vtrnqs8.x: New file. 
+ * gcc.target/aarch64/simd/vtrnqu16_1.c: New file. + * gcc.target/aarch64/simd/vtrnqu16.x: New file. + * gcc.target/aarch64/simd/vtrnqu32_1.c: New file. + * gcc.target/aarch64/simd/vtrnqu32.x: New file. + * gcc.target/aarch64/simd/vtrnqu8_1.c: New file. + * gcc.target/aarch64/simd/vtrnqu8.x: New file. + * gcc.target/aarch64/simd/vtrns16_1.c: New file. + * gcc.target/aarch64/simd/vtrns16.x: New file. + * gcc.target/aarch64/simd/vtrns32_1.c: New file. + * gcc.target/aarch64/simd/vtrns32.x: New file. + * gcc.target/aarch64/simd/vtrns8_1.c: New file. + * gcc.target/aarch64/simd/vtrns8.x: New file. + * gcc.target/aarch64/simd/vtrnu16_1.c: New file. + * gcc.target/aarch64/simd/vtrnu16.x: New file. + * gcc.target/aarch64/simd/vtrnu32_1.c: New file. + * gcc.target/aarch64/simd/vtrnu32.x: New file. + * gcc.target/aarch64/simd/vtrnu8_1.c: New file. + * gcc.target/aarch64/simd/vtrnu8.x: New file. + +2014-07-16 Yvan Roux + + Backport from trunk r209794, 209858. + 2014-04-25 Marek Polacek + + PR c/60114 + * gcc.dg/pr60114.c: New test. + + 2014-04-28 Kyrylo Tkachov + + PR c/60983 + * gcc.dg/pr60114.c: Use signed chars. + +2014-07-16 Yvan Roux + + Backport from trunk r210861. + 2014-05-23 Jiong Wang + + * gcc.target/aarch64/tail_indirect_call_1.c: New. + +2014-07-16 Yvan Roux + + Backport from trunk r211314. + 2014-06-06 James Greenhalgh + + * gcc.dg/tree-ssa/pr42585.c: Skip for AArch64. + * gcc.dg/tree-ssa/sra-12.c: Likewise. + +2014-07-16 Yvan Roux + + Backport from trunk r210967. + 2014-05-27 Kyrylo Tkachov + + * lib/target-supports.exp (check_effective_target_vect_bswap): + Specify arm*-*-* support. + +2014-07-16 Yvan Roux + + Backport from trunk r210152, 211059. + 2014-05-29 Alan Lawrence + + * gcc.target/arm/simd/vextQf32_1.c: New file. + * gcc.target/arm/simd/vextQp16_1.c: New file. + * gcc.target/arm/simd/vextQp8_1.c: New file. + * gcc.target/arm/simd/vextQs16_1.c: New file. + * gcc.target/arm/simd/vextQs32_1.c: New file. + * gcc.target/arm/simd/vextQs64_1.c: New file. + * gcc.target/arm/simd/vextQs8_1.c: New file. + * gcc.target/arm/simd/vextQu16_1.c: New file. + * gcc.target/arm/simd/vextQu32_1.c: New file. + * gcc.target/arm/simd/vextQu64_1.c: New file. + * gcc.target/arm/simd/vextQu8_1.c: New file. + * gcc.target/arm/simd/vextQp64_1.c: New file. + * gcc.target/arm/simd/vextf32_1.c: New file. + * gcc.target/arm/simd/vextp16_1.c: New file. + * gcc.target/arm/simd/vextp8_1.c: New file. + * gcc.target/arm/simd/vexts16_1.c: New file. + * gcc.target/arm/simd/vexts32_1.c: New file. + * gcc.target/arm/simd/vexts64_1.c: New file. + * gcc.target/arm/simd/vexts8_1.c: New file. + * gcc.target/arm/simd/vextu16_1.c: New file. + * gcc.target/arm/simd/vextu32_1.c: New file. + * gcc.target/arm/simd/vextu64_1.c: New file. + * gcc.target/arm/simd/vextu8_1.c: New file. + * gcc.target/arm/simd/vextp64_1.c: New file. + + 2014-05-07 Alan Lawrence + + * gcc.target/aarch64/simd/ext_f32.x: New file. + * gcc.target/aarch64/simd/ext_f32_1.c: New file. + * gcc.target/aarch64/simd/ext_p16.x: New file. + * gcc.target/aarch64/simd/ext_p16_1.c: New file. + * gcc.target/aarch64/simd/ext_p8.x: New file. + * gcc.target/aarch64/simd/ext_p8_1.c: New file. + * gcc.target/aarch64/simd/ext_s16.x: New file. + * gcc.target/aarch64/simd/ext_s16_1.c: New file. + * gcc.target/aarch64/simd/ext_s32.x: New file. + * gcc.target/aarch64/simd/ext_s32_1.c: New file. + * gcc.target/aarch64/simd/ext_s64.x: New file. + * gcc.target/aarch64/simd/ext_s64_1.c: New file. + * gcc.target/aarch64/simd/ext_s8.x: New file. 
+ * gcc.target/aarch64/simd/ext_s8_1.c: New file. + * gcc.target/aarch64/simd/ext_u16.x: New file. + * gcc.target/aarch64/simd/ext_u16_1.c: New file. + * gcc.target/aarch64/simd/ext_u32.x: New file. + * gcc.target/aarch64/simd/ext_u32_1.c: New file. + * gcc.target/aarch64/simd/ext_u64.x: New file. + * gcc.target/aarch64/simd/ext_u64_1.c: New file. + * gcc.target/aarch64/simd/ext_u8.x: New file. + * gcc.target/aarch64/simd/ext_u8_1.c: New file. + * gcc.target/aarch64/simd/ext_f64.c: New file. + * gcc.target/aarch64/simd/extq_f32.x: New file. + * gcc.target/aarch64/simd/extq_f32_1.c: New file. + * gcc.target/aarch64/simd/extq_p16.x: New file. + * gcc.target/aarch64/simd/extq_p16_1.c: New file. + * gcc.target/aarch64/simd/extq_p8.x: New file. + * gcc.target/aarch64/simd/extq_p8_1.c: New file. + * gcc.target/aarch64/simd/extq_s16.x: New file. + * gcc.target/aarch64/simd/extq_s16_1.c: New file. + * gcc.target/aarch64/simd/extq_s32.x: New file. + * gcc.target/aarch64/simd/extq_s32_1.c: New file. + * gcc.target/aarch64/simd/extq_s64.x: New file. + * gcc.target/aarch64/simd/extq_s64_1.c: New file. + * gcc.target/aarch64/simd/extq_s8.x: New file. + * gcc.target/aarch64/simd/extq_s8_1.c: New file. + * gcc.target/aarch64/simd/extq_u16.x: New file. + * gcc.target/aarch64/simd/extq_u16_1.c: New file. + * gcc.target/aarch64/simd/extq_u32.x: New file. + +2014-07-16 Yvan Roux + + Backport from trunk r209940, r209943, r209947. + 2014-04-30 Alan Lawrence + + * gcc.target/arm/simd/vuzpqf32_1.c: New file. + * gcc.target/arm/simd/vuzpqp16_1.c: New file. + * gcc.target/arm/simd/vuzpqp8_1.c: New file. + * gcc.target/arm/simd/vuzpqs16_1.c: New file. + * gcc.target/arm/simd/vuzpqs32_1.c: New file. + * gcc.target/arm/simd/vuzpqs8_1.c: New file. + * gcc.target/arm/simd/vuzpqu16_1.c: New file. + * gcc.target/arm/simd/vuzpqu32_1.c: New file. + * gcc.target/arm/simd/vuzpqu8_1.c: New file. + * gcc.target/arm/simd/vuzpf32_1.c: New file. + * gcc.target/arm/simd/vuzpp16_1.c: New file. + * gcc.target/arm/simd/vuzpp8_1.c: New file. + * gcc.target/arm/simd/vuzps16_1.c: New file. + * gcc.target/arm/simd/vuzps32_1.c: New file. + * gcc.target/arm/simd/vuzps8_1.c: New file. + * gcc.target/arm/simd/vuzpu16_1.c: New file. + * gcc.target/arm/simd/vuzpu32_1.c: New file. + * gcc.target/arm/simd/vuzpu8_1.c: New file. + + 2014-04-30 Alan Lawrence + + * gcc.target/aarch64/vuzps32_1.c: Expect zip1/2 insn rather than uzp1/2. + * gcc.target/aarch64/vuzpu32_1.c: Likewise. + * gcc.target/aarch64/vuzpf32_1.c: Likewise. + + 2014-04-30 Alan Lawrence + + * gcc.target/aarch64/simd/vuzpf32_1.c: New file. + * gcc.target/aarch64/simd/vuzpf32.x: New file. + * gcc.target/aarch64/simd/vuzpp16_1.c: New file. + * gcc.target/aarch64/simd/vuzpp16.x: New file. + * gcc.target/aarch64/simd/vuzpp8_1.c: New file. + * gcc.target/aarch64/simd/vuzpp8.x: New file. + * gcc.target/aarch64/simd/vuzpqf32_1.c: New file. + * gcc.target/aarch64/simd/vuzpqf32.x: New file. + * gcc.target/aarch64/simd/vuzpqp16_1.c: New file. + * gcc.target/aarch64/simd/vuzpqp16.x: New file. + * gcc.target/aarch64/simd/vuzpqp8_1.c: New file. + * gcc.target/aarch64/simd/vuzpqp8.x: New file. + * gcc.target/aarch64/simd/vuzpqs16_1.c: New file. + * gcc.target/aarch64/simd/vuzpqs16.x: New file. + * gcc.target/aarch64/simd/vuzpqs32_1.c: New file. + * gcc.target/aarch64/simd/vuzpqs32.x: New file. + * gcc.target/aarch64/simd/vuzpqs8_1.c: New file. + * gcc.target/aarch64/simd/vuzpqs8.x: New file. + * gcc.target/aarch64/simd/vuzpqu16_1.c: New file. + * gcc.target/aarch64/simd/vuzpqu16.x: New file. 
+ * gcc.target/aarch64/simd/vuzpqu32_1.c: New file. + * gcc.target/aarch64/simd/vuzpqu32.x: New file. + * gcc.target/aarch64/simd/vuzpqu8_1.c: New file. + * gcc.target/aarch64/simd/vuzpqu8.x: New file. + * gcc.target/aarch64/simd/vuzps16_1.c: New file. + * gcc.target/aarch64/simd/vuzps16.x: New file. + * gcc.target/aarch64/simd/vuzps32_1.c: New file. + * gcc.target/aarch64/simd/vuzps32.x: New file. + * gcc.target/aarch64/simd/vuzps8_1.c: New file. + * gcc.target/aarch64/simd/vuzps8.x: New file. + * gcc.target/aarch64/simd/vuzpu16_1.c: New file. + * gcc.target/aarch64/simd/vuzpu16.x: New file. + * gcc.target/aarch64/simd/vuzpu32_1.c: New file. + * gcc.target/aarch64/simd/vuzpu32.x: New file. + * gcc.target/aarch64/simd/vuzpu8_1.c: New file. + * gcc.target/aarch64/simd/vuzpu8.x: New file. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-13 Yvan Roux + + Backport from trunk r211206. + 2014-06-03 Andrew Pinski + + * gcc.c-torture/compile/20140528-1.c: New testcase. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-25 Yvan Roux + + Backport from trunk r209908. + 2013-04-29 Alan Lawrence + + * gcc.target/arm/simd/simd.exp: New file. + * gcc.target/arm/simd/vzipqf32_1.c: New file. + * gcc.target/arm/simd/vzipqp16_1.c: New file. + * gcc.target/arm/simd/vzipqp8_1.c: New file. + * gcc.target/arm/simd/vzipqs16_1.c: New file. + * gcc.target/arm/simd/vzipqs32_1.c: New file. + * gcc.target/arm/simd/vzipqs8_1.c: New file. + * gcc.target/arm/simd/vzipqu16_1.c: New file. + * gcc.target/arm/simd/vzipqu32_1.c: New file. + * gcc.target/arm/simd/vzipqu8_1.c: New file. + * gcc.target/arm/simd/vzipf32_1.c: New file. + * gcc.target/arm/simd/vzipp16_1.c: New file. + * gcc.target/arm/simd/vzipp8_1.c: New file. + * gcc.target/arm/simd/vzips16_1.c: New file. + * gcc.target/arm/simd/vzips32_1.c: New file. + * gcc.target/arm/simd/vzips8_1.c: New file. + * gcc.target/arm/simd/vzipu16_1.c: New file. + * gcc.target/arm/simd/vzipu32_1.c: New file. + * gcc.target/arm/simd/vzipu8_1.c: New file. + +2014-05-25 Yvan Roux + + Backport from trunk r209893. + 2014-04-29 Alan Lawrence + + * gcc.target/aarch64/simd/simd.exp: New file. + * gcc.target/aarch64/simd/vzipf32_1.c: New file. + * gcc.target/aarch64/simd/vzipf32.x: New file. + * gcc.target/aarch64/simd/vzipp16_1.c: New file. + * gcc.target/aarch64/simd/vzipp16.x: New file. + * gcc.target/aarch64/simd/vzipp8_1.c: New file. + * gcc.target/aarch64/simd/vzipp8.x: New file. + * gcc.target/aarch64/simd/vzipqf32_1.c: New file. + * gcc.target/aarch64/simd/vzipqf32.x: New file. + * gcc.target/aarch64/simd/vzipqp16_1.c: New file. + * gcc.target/aarch64/simd/vzipqp16.x: New file. + * gcc.target/aarch64/simd/vzipqp8_1.c: New file. + * gcc.target/aarch64/simd/vzipqp8.x: New file. + * gcc.target/aarch64/simd/vzipqs16_1.c: New file. + * gcc.target/aarch64/simd/vzipqs16.x: New file. + * gcc.target/aarch64/simd/vzipqs32_1.c: New file. + * gcc.target/aarch64/simd/vzipqs32.x: New file. + * gcc.target/aarch64/simd/vzipqs8_1.c: New file. + * gcc.target/aarch64/simd/vzipqs8.x: New file. + * gcc.target/aarch64/simd/vzipqu16_1.c: New file. + * gcc.target/aarch64/simd/vzipqu16.x: New file. + * gcc.target/aarch64/simd/vzipqu32_1.c: New file. + * gcc.target/aarch64/simd/vzipqu32.x: New file. + * gcc.target/aarch64/simd/vzipqu8_1.c: New file. + * gcc.target/aarch64/simd/vzipqu8.x: New file. + * gcc.target/aarch64/simd/vzips16_1.c: New file. + * gcc.target/aarch64/simd/vzips16.x: New file. + * gcc.target/aarch64/simd/vzips32_1.c: New file. 
+ * gcc.target/aarch64/simd/vzips32.x: New file. + * gcc.target/aarch64/simd/vzips8_1.c: New file. + * gcc.target/aarch64/simd/vzips8.x: New file. + * gcc.target/aarch64/simd/vzipu16_1.c: New file. + * gcc.target/aarch64/simd/vzipu16.x: New file. + * gcc.target/aarch64/simd/vzipu32_1.c: New file. + * gcc.target/aarch64/simd/vzipu32.x: New file. + * gcc.target/aarch64/simd/vzipu8_1.c: New file. + * gcc.target/aarch64/simd/vzipu8.x: New file. + +2014-05-25 Yvan Roux + + Backport from trunk r209808. + 2014-04-25 Jiong Wang + + * gcc.target/arm/tail-long-call.c: New test. + +2014-05-25 Yvan Roux + + Backport from trunk r209749. + 2014-04-24 Alan Lawrence + + * lib/target-supports.exp (check_effective_target_vect_perm): Return + true for aarch64_be. + +2014-05-23 Yvan Roux + + Backport from trunk r209736. + 2014-04-24 Kyrylo Tkachov + + * lib/target-supports.exp (check_effective_target_vect_bswap): New. + * gcc.dg/vect/vect-bswap16: New test. + * gcc.dg/vect/vect-bswap32: Likewise. + * gcc.dg/vect/vect-bswap64: Likewise. + +2014-05-23 Yvan Roux + + Backport from trunk r209713. + 2014-04-23 Alex Velenko + + * gcc.target/aarch64/vdup_lane_1.c: New testcase. + * gcc.target/aarch64/vdup_lane_2.c: New testcase. + * gcc.target/aarch64/vdup_n_1.c: New testcase. + +2014-05-23 Yvan Roux + + Backport from trunk r209704, 209705. + 2014-04-23 Kyrylo Tkachov + + * gcc.target/arm/rev16.c: New test. + + 2014-04-23 Kyrylo Tkachov + + * gcc.target/aarch64/rev16_1.c: New test. + +2014-05-23 Yvan Roux + + Backport from trunk r209642. + 2014-04-22 Alex Velenko + + * gcc.target/aarch64/vreinterpret_f64_1.c: New. + +2014-05-23 Yvan Roux + + Backport from trunk r209640. + 2014-04-22 Alex Velenko + + * gcc.target/aarch64/vqneg_s64_1.c: New testcase. + * gcc.target/aarch64/vqabs_s64_1.c: New testcase. + +2014-05-23 Yvan Roux + + Backport from trunk r209613, 209614. + 2014-04-22 Ian Bolton + + * gcc.target/arm/anddi_notdi-1.c: New test. + * gcc.target/arm/iordi_notdi-1.c: New test case. + + 2014-04-22 Ian Bolton + + * gcc.target/arm/iordi_notdi-1.c: New test. + +2014-05-23 Yvan Roux + + Backport from trunk r209559. + 2014-04-22 Alex Velenko + + * gcc.target/aarch64/vrnd_f64_1.c : New file. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-05-13 Yvan Roux + + Backport from trunk r209889. + 2014-04-29 Zhenqiang Chen + + * gcc.target/aarch64/fcsel_1.c: New test case. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c +++ b/src/gcc/testsuite/gcc.c-torture/compile/20140528-1.c @@ -0,0 +1,9 @@ +unsigned f(unsigned flags, unsigned capabilities) +{ + unsigned gfp_mask; + unsigned gfp_notmask = 0; + gfp_mask = flags & ((1 << 25) - 1); + if (!(capabilities & 0x00000001)) + gfp_mask |= 0x1000000u; + return (gfp_mask & ~gfp_notmask); +} --- a/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c +++ b/src/gcc/testsuite/gcc.dg/tree-ssa/pr42585.c @@ -35,6 +35,6 @@ /* Whether the structs are totally scalarized or not depends on the MOVE_RATIO macro definition in the back end. The scalarization will not take place when using small values for MOVE_RATIO. */ -/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ -/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "struct _fat_ptr _ans" 0 "optimized" { target { ! 
"aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "struct _fat_ptr _T2" 0 "optimized" { target { ! "aarch64*-*-* arm*-*-* avr-*-* nds32*-*-* powerpc*-*-* s390*-*-* sh*-*-*" } } } } */ /* { dg-final { cleanup-tree-dump "optimized" } } */ --- a/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c +++ b/src/gcc/testsuite/gcc.dg/tree-ssa/sra-12.c @@ -21,5 +21,5 @@ *p = l; } -/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "avr*-*-* nds32*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "l;" 0 "release_ssa" { target { ! "aarch64*-*-* avr*-*-* nds32*-*-*" } } } } */ /* { dg-final { cleanup-tree-dump "release_ssa" } } */ --- a/src/gcc/testsuite/gcc.dg/pr60114.c +++ b/src/gcc/testsuite/gcc.dg/pr60114.c @@ -0,0 +1,31 @@ +/* PR c/60114 */ +/* { dg-do compile } */ +/* { dg-options "-Wconversion" } */ + +struct S { int n, u[2]; }; +const signed char z[] = { + [0] = 0x100, /* { dg-warning "9:overflow in implicit constant conversion" } */ + [2] = 0x101, /* { dg-warning "9:overflow in implicit constant conversion" } */ +}; +int A[] = { + 0, 0x80000000, /* { dg-warning "16:conversion of unsigned constant value to negative integer" } */ + 0xA, 0x80000000, /* { dg-warning "18:conversion of unsigned constant value to negative integer" } */ + 0xA, 0xA, 0x80000000 /* { dg-warning "23:conversion of unsigned constant value to negative integer" } */ + }; +int *p = (int []) { 0x80000000 }; /* { dg-warning "21:conversion of unsigned constant value to negative integer" } */ +union { int k; } u = { .k = 0x80000000 }; /* { dg-warning "29:conversion of unsigned constant value to negative integer" } */ +typedef int H[]; +void +foo (void) +{ + signed char a[][3] = { { 0x100, /* { dg-warning "28:overflow in implicit constant conversion" } */ + 1, 0x100 }, /* { dg-warning "24:overflow in implicit constant conversion" } */ + { '\0', 0x100, '\0' } /* { dg-warning "27:overflow in implicit constant conversion" } */ + }; + (const signed char []) { 0x100 }; /* { dg-warning "28:overflow in implicit constant conversion" } */ + (const float []) { 1e0, 1e1, 1e100 }; /* { dg-warning "32:conversion" } */ + struct S s1 = { 0x80000000 }; /* { dg-warning "19:conversion of unsigned constant value to negative integer" } */ + struct S s2 = { .n = 0x80000000 }; /* { dg-warning "24:conversion of unsigned constant value to negative integer" } */ + struct S s3 = { .u[1] = 0x80000000 }; /* { dg-warning "27:conversion of unsigned constant value to negative integer" } */ + H h = { 1, 2, 0x80000000 }; /* { dg-warning "17:conversion of unsigned constant value to negative integer" } */ +} --- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap32.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo32 (unsigned int* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap32 (a[i]); +} + +int +main (void) +{ + unsigned int arr[N]; + unsigned int expect[N]; + int i; + + for (i = 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap32 (i); + if (y) /* Avoid vectorisation. 
*/ + abort (); + } + + vfoo32 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap16.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo16 (unsigned short int* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap16 (a[i]); +} + +int +main (void) +{ + unsigned short arr[N]; + unsigned short expect[N]; + int i; + + for (i = 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap16 (i); + if (y) /* Avoid vectorisation. */ + abort (); + } + + vfoo16 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c +++ b/src/gcc/testsuite/gcc.dg/vect/vect-bswap64.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo64 (unsigned long long* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap64 (a[i]); +} + +int +main (void) +{ + unsigned long long arr[N]; + unsigned long long expect[N]; + int i; + + for (i = 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap64 (i); + if (y) /* Avoid vectorisation. */ + abort (); + } + + vfoo64 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ --- a/src/gcc/objcp/ChangeLog.linaro +++ b/src/gcc/objcp/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/cp/ChangeLog.linaro +++ b/src/gcc/cp/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/expr.c +++ b/src/gcc/expr.c @@ -68,22 +68,6 @@ #include "tree-ssa-address.h" #include "cfgexpand.h" -/* Decide whether a function's arguments should be processed - from first to last or from last to first. - - They should if the stack and args grow in opposite directions, but - only if we have push insns. */ - -#ifdef PUSH_ROUNDING - -#ifndef PUSH_ARGS_REVERSED -#if defined (STACK_GROWS_DOWNWARD) != defined (ARGS_GROW_DOWNWARD) -#define PUSH_ARGS_REVERSED /* If it's last to first. */ -#endif -#endif - -#endif - #ifndef STACK_PUSH_CODE #ifdef STACK_GROWS_DOWNWARD #define STACK_PUSH_CODE PRE_DEC @@ -4353,11 +4337,7 @@ /* Loop over all the words allocated on the stack for this arg. */ /* We can do it by words, because any scalar bigger than a word has a size a multiple of a word. 
*/ -#ifndef PUSH_ARGS_REVERSED - for (i = not_stack; i < size; i++) -#else for (i = size - 1; i >= not_stack; i--) -#endif if (i >= not_stack + offset) emit_push_insn (operand_subword_force (x, i, mode), word_mode, NULL_TREE, NULL_RTX, align, 0, NULL_RTX, --- a/src/gcc/go/ChangeLog.linaro +++ b/src/gcc/go/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/genattrtab.c +++ b/src/gcc/genattrtab.c @@ -4765,6 +4765,7 @@ static struct bypass_list *all_bypasses; static size_t n_bypasses; +static size_t n_bypassed; static void gen_bypass_1 (const char *s, size_t len) @@ -4810,12 +4811,18 @@ struct bypass_list *b; struct insn_reserv *r; + n_bypassed = 0; + /* The reservation list is likely to be much longer than the bypass list. */ for (r = all_insn_reservs; r; r = r->next) for (b = all_bypasses; b; b = b->next) if (fnmatch (b->pattern, r->name, 0) == 0) - r->bypassed = true; + { + n_bypassed++; + r->bypassed = true; + break; + } } /* Check that attribute NAME is used in define_insn_reservation condition @@ -5074,7 +5081,7 @@ process_bypasses (); byps_exp = rtx_alloc (COND); - XVEC (byps_exp, 0) = rtvec_alloc (n_bypasses * 2); + XVEC (byps_exp, 0) = rtvec_alloc (n_bypassed * 2); XEXP (byps_exp, 1) = make_numeric_value (0); for (decl = all_insn_reservs, i = 0; decl; --- a/src/gcc/ada/ChangeLog.linaro +++ b/src/gcc/ada/ChangeLog.linaro @@ -0,0 +1,63 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-05-13 Yvan Roux + + Backport from trunk r209653,209866,209871. + + 2014-04-28 Richard Henderson + + * gcc-interface/Makefile.in: Support aarch64-linux. + + 2014-04-28 Eric Botcazou + + * exp_dbug.ads (Get_External_Name): Add 'False' default to Has_Suffix, + add 'Suffix' parameter and adjust comment. + (Get_External_Name_With_Suffix): Delete. + * exp_dbug.adb (Get_External_Name_With_Suffix): Merge into... + (Get_External_Name): ...here. Add 'False' default to Has_Suffix, add + 'Suffix' parameter. + (Get_Encoded_Name): Remove 2nd argument in call to Get_External_Name. + Call Get_External_Name instead of Get_External_Name_With_Suffix. + (Get_Secondary_DT_External_Name): Likewise. + * exp_cg.adb (Write_Call_Info): Likewise. + * exp_disp.adb (Export_DT): Likewise. + (Import_DT): Likewise. + * comperr.ads (Compiler_Abort): Remove Code parameter and add From_GCC + parameter with False default. + * comperr.adb (Compiler_Abort): Likewise. Adjust accordingly. + * types.h (Fat_Pointer): Rename into... + (String_Pointer): ...this. Add comment on interfacing rules. + * fe.h (Compiler_Abort): Adjust for above renaming. + (Error_Msg_N): Likewise. + (Error_Msg_NE): Likewise. + (Get_External_Name): Likewise. Add third parameter. + (Get_External_Name_With_Suffix): Delete. + * gcc-interface/decl.c (STDCALL_PREFIX): Define. + (create_concat_name): Adjust call to Get_External_Name, remove call to + Get_External_Name_With_Suffix, use STDCALL_PREFIX, adjust for renaming. + * gcc-interface/trans.c (post_error): Likewise. + (post_error_ne): Likewise. + * gcc-interface/misc.c (internal_error_function): Likewise. 
+ + 2014-04-22 Richard Henderson + + * init.c [__linux__] (HAVE_GNAT_ALTERNATE_STACK): New define. + (__gnat_alternate_stack): Enable for all linux except ia64. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/fortran/ChangeLog.linaro +++ b/src/gcc/fortran/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/configure.ac +++ b/src/gcc/configure.ac @@ -809,7 +809,7 @@ ) AC_SUBST(CONFIGURE_SPECS) -ACX_PKGVERSION([GCC]) +ACX_PKGVERSION([Linaro GCC `cat $srcdir/LINARO-VERSION`]) ACX_BUGURL([http://gcc.gnu.org/bugs.html]) # Sanity check enable_languages in case someone does not run the toplevel --- a/src/gcc/calls.c +++ b/src/gcc/calls.c @@ -1104,8 +1104,6 @@ { CUMULATIVE_ARGS *args_so_far_pnt = get_cumulative_args (args_so_far); location_t loc = EXPR_LOCATION (exp); - /* 1 if scanning parms front to back, -1 if scanning back to front. */ - int inc; /* Count arg position in order args appear. */ int argpos; @@ -1116,22 +1114,9 @@ args_size->var = 0; /* In this loop, we consider args in the order they are written. - We fill up ARGS from the front or from the back if necessary - so that in any case the first arg to be pushed ends up at the front. */ + We fill up ARGS from the back. */ - if (PUSH_ARGS_REVERSED) - { - i = num_actuals - 1, inc = -1; - /* In this case, must reverse order of args - so that we compute and push the last arg first. */ - } - else - { - i = 0, inc = 1; - } - - /* First fill in the actual arguments in the ARGS array, splitting - complex arguments if necessary. */ + i = num_actuals - 1; { int j = i; call_expr_arg_iterator iter; @@ -1140,7 +1125,7 @@ if (struct_value_addr_value) { args[j].tree_value = struct_value_addr_value; - j += inc; + j--; } FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) { @@ -1152,17 +1137,17 @@ { tree subtype = TREE_TYPE (argtype); args[j].tree_value = build1 (REALPART_EXPR, subtype, arg); - j += inc; + j--; args[j].tree_value = build1 (IMAGPART_EXPR, subtype, arg); } else args[j].tree_value = arg; - j += inc; + j--; } } /* I counts args in order (to be) pushed; ARGPOS counts in order written. */ - for (argpos = 0; argpos < num_actuals; i += inc, argpos++) + for (argpos = 0; argpos < num_actuals; i--, argpos++) { tree type = TREE_TYPE (args[i].tree_value); int unsignedp; @@ -2952,9 +2937,8 @@ compute_argument_addresses (args, argblock, num_actuals); - /* If we push args individually in reverse order, perform stack alignment - before the first push (the last arg). */ - if (PUSH_ARGS_REVERSED && argblock == 0 + /* Perform stack alignment before the first push (the last arg). */ + if (argblock == 0 && adjusted_args_size.constant > reg_parm_stack_space && adjusted_args_size.constant != unadjusted_args_size) { @@ -3097,12 +3081,6 @@ sibcall_failure = 1; } - /* If we pushed args in forward order, perform stack alignment - after pushing the last arg. */ - if (!PUSH_ARGS_REVERSED && argblock == 0) - anti_adjust_stack (GEN_INT (adjusted_args_size.constant - - unadjusted_args_size)); - /* If register arguments require space on the stack and stack space was not preallocated, allocate stack space here for arguments passed in registers. 
*/ @@ -3152,8 +3130,7 @@ if (pass == 1 && (return_flags & ERF_RETURNS_ARG)) { int arg_nr = return_flags & ERF_RETURN_ARG_MASK; - if (PUSH_ARGS_REVERSED) - arg_nr = num_actuals - arg_nr - 1; + arg_nr = num_actuals - arg_nr - 1; if (arg_nr >= 0 && arg_nr < num_actuals && args[arg_nr].reg @@ -3597,7 +3574,6 @@ isn't present here, so we default to native calling abi here. */ tree fndecl ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ tree fntype ATTRIBUTE_UNUSED = NULL_TREE; /* library calls default to host calling abi ? */ - int inc; int count; rtx argblock = 0; CUMULATIVE_ARGS args_so_far_v; @@ -3946,22 +3922,13 @@ argblock = push_block (GEN_INT (args_size.constant), 0, 0); } - /* If we push args individually in reverse order, perform stack alignment + /* We push args individually in reverse order, perform stack alignment before the first push (the last arg). */ - if (argblock == 0 && PUSH_ARGS_REVERSED) + if (argblock == 0) anti_adjust_stack (GEN_INT (args_size.constant - original_args_size.constant)); - if (PUSH_ARGS_REVERSED) - { - inc = -1; - argnum = nargs - 1; - } - else - { - inc = 1; - argnum = 0; - } + argnum = nargs - 1; #ifdef REG_PARM_STACK_SPACE if (ACCUMULATE_OUTGOING_ARGS) @@ -3978,7 +3945,7 @@ /* ARGNUM indexes the ARGVEC array in the order in which the arguments are to be pushed. */ - for (count = 0; count < nargs; count++, argnum += inc) + for (count = 0; count < nargs; count++, argnum--) { enum machine_mode mode = argvec[argnum].mode; rtx val = argvec[argnum].value; @@ -4080,17 +4047,8 @@ } } - /* If we pushed args in forward order, perform stack alignment - after pushing the last arg. */ - if (argblock == 0 && !PUSH_ARGS_REVERSED) - anti_adjust_stack (GEN_INT (args_size.constant - - original_args_size.constant)); + argnum = nargs - 1; - if (PUSH_ARGS_REVERSED) - argnum = nargs - 1; - else - argnum = 0; - fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0); /* Now load any reg parms into their regs. */ @@ -4097,7 +4055,7 @@ /* ARGNUM indexes the ARGVEC array in the order in which the arguments are to be pushed. */ - for (count = 0; count < nargs; count++, argnum += inc) + for (count = 0; count < nargs; count++, argnum--) { enum machine_mode mode = argvec[argnum].mode; rtx val = argvec[argnum].value; --- a/src/gcc/lto/ChangeLog.linaro +++ b/src/gcc/lto/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/gcc/po/ChangeLog.linaro +++ b/src/gcc/po/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. 
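The genattrtab.c hunk earlier in this patch sizes the generated COND vector by a new n_bypassed counter rather than by n_bypasses: process_bypasses () now counts the insn reservations whose name fnmatch-es at least one bypass pattern, breaking out of the inner loop after the first match so each reservation is counted once, and the rtvec allocation then matches the number of reservations that actually receive a bypass entry. A minimal standalone sketch of that counting logic follows; the pattern and reservation names are made up for illustration, and only POSIX fnmatch () is assumed.

/* Sketch of the bypass counting introduced in genattrtab.c's
   process_bypasses (): count reservations whose name matches at
   least one bypass glob pattern, stopping at the first matching
   pattern so a reservation is counted at most once.  Names below
   are hypothetical.  */
#include <fnmatch.h>
#include <stdio.h>

int
main (void)
{
  static const char *const patterns[] = { "cortex_a53_*", "cortex_a57_fp*" };
  static const char *const reservs[] =
    { "cortex_a53_alu", "cortex_a53_load", "cortex_a57_int" };
  size_t n_patterns = sizeof patterns / sizeof patterns[0];
  size_t n_reservs = sizeof reservs / sizeof reservs[0];
  size_t n_bypassed = 0;
  size_t r, b;

  for (r = 0; r < n_reservs; r++)
    for (b = 0; b < n_patterns; b++)
      if (fnmatch (patterns[b], reservs[r], 0) == 0)
        {
          n_bypassed++;
          break;  /* A reservation contributes one entry at most.  */
        }

  /* The analogue of rtvec_alloc (n_bypassed * 2): size by matched
     reservations, not by the number of patterns.  */
  printf ("%zu of %zu reservations bypassed\n", n_bypassed, n_reservs);
  return 0;
}

This only illustrates the counting; the real code walks the all_insn_reservs and all_bypasses lists and sets r->bypassed as a side effect of the same loop.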
--- a/src/gcc/config.gcc +++ b/src/gcc/config.gcc @@ -311,8 +311,7 @@ ;; aarch64*-*-*) cpu_type=aarch64 - need_64bit_hwint=yes - extra_headers="arm_neon.h" + extra_headers="arm_neon.h arm_acle.h" extra_objs="aarch64-builtins.o aarch-common.o" target_has_targetm_common=yes ;; --- a/src/gcc/Makefile.in +++ b/src/gcc/Makefile.in @@ -2798,7 +2798,7 @@ contribute.texi compat.texi funding.texi gnu.texi gpl_v3.texi \ fdl.texi contrib.texi cppenv.texi cppopts.texi avr-mmcu.texi \ implement-c.texi implement-cxx.texi arm-neon-intrinsics.texi \ - arm-acle-intrinsics.texi + arm-acle-intrinsics.texi aarch64-acle-intrinsics.texi # we explicitly use $(srcdir)/doc/tm.texi here to avoid confusion with # the generated tm.texi; the latter might have a more recent timestamp, --- a/src/gcc/config/host-linux.c +++ b/src/gcc/config/host-linux.c @@ -86,6 +86,8 @@ # define TRY_EMPTY_VM_SPACE 0x60000000 #elif defined(__mc68000__) # define TRY_EMPTY_VM_SPACE 0x40000000 +#elif defined(__aarch64__) && defined(__ILP32__) +# define TRY_EMPTY_VM_SPACE 0x60000000 #elif defined(__aarch64__) # define TRY_EMPTY_VM_SPACE 0x1000000000 #elif defined(__ARM_EABI__) --- a/src/gcc/config/aarch64/aarch64-simd.md +++ b/src/gcc/config/aarch64/aarch64-simd.md @@ -19,8 +19,8 @@ ;; . (define_expand "mov" - [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") - (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] + [(set (match_operand:VALL 0 "nonimmediate_operand" "") + (match_operand:VALL 1 "general_operand" ""))] "TARGET_SIMD" " if (GET_CODE (operands[0]) == MEM) @@ -29,8 +29,8 @@ ) (define_expand "movmisalign" - [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "") - (match_operand:VALL 1 "aarch64_simd_general_operand" ""))] + [(set (match_operand:VALL 0 "nonimmediate_operand" "") + (match_operand:VALL 1 "general_operand" ""))] "TARGET_SIMD" { /* This pattern is not permitted to fail during expansion: if both arguments @@ -91,9 +91,9 @@ ) (define_insn "*aarch64_simd_mov" - [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand" + [(set (match_operand:VD 0 "nonimmediate_operand" "=w, m, w, ?r, ?w, ?r, w") - (match_operand:VD 1 "aarch64_simd_general_operand" + (match_operand:VD 1 "general_operand" "m, w, w, w, r, r, Dn"))] "TARGET_SIMD && (register_operand (operands[0], mode) @@ -119,9 +119,9 @@ ) (define_insn "*aarch64_simd_mov" - [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand" + [(set (match_operand:VQ 0 "nonimmediate_operand" "=w, m, w, ?r, ?w, ?r, w") - (match_operand:VQ 1 "aarch64_simd_general_operand" + (match_operand:VQ 1 "general_operand" "m, w, w, w, r, r, Dn"))] "TARGET_SIMD && (register_operand (operands[0], mode) @@ -286,6 +286,14 @@ [(set_attr "type" "neon_mul_")] ) +(define_insn "bswap" + [(set (match_operand:VDQHSD 0 "register_operand" "=w") + (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] + "TARGET_SIMD" + "rev\\t%0., %1." + [(set_attr "type" "neon_rev")] +) + (define_insn "*aarch64_mul3_elt" [(set (match_operand:VMUL 0 "register_operand" "=w") (mult:VMUL @@ -954,7 +962,7 @@ dup\\t%d0, %1.d[0] fmov\\t%d0, %1 dup\\t%d0, %1" - [(set_attr "type" "neon_dup,fmov,neon_dup") + [(set_attr "type" "neon_dup,f_mcr,neon_dup") (set_attr "simd" "yes,*,yes") (set_attr "fp" "*,yes,*") (set_attr "length" "4")] @@ -1509,7 +1517,7 @@ ) ;; Vector versions of the floating-point frint patterns. -;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. 
(define_insn "2" [(set (match_operand:VDQF 0 "register_operand" "=w") (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")] @@ -2316,6 +2324,15 @@ DONE; }) +(define_expand "aarch64_reinterpretdf" + [(match_operand:DF 0 "register_operand" "") + (match_operand:VD_RE 1 "register_operand" "")] + "TARGET_SIMD" +{ + aarch64_simd_reinterpret (operands[0], operands[1]); + DONE; +}) + (define_expand "aarch64_reinterpretv16qi" [(match_operand:V16QI 0 "register_operand" "") (match_operand:VQ 1 "register_operand" "")] @@ -2702,9 +2719,9 @@ ;; q (define_insn "aarch64_s" - [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w") - (UNQOPS:VSDQ_I_BHSI - (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))] + [(set (match_operand:VSDQ_I 0 "register_operand" "=w") + (UNQOPS:VSDQ_I + (match_operand:VSDQ_I 1 "register_operand" "w")))] "TARGET_SIMD" "s\\t%0, %1" [(set_attr "type" "neon_")] @@ -3756,26 +3773,46 @@ ))) (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" - "@ - cm\t%d0, %d, %d - cm\t%d0, %d1, #0 - #" - "reload_completed - /* We need to prevent the split from - happening in the 'w' constraint cases. */ - && GP_REGNUM_P (REGNO (operands[0])) - && GP_REGNUM_P (REGNO (operands[1]))" - [(const_int 0)] + "#" + "reload_completed" + [(set (match_operand:DI 0 "register_operand") + (neg:DI + (COMPARISONS:DI + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "aarch64_simd_reg_or_zero") + )))] { - enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); - rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); - rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); - emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); - DONE; + /* If we are in the general purpose register file, + we split to a sequence of comparison and store. */ + if (GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))) + { + enum machine_mode mode = SELECT_CC_MODE (, operands[1], operands[2]); + rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); + rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + /* Otherwise, we expand to a similar pattern which does not + clobber CC_REGNUM. */ } [(set_attr "type" "neon_compare, neon_compare_zero, multiple")] ) +(define_insn "*aarch64_cmdi" + [(set (match_operand:DI 0 "register_operand" "=w,w") + (neg:DI + (COMPARISONS:DI + (match_operand:DI 1 "register_operand" "w,w") + (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz") + )))] + "TARGET_SIMD && reload_completed" + "@ + cm\t%d0, %d, %d + cm\t%d0, %d1, #0" + [(set_attr "type" "neon_compare, neon_compare_zero")] +) + ;; cm(hs|hi) (define_insn "aarch64_cm" @@ -3799,25 +3836,44 @@ ))) (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" - "@ - cm\t%d0, %d, %d - #" - "reload_completed - /* We need to prevent the split from - happening in the 'w' constraint cases. 
*/ - && GP_REGNUM_P (REGNO (operands[0])) - && GP_REGNUM_P (REGNO (operands[1]))" - [(const_int 0)] + "#" + "reload_completed" + [(set (match_operand:DI 0 "register_operand") + (neg:DI + (UCOMPARISONS:DI + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "aarch64_simd_reg_or_zero") + )))] { - enum machine_mode mode = CCmode; - rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); - rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); - emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); - DONE; + /* If we are in the general purpose register file, + we split to a sequence of comparison and store. */ + if (GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))) + { + enum machine_mode mode = CCmode; + rtx cc_reg = aarch64_gen_compare_reg (, operands[1], operands[2]); + rtx comparison = gen_rtx_ (mode, operands[1], operands[2]); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + /* Otherwise, we expand to a similar pattern which does not + clobber CC_REGNUM. */ } - [(set_attr "type" "neon_compare, neon_compare_zero")] + [(set_attr "type" "neon_compare,multiple")] ) +(define_insn "*aarch64_cmdi" + [(set (match_operand:DI 0 "register_operand" "=w") + (neg:DI + (UCOMPARISONS:DI + (match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w") + )))] + "TARGET_SIMD && reload_completed" + "cm\t%d0, %d, %d" + [(set_attr "type" "neon_compare")] +) + ;; cmtst (define_insn "aarch64_cmtst" @@ -3843,23 +3899,44 @@ (const_int 0)))) (clobber (reg:CC CC_REGNUM))] "TARGET_SIMD" - "@ - cmtst\t%d0, %d1, %d2 - #" - "reload_completed - /* We need to prevent the split from - happening in the 'w' constraint cases. */ - && GP_REGNUM_P (REGNO (operands[0])) - && GP_REGNUM_P (REGNO (operands[1]))" - [(const_int 0)] + "#" + "reload_completed" + [(set (match_operand:DI 0 "register_operand") + (neg:DI + (ne:DI + (and:DI + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "register_operand")) + (const_int 0))))] { - rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); - enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); - rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); - rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); - emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); - DONE; + /* If we are in the general purpose register file, + we split to a sequence of comparison and store. */ + if (GP_REGNUM_P (REGNO (operands[0])) + && GP_REGNUM_P (REGNO (operands[1]))) + { + rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]); + enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx); + rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx); + rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx); + emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg)); + DONE; + } + /* Otherwise, we expand to a similar pattern which does not + clobber CC_REGNUM. 
*/ } + [(set_attr "type" "neon_tst,multiple")] +) + +(define_insn "*aarch64_cmtstdi" + [(set (match_operand:DI 0 "register_operand" "=w") + (neg:DI + (ne:DI + (and:DI + (match_operand:DI 1 "register_operand" "w") + (match_operand:DI 2 "register_operand" "w")) + (const_int 0))))] + "TARGET_SIMD" + "cmtst\t%d0, %d1, %d2" [(set_attr "type" "neon_tst")] ) @@ -3950,6 +4027,17 @@ [(set_attr "type" "neon_store2_2reg")] ) +(define_insn "vec_store_lanesoi_lane" + [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") + (unspec: [(match_operand:OI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_ST2_LANE))] + "TARGET_SIMD" + "st2\\t{%S1. - %T1.}[%2], %0" + [(set_attr "type" "neon_store3_one_lane")] +) + (define_insn "vec_load_lanesci" [(set (match_operand:CI 0 "register_operand" "=w") (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") @@ -3970,6 +4058,17 @@ [(set_attr "type" "neon_store3_3reg")] ) +(define_insn "vec_store_lanesci_lane" + [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") + (unspec: [(match_operand:CI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_ST3_LANE))] + "TARGET_SIMD" + "st3\\t{%S1. - %U1.}[%2], %0" + [(set_attr "type" "neon_store3_one_lane")] +) + (define_insn "vec_load_lanesxi" [(set (match_operand:XI 0 "register_operand" "=w") (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv") @@ -3990,6 +4089,17 @@ [(set_attr "type" "neon_store4_4reg")] ) +(define_insn "vec_store_lanesxi_lane" + [(set (match_operand: 0 "aarch64_simd_struct_operand" "=Utv") + (unspec: [(match_operand:XI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand" "i")] + UNSPEC_ST4_LANE))] + "TARGET_SIMD" + "st4\\t{%S1. - %V1.}[%2], %0" + [(set_attr "type" "neon_store4_one_lane")] +) + ;; Reload patterns for AdvSIMD register list operands. (define_expand "mov" @@ -4398,6 +4508,44 @@ [(set_attr "type" "neon_permute")] ) +;; Note immediate (third) operand is lane index not byte index. +(define_insn "aarch64_ext" + [(set (match_operand:VALL 0 "register_operand" "=w") + (unspec:VALL [(match_operand:VALL 1 "register_operand" "w") + (match_operand:VALL 2 "register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_EXT))] + "TARGET_SIMD" +{ + operands[3] = GEN_INT (INTVAL (operands[3]) + * GET_MODE_SIZE (GET_MODE_INNER (mode))); + return "ext\\t%0., %1., %2., #%3"; +} + [(set_attr "type" "neon_ext")] +) + +;; This exists solely to check the arguments to the corresponding __builtin. +;; Used where we want an error for out-of-range indices which would otherwise +;; be silently wrapped (e.g. the mask to a __builtin_shuffle). +(define_expand "aarch64_im_lane_boundsi" + [(match_operand:SI 0 "immediate_operand" "i") + (match_operand:SI 1 "immediate_operand" "i")] + "TARGET_SIMD" +{ + aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1])); + DONE; +} +) + +(define_insn "aarch64_rev" + [(set (match_operand:VALL 0 "register_operand" "=w") + (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")] + REVERSE))] + "TARGET_SIMD" + "rev\\t%0., %1." 
+ [(set_attr "type" "neon_rev")] +) + (define_insn "aarch64_st2_dreg" [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv") (unspec:TI [(match_operand:OI 1 "register_operand" "w") @@ -4484,6 +4632,57 @@ DONE; }) +(define_expand "aarch64_st2_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:OI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + + emit_insn (gen_vec_store_lanesoi_lane (mem, + operands[1], + operands[2])); + DONE; +}) + +(define_expand "aarch64_st3_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:CI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + + emit_insn (gen_vec_store_lanesci_lane (mem, + operands[1], + operands[2])); + DONE; +}) + +(define_expand "aarch64_st4_lane" + [(match_operand:DI 0 "register_operand" "r") + (match_operand:XI 1 "register_operand" "w") + (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) + (match_operand:SI 2 "immediate_operand")] + "TARGET_SIMD" +{ + enum machine_mode mode = mode; + rtx mem = gen_rtx_MEM (mode, operands[0]); + operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); + + emit_insn (gen_vec_store_lanesxi_lane (mem, + operands[1], + operands[2])); + DONE; +}) + (define_expand "aarch64_st1" [(match_operand:DI 0 "register_operand") (match_operand:VALL 1 "register_operand")] --- a/src/gcc/config/aarch64/predicates.md +++ b/src/gcc/config/aarch64/predicates.md @@ -26,6 +26,10 @@ && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC")))) ) +(define_predicate "aarch64_call_insn_operand" + (ior (match_code "symbol_ref") + (match_operand 0 "register_operand"))) + (define_predicate "aarch64_simd_register" (and (match_code "reg") (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS") --- a/src/gcc/config/aarch64/arm_neon.h +++ b/src/gcc/config/aarch64/arm_neon.h @@ -2119,29 +2119,26 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vqadd_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_uqaddv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_uqaddv8qi_uuu (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vqadd_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_uqaddv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_uqaddv4hi_uuu (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vqadd_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_uqaddv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_uqaddv2si_uuu (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqadd_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_uqadddi ((int64x1_t) __a, - (int64x1_t) __b); + return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a, + (uint64_t) __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -2171,29 +2168,25 @@ __extension__ static __inline uint8x16_t __attribute__ 
((__always_inline__)) vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_uqaddv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_uqaddv16qi_uuu (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_uqaddv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_uqaddv8hi_uuu (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_uqaddv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_uqaddv4si_uuu (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_uqaddv2di ((int64x2_t) __a, - (int64x2_t) __b); + return __builtin_aarch64_uqaddv2di_uuu (__a, __b); } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -2223,29 +2216,26 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vqsub_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_uqsubv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_uqsubv8qi_uuu (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vqsub_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_uqsubv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_uqsubv4hi_uuu (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vqsub_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_uqsubv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_uqsubv2si_uuu (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqsub_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_uqsubdi ((int64x1_t) __a, - (int64x1_t) __b); + return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a, + (uint64_t) __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -2275,29 +2265,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_uqsubv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_uqsubv16qi_uuu (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_uqsubv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_uqsubv8hi_uuu (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_uqsubv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_uqsubv4si_uuu (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_uqsubv2di ((int64x2_t) __a, - (int64x2_t) __b); + return __builtin_aarch64_uqsubv2di_uuu (__a, __b); } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -2318,6 +2304,12 @@ return (int32x2_t) __builtin_aarch64_sqnegv2si (__a); } +__extension__ static __inline int64x1_t 
__attribute__ ((__always_inline__)) +vqneg_s64 (int64x1_t __a) +{ + return __builtin_aarch64_sqnegdi (__a); +} + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vqnegq_s8 (int8x16_t __a) { @@ -2354,6 +2346,12 @@ return (int32x2_t) __builtin_aarch64_sqabsv2si (__a); } +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vqabs_s64 (int64x1_t __a) +{ + return __builtin_aarch64_sqabsdi (__a); +} + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vqabsq_s8 (int8x16_t __a) { @@ -2643,1352 +2641,1587 @@ /* vreinterpret */ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vreinterpret_p8_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv8qidf_ps (__a); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s8 (int8x8_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s16 (int16x4_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s32 (int32x2_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_s64 (int64x1_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_f32 (float32x2_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_u8 (uint8x8_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_u16 (uint16x4_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_u32 (uint32x2_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_u64 (uint64x1_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_p16 (poly16x4_t __a) { - return (poly8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); + return (poly8x8_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_f64 (float64x2_t __a) +{ + return (poly8x16_t) __a; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_s8 (int8x16_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_s16 (int16x8_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ 
((__always_inline__)) vreinterpretq_p8_s32 (int32x4_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_s64 (int64x2_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_f32 (float32x4_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_u8 (uint8x16_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) - __a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_u16 (uint16x8_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) - __a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_u32 (uint32x4_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) - __a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_u64 (uint64x2_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) - __a); + return (poly8x16_t) __a; } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_p16 (poly16x8_t __a) { - return (poly8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) - __a); + return (poly8x16_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vreinterpret_p16_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv4hidf_ps (__a); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_s8 (int8x8_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_s16 (int16x4_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_s32 (int32x2_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_s64 (int64x1_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_f32 (float32x2_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_u8 (uint8x8_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_u16 (uint16x4_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_u32 (uint32x2_t __a) { - return (poly16x4_t) 
__builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_u64 (uint64x1_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) vreinterpret_p16_p8 (poly8x8_t __a) { - return (poly16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); + return (poly16x4_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_f64 (float64x2_t __a) +{ + return (poly16x8_t) __a; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_s8 (int8x16_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_s16 (int16x8_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_s32 (int32x4_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_s64 (int64x2_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_f32 (float32x4_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_u8 (uint8x16_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) - __a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_u16 (uint16x8_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_u32 (uint32x4_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_u64 (uint64x2_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); + return (poly16x8_t) __a; } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_p8 (poly8x16_t __a) { - return (poly16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) - __a); + return (poly16x8_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vreinterpret_f32_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv2sfdf (__a); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_s8 (int8x8_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi (__a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_s16 (int16x4_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi (__a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ 
((__always_inline__)) vreinterpret_f32_s32 (int32x2_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si (__a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_s64 (int64x1_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi (__a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_u8 (uint8x8_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_u16 (uint16x4_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) - __a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_u32 (uint32x2_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfv2si ((int32x2_t) - __a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_u64 (uint64x1_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfdi ((int64x1_t) __a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_p8 (poly8x8_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfv8qi ((int8x8_t) __a); + return (float32x2_t) __a; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vreinterpret_f32_p16 (poly16x4_t __a) { - return (float32x2_t) __builtin_aarch64_reinterpretv2sfv4hi ((int16x4_t) - __a); + return (float32x2_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_f64 (float64x2_t __a) +{ + return (float32x4_t) __a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_s8 (int8x16_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi (__a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_s16 (int16x8_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi (__a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_s32 (int32x4_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si (__a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_s64 (int64x2_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di (__a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_u8 (uint8x16_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) - __a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_u16 (uint16x8_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) - __a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_u32 (uint32x4_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv4si ((int32x4_t) - __a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_u64 
(uint64x2_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv2di ((int64x2_t) - __a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_p8 (poly8x16_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv16qi ((int8x16_t) - __a); + return (float32x4_t) __a; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_p16 (poly16x8_t __a) { - return (float32x4_t) __builtin_aarch64_reinterpretv4sfv8hi ((int16x8_t) - __a); + return (float32x4_t) __a; } +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_f32 (float32x2_t __a) +{ + return __builtin_aarch64_reinterpretdfv2sf (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_p8 (poly8x8_t __a) +{ + return __builtin_aarch64_reinterpretdfv8qi_sp (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_p16 (poly16x4_t __a) +{ + return __builtin_aarch64_reinterpretdfv4hi_sp (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_s8 (int8x8_t __a) +{ + return __builtin_aarch64_reinterpretdfv8qi (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_s16 (int16x4_t __a) +{ + return __builtin_aarch64_reinterpretdfv4hi (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_s32 (int32x2_t __a) +{ + return __builtin_aarch64_reinterpretdfv2si (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_s64 (int64x1_t __a) +{ + return __builtin_aarch64_createdf ((uint64_t) vget_lane_s64 (__a, 0)); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_u8 (uint8x8_t __a) +{ + return __builtin_aarch64_reinterpretdfv8qi_su (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_u16 (uint16x4_t __a) +{ + return __builtin_aarch64_reinterpretdfv4hi_su (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_u32 (uint32x2_t __a) +{ + return __builtin_aarch64_reinterpretdfv2si_su (__a); +} + +__extension__ static __inline float64x1_t __attribute__((__always_inline__)) +vreinterpret_f64_u64 (uint64x1_t __a) +{ + return __builtin_aarch64_createdf (vget_lane_u64 (__a, 0)); +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_f32 (float32x4_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_p8 (poly8x16_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_p16 (poly16x8_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_s8 (int8x16_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_s16 (int16x8_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_s32 (int32x4_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t 
__attribute__((__always_inline__)) +vreinterpretq_f64_s64 (int64x2_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_u8 (uint8x16_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_u16 (uint16x8_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_u32 (uint32x4_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_u64 (uint64x2_t __a) +{ + return (float64x2_t) __a; +} + __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vreinterpret_s64_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretdidf (__a); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_s8 (int8x8_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_s16 (int16x4_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_s32 (int32x2_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_f32 (float32x2_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_u8 (uint8x8_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_u16 (uint16x4_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_u32 (uint32x2_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_u64 (uint64x1_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdidi ((int64x1_t) __a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_p8 (poly8x8_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); + return (int64x1_t) __a; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vreinterpret_s64_p16 (poly16x4_t __a) { - return (int64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); + return (int64x1_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_f64 (float64x2_t __a) +{ + return (int64x2_t) __a; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_s8 (int8x16_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_s16 (int16x8_t __a) { - return (int64x2_t) 
__builtin_aarch64_reinterpretv2div8hi (__a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_s32 (int32x4_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_f32 (float32x4_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_u8 (uint8x16_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_u16 (uint16x8_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_u32 (uint32x4_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_u64 (uint64x2_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div2di ((int64x2_t) __a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_p8 (poly8x16_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) __a); + return (int64x2_t) __a; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_p16 (poly16x8_t __a) { - return (int64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); + return (int64x2_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vreinterpret_u64_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretdidf_us (__a); +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_s8 (int8x8_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi (__a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_s16 (int16x4_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi (__a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_s32 (int32x2_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si (__a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_s64 (int64x1_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdidi (__a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_f32 (float32x2_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv2sf (__a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_u8 (uint8x8_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_u16 (uint16x4_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t 
__attribute__ ((__always_inline__)) vreinterpret_u64_u32 (uint32x2_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv2si ((int32x2_t) __a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_p8 (poly8x8_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv8qi ((int8x8_t) __a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vreinterpret_u64_p16 (poly16x4_t __a) { - return (uint64x1_t) __builtin_aarch64_reinterpretdiv4hi ((int16x4_t) __a); + return (uint64x1_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_u64_f64 (float64x2_t __a) +{ + return (uint64x2_t) __a; +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_s8 (int8x16_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi (__a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_s16 (int16x8_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi (__a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_s32 (int32x4_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si (__a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_s64 (int64x2_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div2di (__a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_f32 (float32x4_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div4sf (__a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_u8 (uint8x16_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) - __a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_u16 (uint16x8_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_u32 (uint32x4_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div4si ((int32x4_t) __a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_p8 (poly8x16_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div16qi ((int8x16_t) - __a); + return (uint64x2_t) __a; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vreinterpretq_u64_p16 (poly16x8_t __a) { - return (uint64x2_t) __builtin_aarch64_reinterpretv2div8hi ((int16x8_t) __a); + return (uint64x2_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vreinterpret_s8_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv8qidf (__a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_s16 (int16x4_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); + return (int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_s32 (int32x2_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); + return 
(int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_s64 (int64x1_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); + return (int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_f32 (float32x2_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); + return (int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_u8 (uint8x8_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); + return (int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_u16 (uint16x4_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); + return (int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_u32 (uint32x2_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); + return (int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_u64 (uint64x1_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); + return (int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_p8 (poly8x8_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); + return (int8x8_t) __a; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vreinterpret_s8_p16 (poly16x4_t __a) { - return (int8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); + return (int8x8_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_f64 (float64x2_t __a) +{ + return (int8x16_t) __a; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_s16 (int16x8_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_s32 (int32x4_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_s64 (int64x2_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_f32 (float32x4_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_u8 (uint8x16_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) - __a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_u16 (uint16x8_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_u32 (uint32x4_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) __a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_u64 (uint64x2_t __a) { - return (int8x16_t) 
__builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) __a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_p8 (poly8x16_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) - __a); + return (int8x16_t) __a; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_p16 (poly16x8_t __a) { - return (int8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) __a); + return (int8x16_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vreinterpret_s16_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv4hidf (__a); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_s8 (int8x8_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_s32 (int32x2_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_s64 (int64x1_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_f32 (float32x2_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_u8 (uint8x8_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_u16 (uint16x4_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_u32 (uint32x2_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_u64 (uint64x1_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_p8 (poly8x8_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); + return (int16x4_t) __a; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vreinterpret_s16_p16 (poly16x4_t __a) { - return (int16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); + return (int16x4_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_f64 (float64x2_t __a) +{ + return (int16x8_t) __a; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_s8 (int8x16_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_s32 (int32x4_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_s64 (int64x2_t __a) 
{ - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_f32 (float32x4_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_u8 (uint8x16_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_u16 (uint16x8_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_u32 (uint32x4_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_u64 (uint64x2_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_p8 (poly8x16_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) __a); + return (int16x8_t) __a; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_p16 (poly16x8_t __a) { - return (int16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); + return (int16x8_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vreinterpret_s32_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv2sidf (__a); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_s8 (int8x8_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); + return (int32x2_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_s16 (int16x4_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); + return (int32x2_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_s64 (int64x1_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); + return (int32x2_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_f32 (float32x2_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); + return (int32x2_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_u8 (uint8x8_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); + return (int32x2_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_u16 (uint16x4_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); + return (int32x2_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_u32 (uint32x2_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2siv2si ((int32x2_t) __a); + return (int32x2_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_u64 (uint64x1_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); + return (int32x2_t) __a; } __extension__ 
static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_p8 (poly8x8_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); + return (int32x2_t) __a; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vreinterpret_s32_p16 (poly16x4_t __a) { - return (int32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); + return (int32x2_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_f64 (float64x2_t __a) +{ + return (int32x4_t) __a; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_s8 (int8x16_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_s16 (int16x8_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_s64 (int64x2_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_f32 (float32x4_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_u8 (uint8x16_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_u16 (uint16x8_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_u32 (uint32x4_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv4si ((int32x4_t) __a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_u64 (uint64x2_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_p8 (poly8x16_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) __a); + return (int32x4_t) __a; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_p16 (poly16x8_t __a) { - return (int32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); + return (int32x4_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vreinterpret_u8_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv8qidf_us (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_s8 (int8x8_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi (__a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_s16 (int16x4_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi (__a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_s32 (int32x2_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si (__a); + return (uint8x8_t) 
__a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_s64 (int64x1_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi (__a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_f32 (float32x2_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2sf (__a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_u16 (uint16x4_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_u32 (uint32x2_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv2si ((int32x2_t) __a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_u64 (uint64x1_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qidi ((int64x1_t) __a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_p8 (poly8x8_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv8qi ((int8x8_t) __a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vreinterpret_u8_p16 (poly16x4_t __a) { - return (uint8x8_t) __builtin_aarch64_reinterpretv8qiv4hi ((int16x4_t) __a); + return (uint8x8_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_f64 (float64x2_t __a) +{ + return (uint8x16_t) __a; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_s8 (int8x16_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi (__a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_s16 (int16x8_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi (__a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_s32 (int32x4_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si (__a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_s64 (int64x2_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di (__a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_f32 (float32x4_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4sf (__a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_u16 (uint16x8_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) - __a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_u32 (uint32x4_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv4si ((int32x4_t) - __a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_u64 (uint64x2_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv2di ((int64x2_t) - __a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_p8 (poly8x16_t __a) { - return 
(uint8x16_t) __builtin_aarch64_reinterpretv16qiv16qi ((int8x16_t) - __a); + return (uint8x16_t) __a; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_p16 (poly16x8_t __a) { - return (uint8x16_t) __builtin_aarch64_reinterpretv16qiv8hi ((int16x8_t) - __a); + return (uint8x16_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vreinterpret_u16_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv4hidf_us (__a); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_s8 (int8x8_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi (__a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_s16 (int16x4_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi (__a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_s32 (int32x2_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si (__a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_s64 (int64x1_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi (__a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_f32 (float32x2_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2sf (__a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_u8 (uint8x8_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_u32 (uint32x2_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv2si ((int32x2_t) __a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_u64 (uint64x1_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hidi ((int64x1_t) __a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_p8 (poly8x8_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv8qi ((int8x8_t) __a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vreinterpret_u16_p16 (poly16x4_t __a) { - return (uint16x4_t) __builtin_aarch64_reinterpretv4hiv4hi ((int16x4_t) __a); + return (uint16x4_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_f64 (float64x2_t __a) +{ + return (uint16x8_t) __a; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_s8 (int8x16_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi (__a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_s16 (int16x8_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi (__a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_s32 (int32x4_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si (__a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ 
((__always_inline__)) vreinterpretq_u16_s64 (int64x2_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di (__a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_f32 (float32x4_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4sf (__a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_u8 (uint8x16_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) - __a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_u32 (uint32x4_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv4si ((int32x4_t) __a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_u64 (uint64x2_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv2di ((int64x2_t) __a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_p8 (poly8x16_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv16qi ((int8x16_t) - __a); + return (uint16x8_t) __a; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_p16 (poly16x8_t __a) { - return (uint16x8_t) __builtin_aarch64_reinterpretv8hiv8hi ((int16x8_t) __a); + return (uint16x8_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vreinterpret_u32_f64 (float64x1_t __a) +{ + return __builtin_aarch64_reinterpretv2sidf_us (__a); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_s8 (int8x8_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi (__a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_s16 (int16x4_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi (__a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_s32 (int32x2_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2si (__a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_s64 (int64x1_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi (__a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_f32 (float32x2_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2siv2sf (__a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_u8 (uint8x8_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_u16 (uint16x4_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_u64 (uint64x1_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2sidi ((int64x1_t) __a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_p8 (poly8x8_t __a) { - return (uint32x2_t) 
__builtin_aarch64_reinterpretv2siv8qi ((int8x8_t) __a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vreinterpret_u32_p16 (poly16x4_t __a) { - return (uint32x2_t) __builtin_aarch64_reinterpretv2siv4hi ((int16x4_t) __a); + return (uint32x2_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_f64 (float64x2_t __a) +{ + return (uint32x4_t) __a; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_s8 (int8x16_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi (__a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_s16 (int16x8_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi (__a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_s32 (int32x4_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4si (__a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_s64 (int64x2_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di (__a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_f32 (float32x4_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv4sf (__a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_u8 (uint8x16_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) - __a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_u16 (uint16x8_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_u64 (uint64x2_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv2di ((int64x2_t) __a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_p8 (poly8x16_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv16qi ((int8x16_t) - __a); + return (uint32x4_t) __a; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_p16 (poly16x8_t __a) { - return (uint32x4_t) __builtin_aarch64_reinterpretv4siv8hi ((int16x8_t) __a); + return (uint32x4_t) __a; } #define __GET_LOW(__TYPE) \ @@ -5414,318 +5647,6 @@ return result; } -#define vext_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x2_t b_ = (b); \ - float32x2_t a_ = (a); \ - float32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x1_t b_ = (b); \ - float64x1_t a_ = (a); \ - float64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x8_t b_ = (b); \ - poly8x8_t a_ = (a); \ - poly8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_p16(a, 
b, c) \ - __extension__ \ - ({ \ - poly16x4_t b_ = (b); \ - poly16x4_t a_ = (a); \ - poly16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s8(a, b, c) \ - __extension__ \ - ({ \ - int8x8_t b_ = (b); \ - int8x8_t a_ = (a); \ - int8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x4_t b_ = (b); \ - int16x4_t a_ = (a); \ - int16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x2_t b_ = (b); \ - int32x2_t a_ = (a); \ - int32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x1_t b_ = (b); \ - int64x1_t a_ = (a); \ - int64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_u8(a, b, c) \ - __extension__ \ - ({ \ - uint8x8_t b_ = (b); \ - uint8x8_t a_ = (a); \ - uint8x8_t result; \ - __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x4_t b_ = (b); \ - uint16x4_t a_ = (a); \ - uint16x4_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x2_t b_ = (b); \ - uint32x2_t a_ = (a); \ - uint32x2_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vext_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x1_t b_ = (b); \ - uint64x1_t a_ = (a); \ - uint64x1_t result; \ - __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32x4_t a_ = (a); \ - float32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64x2_t a_ = (a); \ - float64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_p8(a, b, c) \ - __extension__ \ - ({ \ - poly8x16_t b_ = (b); \ - poly8x16_t a_ = (a); \ - poly8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_p16(a, b, c) \ - __extension__ \ - ({ \ - poly16x8_t b_ = (b); \ - poly16x8_t a_ = (a); \ - poly16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_s8(a, b, c) \ - __extension__ \ - ({ \ - int8x16_t b_ = (b); \ - int8x16_t a_ = (a); \ - 
int8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_s16(a, b, c) \ - __extension__ \ - ({ \ - int16x8_t b_ = (b); \ - int16x8_t a_ = (a); \ - int16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_s32(a, b, c) \ - __extension__ \ - ({ \ - int32x4_t b_ = (b); \ - int32x4_t a_ = (a); \ - int32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_s64(a, b, c) \ - __extension__ \ - ({ \ - int64x2_t b_ = (b); \ - int64x2_t a_ = (a); \ - int64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_u8(a, b, c) \ - __extension__ \ - ({ \ - uint8x16_t b_ = (b); \ - uint8x16_t a_ = (a); \ - uint8x16_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_u16(a, b, c) \ - __extension__ \ - ({ \ - uint16x8_t b_ = (b); \ - uint16x8_t a_ = (a); \ - uint16x8_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_u32(a, b, c) \ - __extension__ \ - ({ \ - uint32x4_t b_ = (b); \ - uint32x4_t a_ = (a); \ - uint32x4_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - -#define vextq_u64(a, b, c) \ - __extension__ \ - ({ \ - uint64x2_t b_ = (b); \ - uint64x2_t a_ = (a); \ - uint64x2_t result; \ - __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c) { @@ -10628,402 +10549,6 @@ return result; } -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vrev16_p8 (poly8x8_t a) -{ - poly8x8_t result; - __asm__ ("rev16 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrev16_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("rev16 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrev16_u8 (uint8x8_t a) -{ - uint8x8_t result; - __asm__ ("rev16 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vrev16q_p8 (poly8x16_t a) -{ - poly8x16_t result; - __asm__ ("rev16 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrev16q_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("rev16 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrev16q_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("rev16 %0.16b,%1.16b" - : 
"=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vrev32_p8 (poly8x8_t a) -{ - poly8x8_t result; - __asm__ ("rev32 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vrev32_p16 (poly16x4_t a) -{ - poly16x4_t result; - __asm__ ("rev32 %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrev32_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("rev32 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vrev32_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("rev32 %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrev32_u8 (uint8x8_t a) -{ - uint8x8_t result; - __asm__ ("rev32 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vrev32_u16 (uint16x4_t a) -{ - uint16x4_t result; - __asm__ ("rev32 %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vrev32q_p8 (poly8x16_t a) -{ - poly8x16_t result; - __asm__ ("rev32 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vrev32q_p16 (poly16x8_t a) -{ - poly16x8_t result; - __asm__ ("rev32 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrev32q_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("rev32 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vrev32q_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("rev32 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrev32q_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("rev32 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vrev32q_u16 (uint16x8_t a) -{ - uint16x8_t result; - __asm__ ("rev32 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vrev64_f32 (float32x2_t a) -{ - float32x2_t result; - __asm__ ("rev64 %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vrev64_p8 (poly8x8_t a) -{ - poly8x8_t result; - __asm__ ("rev64 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vrev64_p16 (poly16x4_t a) -{ - poly16x4_t result; - __asm__ ("rev64 %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return 
result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrev64_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("rev64 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vrev64_s16 (int16x4_t a) -{ - int16x4_t result; - __asm__ ("rev64 %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vrev64_s32 (int32x2_t a) -{ - int32x2_t result; - __asm__ ("rev64 %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrev64_u8 (uint8x8_t a) -{ - uint8x8_t result; - __asm__ ("rev64 %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vrev64_u16 (uint16x4_t a) -{ - uint16x4_t result; - __asm__ ("rev64 %0.4h,%1.4h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vrev64_u32 (uint32x2_t a) -{ - uint32x2_t result; - __asm__ ("rev64 %0.2s,%1.2s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vrev64q_f32 (float32x4_t a) -{ - float32x4_t result; - __asm__ ("rev64 %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vrev64q_p8 (poly8x16_t a) -{ - poly8x16_t result; - __asm__ ("rev64 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vrev64q_p16 (poly16x8_t a) -{ - poly16x8_t result; - __asm__ ("rev64 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrev64q_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("rev64 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vrev64q_s16 (int16x8_t a) -{ - int16x8_t result; - __asm__ ("rev64 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vrev64q_s32 (int32x4_t a) -{ - int32x4_t result; - __asm__ ("rev64 %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrev64q_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("rev64 %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vrev64q_u16 (uint16x8_t a) -{ - uint16x8_t result; - __asm__ ("rev64 %0.8h,%1.8h" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vrev64q_u32 (uint32x4_t a) -{ - uint32x4_t result; - __asm__ ("rev64 %0.4s,%1.4s" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - #define vrshrn_high_n_s16(a, b, c) \ 
__extension__ \ ({ \ @@ -12447,469 +11972,7 @@ return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vtrn1_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("trn1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtrn1_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("trn1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vtrn1_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("trn1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtrn1_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("trn1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vtrn1_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("trn1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vtrn1_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("trn1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vtrn1_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("trn1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vtrn1_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("trn1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vtrn1_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("trn1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vtrn1q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("trn1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vtrn1q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("trn1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vtrn1q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("trn1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vtrn1q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("trn1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vtrn1q_s8 (int8x16_t a, int8x16_t b) -{ - 
int8x16_t result; - __asm__ ("trn1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vtrn1q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("trn1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vtrn1q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("trn1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vtrn1q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("trn1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vtrn1q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("trn1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vtrn1q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("trn1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vtrn1q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("trn1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vtrn1q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("trn1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vtrn2_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("trn2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vtrn2_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("trn2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vtrn2_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("trn2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vtrn2_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("trn2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vtrn2_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("trn2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vtrn2_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("trn2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ 
((__always_inline__)) -vtrn2_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("trn2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vtrn2_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("trn2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vtrn2_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("trn2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vtrn2q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("trn2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vtrn2q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("trn2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vtrn2q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("trn2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vtrn2q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("trn2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vtrn2q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("trn2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vtrn2q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("trn2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vtrn2q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("trn2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vtrn2q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("trn2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vtrn2q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("trn2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vtrn2q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("trn2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vtrn2q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("trn2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No 
clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vtrn2q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("trn2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vtst_p8 (poly8x8_t a, poly8x8_t b) { uint8x8_t result; @@ -12952,930 +12015,7 @@ : /* No clobbers */); return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vuzp1_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vuzp1_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vuzp1_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vuzp1_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vuzp1_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vuzp1_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vuzp1_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("uzp1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vuzp1_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("uzp1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vuzp1_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("uzp1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vuzp1q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vuzp1q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vuzp1q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), 
"w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vuzp1q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vuzp1q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vuzp1q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vuzp1q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vuzp1q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vuzp1q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("uzp1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vuzp1q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("uzp1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vuzp1q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("uzp1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vuzp1q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("uzp1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vuzp2_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vuzp2_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vuzp2_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vuzp2_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vuzp2_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t 
result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vuzp2_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vuzp2_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("uzp2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vuzp2_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("uzp2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vuzp2_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("uzp2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vuzp2q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vuzp2q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vuzp2q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vuzp2q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vuzp2q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vuzp2q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vuzp2q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vuzp2q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vuzp2q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("uzp2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t 
__attribute__ ((__always_inline__)) -vuzp2q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("uzp2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vuzp2q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("uzp2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vuzp2q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("uzp2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vzip1_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("zip1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vzip1_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("zip1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vzip1_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("zip1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vzip1_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - __asm__ ("zip1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vzip1_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("zip1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vzip1_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("zip1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vzip1_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("zip1 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vzip1_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("zip1 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vzip1_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("zip1 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vzip1q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("zip1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vzip1q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("zip1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No 
clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vzip1q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("zip1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vzip1q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("zip1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vzip1q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("zip1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vzip1q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("zip1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vzip1q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("zip1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vzip1q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("zip1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vzip1q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("zip1 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vzip1q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("zip1 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vzip1q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("zip1 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vzip1q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("zip1 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vzip2_f32 (float32x2_t a, float32x2_t b) -{ - float32x2_t result; - __asm__ ("zip2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vzip2_p8 (poly8x8_t a, poly8x8_t b) -{ - poly8x8_t result; - __asm__ ("zip2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vzip2_p16 (poly16x4_t a, poly16x4_t b) -{ - poly16x4_t result; - __asm__ ("zip2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vzip2_s8 (int8x8_t a, int8x8_t b) -{ - int8x8_t result; - 
__asm__ ("zip2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vzip2_s16 (int16x4_t a, int16x4_t b) -{ - int16x4_t result; - __asm__ ("zip2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vzip2_s32 (int32x2_t a, int32x2_t b) -{ - int32x2_t result; - __asm__ ("zip2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vzip2_u8 (uint8x8_t a, uint8x8_t b) -{ - uint8x8_t result; - __asm__ ("zip2 %0.8b,%1.8b,%2.8b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vzip2_u16 (uint16x4_t a, uint16x4_t b) -{ - uint16x4_t result; - __asm__ ("zip2 %0.4h,%1.4h,%2.4h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vzip2_u32 (uint32x2_t a, uint32x2_t b) -{ - uint32x2_t result; - __asm__ ("zip2 %0.2s,%1.2s,%2.2s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) -vzip2q_f32 (float32x4_t a, float32x4_t b) -{ - float32x4_t result; - __asm__ ("zip2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) -vzip2q_f64 (float64x2_t a, float64x2_t b) -{ - float64x2_t result; - __asm__ ("zip2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) -vzip2q_p8 (poly8x16_t a, poly8x16_t b) -{ - poly8x16_t result; - __asm__ ("zip2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) -vzip2q_p16 (poly16x8_t a, poly16x8_t b) -{ - poly16x8_t result; - __asm__ ("zip2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vzip2q_s8 (int8x16_t a, int8x16_t b) -{ - int8x16_t result; - __asm__ ("zip2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) -vzip2q_s16 (int16x8_t a, int16x8_t b) -{ - int16x8_t result; - __asm__ ("zip2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) -vzip2q_s32 (int32x4_t a, int32x4_t b) -{ - int32x4_t result; - __asm__ ("zip2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) -vzip2q_s64 (int64x2_t a, int64x2_t b) -{ - int64x2_t result; - __asm__ ("zip2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ 
((__always_inline__)) -vzip2q_u8 (uint8x16_t a, uint8x16_t b) -{ - uint8x16_t result; - __asm__ ("zip2 %0.16b,%1.16b,%2.16b" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) -vzip2q_u16 (uint16x8_t a, uint16x8_t b) -{ - uint16x8_t result; - __asm__ ("zip2 %0.8h,%1.8h,%2.8h" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) -vzip2q_u32 (uint32x4_t a, uint32x4_t b) -{ - uint32x4_t result; - __asm__ ("zip2 %0.4s,%1.4s,%2.4s" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) -vzip2q_u64 (uint64x2_t a, uint64x2_t b) -{ - uint64x2_t result; - __asm__ ("zip2 %0.2d,%1.2d,%2.2d" - : "=w"(result) - : "w"(a), "w"(b) - : /* No clobbers */); - return result; -} - /* End of temporary inline asm implementations. */ /* Start of temporary inline asm for vldn, vstn and friends. */ @@ -14205,132 +12345,225 @@ __LD4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) __LD4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) -#define __ST2_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - typedef struct { ptrtype __x[2]; } __ST2_LANE_STRUCTURE_##intype; \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst2 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ - intype b, const int c) \ - { \ - __ST2_LANE_STRUCTURE_##intype *__p = \ - (__ST2_LANE_STRUCTURE_##intype *)ptr; \ - __asm__ ("ld1 {v16." #regsuffix ", v17." #regsuffix "}, %1\n\t" \ - "st2 {v16." #lnsuffix ", v17." #lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*__p) \ - : "Q"(b), "i"(c) \ - : "v16", "v17"); \ - } +#define __ST2_LANE_FUNC(intype, largetype, ptrtype, \ + mode, ptr_mode, funcsuffix, signedtype) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst2_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_oi __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregoi##mode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregoi##mode (__o, \ + (signedtype) __temp.val[1], 1); \ + __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ +} -__ST2_LANE_FUNC (int8x8x2_t, int8_t, 8b, b, s8,) -__ST2_LANE_FUNC (float32x2x2_t, float32_t, 2s, s, f32,) -__ST2_LANE_FUNC (float64x1x2_t, float64_t, 1d, d, f64,) -__ST2_LANE_FUNC (poly8x8x2_t, poly8_t, 8b, b, p8,) -__ST2_LANE_FUNC (poly16x4x2_t, poly16_t, 4h, h, p16,) -__ST2_LANE_FUNC (int16x4x2_t, int16_t, 4h, h, s16,) -__ST2_LANE_FUNC (int32x2x2_t, int32_t, 2s, s, s32,) -__ST2_LANE_FUNC (int64x1x2_t, int64_t, 1d, d, s64,) -__ST2_LANE_FUNC (uint8x8x2_t, uint8_t, 8b, b, u8,) -__ST2_LANE_FUNC (uint16x4x2_t, uint16_t, 4h, h, u16,) -__ST2_LANE_FUNC (uint32x2x2_t, uint32_t, 2s, s, u32,) -__ST2_LANE_FUNC (uint64x1x2_t, uint64_t, 1d, d, u64,) -__ST2_LANE_FUNC (float32x4x2_t, float32_t, 4s, s, f32, q) -__ST2_LANE_FUNC (float64x2x2_t, float64_t, 2d, d, f64, q) -__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, 16b, b, p8, q) -__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, 8h, h, p16, q) -__ST2_LANE_FUNC 
(int8x16x2_t, int8_t, 16b, b, s8, q) -__ST2_LANE_FUNC (int16x8x2_t, int16_t, 8h, h, s16, q) -__ST2_LANE_FUNC (int32x4x2_t, int32_t, 4s, s, s32, q) -__ST2_LANE_FUNC (int64x2x2_t, int64_t, 2d, d, s64, q) -__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, 16b, b, u8, q) -__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, 8h, h, u16, q) -__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, 4s, s, u32, q) -__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, 2d, d, u64, q) +__ST2_LANE_FUNC (float32x2x2_t, float32x4x2_t, float32_t, v4sf, sf, f32, + float32x4_t) +__ST2_LANE_FUNC (float64x1x2_t, float64x2x2_t, float64_t, v2df, df, f64, + float64x2_t) +__ST2_LANE_FUNC (poly8x8x2_t, poly8x16x2_t, poly8_t, v16qi, qi, p8, int8x16_t) +__ST2_LANE_FUNC (poly16x4x2_t, poly16x8x2_t, poly16_t, v8hi, hi, p16, + int16x8_t) +__ST2_LANE_FUNC (int8x8x2_t, int8x16x2_t, int8_t, v16qi, qi, s8, int8x16_t) +__ST2_LANE_FUNC (int16x4x2_t, int16x8x2_t, int16_t, v8hi, hi, s16, int16x8_t) +__ST2_LANE_FUNC (int32x2x2_t, int32x4x2_t, int32_t, v4si, si, s32, int32x4_t) +__ST2_LANE_FUNC (int64x1x2_t, int64x2x2_t, int64_t, v2di, di, s64, int64x2_t) +__ST2_LANE_FUNC (uint8x8x2_t, uint8x16x2_t, uint8_t, v16qi, qi, u8, int8x16_t) +__ST2_LANE_FUNC (uint16x4x2_t, uint16x8x2_t, uint16_t, v8hi, hi, u16, + int16x8_t) +__ST2_LANE_FUNC (uint32x2x2_t, uint32x4x2_t, uint32_t, v4si, si, u32, + int32x4_t) +__ST2_LANE_FUNC (uint64x1x2_t, uint64x2x2_t, uint64_t, v2di, di, u64, + int64x2_t) -#define __ST3_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - typedef struct { ptrtype __x[3]; } __ST3_LANE_STRUCTURE_##intype; \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst3 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ - intype b, const int c) \ - { \ - __ST3_LANE_STRUCTURE_##intype *__p = \ - (__ST3_LANE_STRUCTURE_##intype *)ptr; \ - __asm__ ("ld1 {v16." #regsuffix " - v18." #regsuffix "}, %1\n\t" \ - "st3 {v16." #lnsuffix " - v18." 
#lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*__p) \ - : "Q"(b), "i"(c) \ - : "v16", "v17", "v18"); \ - } +#undef __ST2_LANE_FUNC +#define __ST2_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst2q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union { intype __i; \ + __builtin_aarch64_simd_oi __o; } __temp = { __b }; \ + __builtin_aarch64_st2_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ +} -__ST3_LANE_FUNC (int8x8x3_t, int8_t, 8b, b, s8,) -__ST3_LANE_FUNC (float32x2x3_t, float32_t, 2s, s, f32,) -__ST3_LANE_FUNC (float64x1x3_t, float64_t, 1d, d, f64,) -__ST3_LANE_FUNC (poly8x8x3_t, poly8_t, 8b, b, p8,) -__ST3_LANE_FUNC (poly16x4x3_t, poly16_t, 4h, h, p16,) -__ST3_LANE_FUNC (int16x4x3_t, int16_t, 4h, h, s16,) -__ST3_LANE_FUNC (int32x2x3_t, int32_t, 2s, s, s32,) -__ST3_LANE_FUNC (int64x1x3_t, int64_t, 1d, d, s64,) -__ST3_LANE_FUNC (uint8x8x3_t, uint8_t, 8b, b, u8,) -__ST3_LANE_FUNC (uint16x4x3_t, uint16_t, 4h, h, u16,) -__ST3_LANE_FUNC (uint32x2x3_t, uint32_t, 2s, s, u32,) -__ST3_LANE_FUNC (uint64x1x3_t, uint64_t, 1d, d, u64,) -__ST3_LANE_FUNC (float32x4x3_t, float32_t, 4s, s, f32, q) -__ST3_LANE_FUNC (float64x2x3_t, float64_t, 2d, d, f64, q) -__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, 16b, b, p8, q) -__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, 8h, h, p16, q) -__ST3_LANE_FUNC (int8x16x3_t, int8_t, 16b, b, s8, q) -__ST3_LANE_FUNC (int16x8x3_t, int16_t, 8h, h, s16, q) -__ST3_LANE_FUNC (int32x4x3_t, int32_t, 4s, s, s32, q) -__ST3_LANE_FUNC (int64x2x3_t, int64_t, 2d, d, s64, q) -__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, 16b, b, u8, q) -__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, 8h, h, u16, q) -__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, 4s, s, u32, q) -__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, 2d, d, u64, q) +__ST2_LANE_FUNC (float32x4x2_t, float32_t, v4sf, sf, f32) +__ST2_LANE_FUNC (float64x2x2_t, float64_t, v2df, df, f64) +__ST2_LANE_FUNC (poly8x16x2_t, poly8_t, v16qi, qi, p8) +__ST2_LANE_FUNC (poly16x8x2_t, poly16_t, v8hi, hi, p16) +__ST2_LANE_FUNC (int8x16x2_t, int8_t, v16qi, qi, s8) +__ST2_LANE_FUNC (int16x8x2_t, int16_t, v8hi, hi, s16) +__ST2_LANE_FUNC (int32x4x2_t, int32_t, v4si, si, s32) +__ST2_LANE_FUNC (int64x2x2_t, int64_t, v2di, di, s64) +__ST2_LANE_FUNC (uint8x16x2_t, uint8_t, v16qi, qi, u8) +__ST2_LANE_FUNC (uint16x8x2_t, uint16_t, v8hi, hi, u16) +__ST2_LANE_FUNC (uint32x4x2_t, uint32_t, v4si, si, u32) +__ST2_LANE_FUNC (uint64x2x2_t, uint64_t, v2di, di, u64) -#define __ST4_LANE_FUNC(intype, ptrtype, regsuffix, \ - lnsuffix, funcsuffix, Q) \ - typedef struct { ptrtype __x[4]; } __ST4_LANE_STRUCTURE_##intype; \ - __extension__ static __inline void \ - __attribute__ ((__always_inline__)) \ - vst4 ## Q ## _lane_ ## funcsuffix (ptrtype *ptr, \ - intype b, const int c) \ - { \ - __ST4_LANE_STRUCTURE_##intype *__p = \ - (__ST4_LANE_STRUCTURE_##intype *)ptr; \ - __asm__ ("ld1 {v16." #regsuffix " - v19." #regsuffix "}, %1\n\t" \ - "st4 {v16." #lnsuffix " - v19." 
#lnsuffix "}[%2], %0\n\t" \ - : "=Q"(*__p) \ - : "Q"(b), "i"(c) \ - : "v16", "v17", "v18", "v19"); \ - } +#define __ST3_LANE_FUNC(intype, largetype, ptrtype, \ + mode, ptr_mode, funcsuffix, signedtype) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst3_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_ci __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[2] \ + = vcombine_##funcsuffix (__b.val[2], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregci##mode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregci##mode (__o, \ + (signedtype) __temp.val[1], 1); \ + __o = __builtin_aarch64_set_qregci##mode (__o, \ + (signedtype) __temp.val[2], 2); \ + __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ +} -__ST4_LANE_FUNC (int8x8x4_t, int8_t, 8b, b, s8,) -__ST4_LANE_FUNC (float32x2x4_t, float32_t, 2s, s, f32,) -__ST4_LANE_FUNC (float64x1x4_t, float64_t, 1d, d, f64,) -__ST4_LANE_FUNC (poly8x8x4_t, poly8_t, 8b, b, p8,) -__ST4_LANE_FUNC (poly16x4x4_t, poly16_t, 4h, h, p16,) -__ST4_LANE_FUNC (int16x4x4_t, int16_t, 4h, h, s16,) -__ST4_LANE_FUNC (int32x2x4_t, int32_t, 2s, s, s32,) -__ST4_LANE_FUNC (int64x1x4_t, int64_t, 1d, d, s64,) -__ST4_LANE_FUNC (uint8x8x4_t, uint8_t, 8b, b, u8,) -__ST4_LANE_FUNC (uint16x4x4_t, uint16_t, 4h, h, u16,) -__ST4_LANE_FUNC (uint32x2x4_t, uint32_t, 2s, s, u32,) -__ST4_LANE_FUNC (uint64x1x4_t, uint64_t, 1d, d, u64,) -__ST4_LANE_FUNC (float32x4x4_t, float32_t, 4s, s, f32, q) -__ST4_LANE_FUNC (float64x2x4_t, float64_t, 2d, d, f64, q) -__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, 16b, b, p8, q) -__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, 8h, h, p16, q) -__ST4_LANE_FUNC (int8x16x4_t, int8_t, 16b, b, s8, q) -__ST4_LANE_FUNC (int16x8x4_t, int16_t, 8h, h, s16, q) -__ST4_LANE_FUNC (int32x4x4_t, int32_t, 4s, s, s32, q) -__ST4_LANE_FUNC (int64x2x4_t, int64_t, 2d, d, s64, q) -__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, 16b, b, u8, q) -__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, 8h, h, u16, q) -__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, 4s, s, u32, q) -__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, 2d, d, u64, q) +__ST3_LANE_FUNC (float32x2x3_t, float32x4x3_t, float32_t, v4sf, sf, f32, + float32x4_t) +__ST3_LANE_FUNC (float64x1x3_t, float64x2x3_t, float64_t, v2df, df, f64, + float64x2_t) +__ST3_LANE_FUNC (poly8x8x3_t, poly8x16x3_t, poly8_t, v16qi, qi, p8, int8x16_t) +__ST3_LANE_FUNC (poly16x4x3_t, poly16x8x3_t, poly16_t, v8hi, hi, p16, + int16x8_t) +__ST3_LANE_FUNC (int8x8x3_t, int8x16x3_t, int8_t, v16qi, qi, s8, int8x16_t) +__ST3_LANE_FUNC (int16x4x3_t, int16x8x3_t, int16_t, v8hi, hi, s16, int16x8_t) +__ST3_LANE_FUNC (int32x2x3_t, int32x4x3_t, int32_t, v4si, si, s32, int32x4_t) +__ST3_LANE_FUNC (int64x1x3_t, int64x2x3_t, int64_t, v2di, di, s64, int64x2_t) +__ST3_LANE_FUNC (uint8x8x3_t, uint8x16x3_t, uint8_t, v16qi, qi, u8, int8x16_t) +__ST3_LANE_FUNC (uint16x4x3_t, uint16x8x3_t, uint16_t, v8hi, hi, u16, + int16x8_t) +__ST3_LANE_FUNC (uint32x2x3_t, uint32x4x3_t, uint32_t, v4si, si, u32, + int32x4_t) +__ST3_LANE_FUNC (uint64x1x3_t, uint64x2x3_t, uint64_t, v2di, di, u64, + int64x2_t) +#undef __ST3_LANE_FUNC +#define __ST3_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ static __inline 
void \ +__attribute__ ((__always_inline__)) \ +vst3q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union { intype __i; \ + __builtin_aarch64_simd_ci __o; } __temp = { __b }; \ + __builtin_aarch64_st3_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ +} + +__ST3_LANE_FUNC (float32x4x3_t, float32_t, v4sf, sf, f32) +__ST3_LANE_FUNC (float64x2x3_t, float64_t, v2df, df, f64) +__ST3_LANE_FUNC (poly8x16x3_t, poly8_t, v16qi, qi, p8) +__ST3_LANE_FUNC (poly16x8x3_t, poly16_t, v8hi, hi, p16) +__ST3_LANE_FUNC (int8x16x3_t, int8_t, v16qi, qi, s8) +__ST3_LANE_FUNC (int16x8x3_t, int16_t, v8hi, hi, s16) +__ST3_LANE_FUNC (int32x4x3_t, int32_t, v4si, si, s32) +__ST3_LANE_FUNC (int64x2x3_t, int64_t, v2di, di, s64) +__ST3_LANE_FUNC (uint8x16x3_t, uint8_t, v16qi, qi, u8) +__ST3_LANE_FUNC (uint16x8x3_t, uint16_t, v8hi, hi, u16) +__ST3_LANE_FUNC (uint32x4x3_t, uint32_t, v4si, si, u32) +__ST3_LANE_FUNC (uint64x2x3_t, uint64_t, v2di, di, u64) + +#define __ST4_LANE_FUNC(intype, largetype, ptrtype, \ + mode, ptr_mode, funcsuffix, signedtype) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst4_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + __builtin_aarch64_simd_xi __o; \ + largetype __temp; \ + __temp.val[0] \ + = vcombine_##funcsuffix (__b.val[0], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[1] \ + = vcombine_##funcsuffix (__b.val[1], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[2] \ + = vcombine_##funcsuffix (__b.val[2], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __temp.val[3] \ + = vcombine_##funcsuffix (__b.val[3], \ + vcreate_##funcsuffix (__AARCH64_UINT64_C (0))); \ + __o = __builtin_aarch64_set_qregxi##mode (__o, \ + (signedtype) __temp.val[0], 0); \ + __o = __builtin_aarch64_set_qregxi##mode (__o, \ + (signedtype) __temp.val[1], 1); \ + __o = __builtin_aarch64_set_qregxi##mode (__o, \ + (signedtype) __temp.val[2], 2); \ + __o = __builtin_aarch64_set_qregxi##mode (__o, \ + (signedtype) __temp.val[3], 3); \ + __builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __o, __c); \ +} + +__ST4_LANE_FUNC (float32x2x4_t, float32x4x4_t, float32_t, v4sf, sf, f32, + float32x4_t) +__ST4_LANE_FUNC (float64x1x4_t, float64x2x4_t, float64_t, v2df, df, f64, + float64x2_t) +__ST4_LANE_FUNC (poly8x8x4_t, poly8x16x4_t, poly8_t, v16qi, qi, p8, int8x16_t) +__ST4_LANE_FUNC (poly16x4x4_t, poly16x8x4_t, poly16_t, v8hi, hi, p16, + int16x8_t) +__ST4_LANE_FUNC (int8x8x4_t, int8x16x4_t, int8_t, v16qi, qi, s8, int8x16_t) +__ST4_LANE_FUNC (int16x4x4_t, int16x8x4_t, int16_t, v8hi, hi, s16, int16x8_t) +__ST4_LANE_FUNC (int32x2x4_t, int32x4x4_t, int32_t, v4si, si, s32, int32x4_t) +__ST4_LANE_FUNC (int64x1x4_t, int64x2x4_t, int64_t, v2di, di, s64, int64x2_t) +__ST4_LANE_FUNC (uint8x8x4_t, uint8x16x4_t, uint8_t, v16qi, qi, u8, int8x16_t) +__ST4_LANE_FUNC (uint16x4x4_t, uint16x8x4_t, uint16_t, v8hi, hi, u16, + int16x8_t) +__ST4_LANE_FUNC (uint32x2x4_t, uint32x4x4_t, uint32_t, v4si, si, u32, + int32x4_t) +__ST4_LANE_FUNC (uint64x1x4_t, uint64x2x4_t, uint64_t, v2di, di, u64, + int64x2_t) + +#undef __ST4_LANE_FUNC +#define __ST4_LANE_FUNC(intype, ptrtype, mode, ptr_mode, funcsuffix) \ +__extension__ static __inline void \ +__attribute__ ((__always_inline__)) \ +vst4q_lane_ ## funcsuffix (ptrtype *__ptr, \ + intype __b, const int __c) \ +{ \ + union { intype __i; \ + __builtin_aarch64_simd_xi __o; } __temp = { __b }; \ + 
__builtin_aarch64_st4_lane##mode ((__builtin_aarch64_simd_ ## ptr_mode *) \ + __ptr, __temp.__o, __c); \ +} + +__ST4_LANE_FUNC (float32x4x4_t, float32_t, v4sf, sf, f32) +__ST4_LANE_FUNC (float64x2x4_t, float64_t, v2df, df, f64) +__ST4_LANE_FUNC (poly8x16x4_t, poly8_t, v16qi, qi, p8) +__ST4_LANE_FUNC (poly16x8x4_t, poly16_t, v8hi, hi, p16) +__ST4_LANE_FUNC (int8x16x4_t, int8_t, v16qi, qi, s8) +__ST4_LANE_FUNC (int16x8x4_t, int16_t, v8hi, hi, s16) +__ST4_LANE_FUNC (int32x4x4_t, int32_t, v4si, si, s32) +__ST4_LANE_FUNC (int64x2x4_t, int64_t, v2di, di, s64) +__ST4_LANE_FUNC (uint8x16x4_t, uint8_t, v16qi, qi, u8) +__ST4_LANE_FUNC (uint16x8x4_t, uint16_t, v8hi, hi, u16) +__ST4_LANE_FUNC (uint32x4x4_t, uint32_t, v4si, si, u32) +__ST4_LANE_FUNC (uint64x2x4_t, uint64_t, v2di, di, u64) + __extension__ static __inline int64_t __attribute__ ((__always_inline__)) vaddlv_s32 (int32x2_t a) { @@ -18489,6 +16722,292 @@ return __aarch64_vgetq_lane_u64 (__a, __b); } +/* vext */ + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c) +{ + /* The only possible index to the assembler instruction returns element 0. */ + __builtin_aarch64_im_lane_boundsi (__c, 1); + return __a; +} +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, 
(uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c) +{ + /* The only possible index to the assembler instruction returns element 0. */ + __builtin_aarch64_im_lane_boundsi (__c, 1); + return __a; +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c) +{ + /* The only possible index to the assembler instruction returns element 0. 
*/ + __builtin_aarch64_im_lane_boundsi (__c, 1); + return __a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 16); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 16); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, 
__a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 16); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint8x16_t) + {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c, + 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7, + __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 8); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint16x8_t) + {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c}); +#else + return __builtin_shuffle (__a, __b, + (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 4); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, + (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) +{ + __builtin_aarch64_im_lane_boundsi (__c, 2); +#ifdef __AARCH64EB__ + return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1}); +#endif +} + /* vfma_lane */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) @@ -20943,6 +19462,12 @@ return (int32x1_t) __builtin_aarch64_sqabssi (__a); } +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) +vqabsd_s64 (int64_t __a) +{ + return __builtin_aarch64_sqabsdi (__a); +} + /* vqadd */ __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) @@ -20972,25 +19497,26 @@ __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vqaddb_u8 (uint8x1_t __a, uint8x1_t __b) { - return (uint8x1_t) __builtin_aarch64_uqaddqi (__a, __b); + return (uint8x1_t) __builtin_aarch64_uqaddqi_uuu (__a, __b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vqaddh_u16 (uint16x1_t __a, uint16x1_t __b) { - return (uint16x1_t) __builtin_aarch64_uqaddhi (__a, __b); + return (uint16x1_t) __builtin_aarch64_uqaddhi_uuu (__a, __b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vqadds_u32 (uint32x1_t __a, uint32x1_t __b) { - return (uint32x1_t) __builtin_aarch64_uqaddsi (__a, __b); + return (uint32x1_t) __builtin_aarch64_uqaddsi_uuu (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqaddd_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_uqadddi (__a, __b); + return (uint64x1_t) __builtin_aarch64_uqadddi_uuu ((uint64_t) __a, + (uint64_t) __b); } /* vqdmlal */ @@ -21555,6 +20081,12 @@ return (int32x1_t) __builtin_aarch64_sqnegsi (__a); } +__extension__ static __inline int64_t __attribute__ ((__always_inline__)) 
+vqnegd_s64 (int64_t __a) +{ + return __builtin_aarch64_sqnegdi (__a); +} + /* vqrdmulh */ __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) @@ -21634,25 +20166,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vqrshl_u8 (uint8x8_t __a, int8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_uqrshlv8qi ((int8x8_t) __a, __b); + return __builtin_aarch64_uqrshlv8qi_uus ( __a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vqrshl_u16 (uint16x4_t __a, int16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_uqrshlv4hi ((int16x4_t) __a, __b); + return __builtin_aarch64_uqrshlv4hi_uus ( __a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vqrshl_u32 (uint32x2_t __a, int32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_uqrshlv2si ((int32x2_t) __a, __b); + return __builtin_aarch64_uqrshlv2si_uus ( __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqrshl_u64 (uint64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_uqrshldi ((int64x1_t) __a, __b); + return __builtin_aarch64_uqrshldi_uus ( __a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -21682,25 +20214,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqrshlq_u8 (uint8x16_t __a, int8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_uqrshlv16qi ((int8x16_t) __a, __b); + return __builtin_aarch64_uqrshlv16qi_uus ( __a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqrshlq_u16 (uint16x8_t __a, int16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_uqrshlv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_uqrshlv8hi_uus ( __a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqrshlq_u32 (uint32x4_t __a, int32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_uqrshlv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_uqrshlv4si_uus ( __a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vqrshlq_u64 (uint64x2_t __a, int64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_uqrshlv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_uqrshlv2di_uus ( __a, __b); } __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) @@ -21730,25 +20262,25 @@ __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vqrshlb_u8 (uint8x1_t __a, uint8x1_t __b) { - return (uint8x1_t) __builtin_aarch64_uqrshlqi (__a, __b); + return __builtin_aarch64_uqrshlqi_uus (__a, __b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vqrshlh_u16 (uint16x1_t __a, uint16x1_t __b) { - return (uint16x1_t) __builtin_aarch64_uqrshlhi (__a, __b); + return __builtin_aarch64_uqrshlhi_uus (__a, __b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vqrshls_u32 (uint32x1_t __a, uint32x1_t __b) { - return (uint32x1_t) __builtin_aarch64_uqrshlsi (__a, __b); + return __builtin_aarch64_uqrshlsi_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqrshld_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_uqrshldi (__a, __b); + return __builtin_aarch64_uqrshldi_uus (__a, __b); } /* vqrshrn */ @@ -21774,19 +20306,19 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vqrshrn_n_u16 (uint16x8_t __a, const int __b) { - return 
(uint8x8_t) __builtin_aarch64_uqrshrn_nv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_uqrshrn_nv8hi_uus ( __a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vqrshrn_n_u32 (uint32x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_uqrshrn_nv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_uqrshrn_nv4si_uus ( __a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vqrshrn_n_u64 (uint64x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_uqrshrn_nv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_uqrshrn_nv2di_uus ( __a, __b); } __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) @@ -21810,19 +20342,19 @@ __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vqrshrnh_n_u16 (uint16x1_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqrshrn_nhi (__a, __b); + return __builtin_aarch64_uqrshrn_nhi_uus (__a, __b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vqrshrns_n_u32 (uint32x1_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqrshrn_nsi (__a, __b); + return __builtin_aarch64_uqrshrn_nsi_uus (__a, __b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vqrshrnd_n_u64 (uint64x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqrshrn_ndi (__a, __b); + return __builtin_aarch64_uqrshrn_ndi_uus (__a, __b); } /* vqrshrun */ @@ -21892,25 +20424,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vqshl_u8 (uint8x8_t __a, int8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_uqshlv8qi ((int8x8_t) __a, __b); + return __builtin_aarch64_uqshlv8qi_uus ( __a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vqshl_u16 (uint16x4_t __a, int16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_uqshlv4hi ((int16x4_t) __a, __b); + return __builtin_aarch64_uqshlv4hi_uus ( __a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vqshl_u32 (uint32x2_t __a, int32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_uqshlv2si ((int32x2_t) __a, __b); + return __builtin_aarch64_uqshlv2si_uus ( __a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqshl_u64 (uint64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_uqshldi ((int64x1_t) __a, __b); + return __builtin_aarch64_uqshldi_uus ( __a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -21940,25 +20472,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqshlq_u8 (uint8x16_t __a, int8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_uqshlv16qi ((int8x16_t) __a, __b); + return __builtin_aarch64_uqshlv16qi_uus ( __a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqshlq_u16 (uint16x8_t __a, int16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_uqshlv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_uqshlv8hi_uus ( __a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqshlq_u32 (uint32x4_t __a, int32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_uqshlv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_uqshlv4si_uus ( __a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vqshlq_u64 (uint64x2_t __a, int64x2_t __b) { - return (uint64x2_t) 
__builtin_aarch64_uqshlv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_uqshlv2di_uus ( __a, __b); } __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) @@ -21988,25 +20520,25 @@ __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vqshlb_u8 (uint8x1_t __a, uint8x1_t __b) { - return (uint8x1_t) __builtin_aarch64_uqshlqi (__a, __b); + return __builtin_aarch64_uqshlqi_uus (__a, __b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vqshlh_u16 (uint16x1_t __a, uint16x1_t __b) { - return (uint16x1_t) __builtin_aarch64_uqshlhi (__a, __b); + return __builtin_aarch64_uqshlhi_uus (__a, __b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vqshls_u32 (uint32x1_t __a, uint32x1_t __b) { - return (uint32x1_t) __builtin_aarch64_uqshlsi (__a, __b); + return __builtin_aarch64_uqshlsi_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqshld_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_uqshldi (__a, __b); + return __builtin_aarch64_uqshldi_uus (__a, __b); } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) @@ -22036,25 +20568,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vqshl_n_u8 (uint8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_uqshl_nv8qi ((int8x8_t) __a, __b); + return __builtin_aarch64_uqshl_nv8qi_uus (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vqshl_n_u16 (uint16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_uqshl_nv4hi ((int16x4_t) __a, __b); + return __builtin_aarch64_uqshl_nv4hi_uus (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vqshl_n_u32 (uint32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_uqshl_nv2si ((int32x2_t) __a, __b); + return __builtin_aarch64_uqshl_nv2si_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqshl_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_uqshl_ndi ((int64x1_t) __a, __b); + return __builtin_aarch64_uqshl_ndi_uus (__a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -22084,25 +20616,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqshlq_n_u8 (uint8x16_t __a, const int __b) { - return (uint8x16_t) __builtin_aarch64_uqshl_nv16qi ((int8x16_t) __a, __b); + return __builtin_aarch64_uqshl_nv16qi_uus (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqshlq_n_u16 (uint16x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_uqshl_nv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_uqshl_nv8hi_uus (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqshlq_n_u32 (uint32x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_uqshl_nv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_uqshl_nv4si_uus (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vqshlq_n_u64 (uint64x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_uqshl_nv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_uqshl_nv2di_uus (__a, __b); } __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) @@ -22132,25 +20664,25 @@ __extension__ static __inline uint8x1_t __attribute__ 
((__always_inline__)) vqshlb_n_u8 (uint8x1_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqshl_nqi (__a, __b); + return __builtin_aarch64_uqshl_nqi_uus (__a, __b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vqshlh_n_u16 (uint16x1_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqshl_nhi (__a, __b); + return __builtin_aarch64_uqshl_nhi_uus (__a, __b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vqshls_n_u32 (uint32x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqshl_nsi (__a, __b); + return __builtin_aarch64_uqshl_nsi_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqshld_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_uqshl_ndi (__a, __b); + return __builtin_aarch64_uqshl_ndi_uus (__a, __b); } /* vqshlu */ @@ -22158,73 +20690,73 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vqshlu_n_s8 (int8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_sqshlu_nv8qi (__a, __b); + return __builtin_aarch64_sqshlu_nv8qi_uss (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vqshlu_n_s16 (int16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_sqshlu_nv4hi (__a, __b); + return __builtin_aarch64_sqshlu_nv4hi_uss (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vqshlu_n_s32 (int32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_sqshlu_nv2si (__a, __b); + return __builtin_aarch64_sqshlu_nv2si_uss (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqshlu_n_s64 (int64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); + return __builtin_aarch64_sqshlu_ndi_uss (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vqshluq_n_s8 (int8x16_t __a, const int __b) { - return (uint8x16_t) __builtin_aarch64_sqshlu_nv16qi (__a, __b); + return __builtin_aarch64_sqshlu_nv16qi_uss (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vqshluq_n_s16 (int16x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_sqshlu_nv8hi (__a, __b); + return __builtin_aarch64_sqshlu_nv8hi_uss (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vqshluq_n_s32 (int32x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_sqshlu_nv4si (__a, __b); + return __builtin_aarch64_sqshlu_nv4si_uss (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vqshluq_n_s64 (int64x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_sqshlu_nv2di (__a, __b); + return __builtin_aarch64_sqshlu_nv2di_uss (__a, __b); } __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) vqshlub_n_s8 (int8x1_t __a, const int __b) { - return (int8x1_t) __builtin_aarch64_sqshlu_nqi (__a, __b); + return (int8x1_t) __builtin_aarch64_sqshlu_nqi_uss (__a, __b); } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) vqshluh_n_s16 (int16x1_t __a, const int __b) { - return (int16x1_t) __builtin_aarch64_sqshlu_nhi (__a, __b); + return (int16x1_t) __builtin_aarch64_sqshlu_nhi_uss (__a, __b); } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) vqshlus_n_s32 (int32x1_t __a, const int __b) { - return (int32x1_t) 
__builtin_aarch64_sqshlu_nsi (__a, __b); + return (int32x1_t) __builtin_aarch64_sqshlu_nsi_uss (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vqshlud_n_s64 (int64x1_t __a, const int __b) { - return (int64x1_t) __builtin_aarch64_sqshlu_ndi (__a, __b); + return (int64x1_t) __builtin_aarch64_sqshlu_ndi_uss (__a, __b); } /* vqshrn */ @@ -22250,19 +20782,19 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vqshrn_n_u16 (uint16x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_uqshrn_nv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_uqshrn_nv8hi_uus ( __a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vqshrn_n_u32 (uint32x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_uqshrn_nv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_uqshrn_nv4si_uus ( __a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vqshrn_n_u64 (uint64x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_uqshrn_nv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_uqshrn_nv2di_uus ( __a, __b); } __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) @@ -22286,19 +20818,19 @@ __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vqshrnh_n_u16 (uint16x1_t __a, const int __b) { - return (uint8x1_t) __builtin_aarch64_uqshrn_nhi (__a, __b); + return __builtin_aarch64_uqshrn_nhi_uus (__a, __b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vqshrns_n_u32 (uint32x1_t __a, const int __b) { - return (uint16x1_t) __builtin_aarch64_uqshrn_nsi (__a, __b); + return __builtin_aarch64_uqshrn_nsi_uus (__a, __b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vqshrnd_n_u64 (uint64x1_t __a, const int __b) { - return (uint32x1_t) __builtin_aarch64_uqshrn_ndi (__a, __b); + return __builtin_aarch64_uqshrn_ndi_uus (__a, __b); } /* vqshrun */ @@ -22368,25 +20900,26 @@ __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vqsubb_u8 (uint8x1_t __a, uint8x1_t __b) { - return (uint8x1_t) __builtin_aarch64_uqsubqi (__a, __b); + return (uint8x1_t) __builtin_aarch64_uqsubqi_uuu (__a, __b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vqsubh_u16 (uint16x1_t __a, uint16x1_t __b) { - return (uint16x1_t) __builtin_aarch64_uqsubhi (__a, __b); + return (uint16x1_t) __builtin_aarch64_uqsubhi_uuu (__a, __b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vqsubs_u32 (uint32x1_t __a, uint32x1_t __b) { - return (uint32x1_t) __builtin_aarch64_uqsubsi (__a, __b); + return (uint32x1_t) __builtin_aarch64_uqsubsi_uuu (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vqsubd_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); + return (uint64x1_t) __builtin_aarch64_uqsubdi_uuu ((uint64_t) __a, + (uint64_t) __b); } /* vrecpe */ @@ -22467,6 +21000,234 @@ return __builtin_aarch64_frecpxdf (__a); } + +/* vrev */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev16_p8 (poly8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev16_s8 (int8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline 
uint8x8_t __attribute__ ((__always_inline__)) +vrev16_u8 (uint8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev16q_p8 (poly8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev16q_s8 (int8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev16q_u8 (uint8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev32_p8 (poly8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev32_p16 (poly16x4_t a) +{ + return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev32_s8 (int8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev32_s16 (int16x4_t a) +{ + return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev32_u8 (uint8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev32_u16 (uint16x4_t a) +{ + return __builtin_shuffle (a, (uint16x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev32q_p8 (poly8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev32q_p16 (poly16x8_t a) +{ + return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev32q_s8 (int8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev32q_s16 (int16x8_t a) +{ + return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev32q_u8 (uint8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev32q_u16 (uint16x8_t a) +{ + return __builtin_shuffle (a, (uint16x8_t) { 1, 0, 3, 2, 5, 4, 7, 6 }); +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vrev64_f32 (float32x2_t a) +{ + return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrev64_p8 (poly8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vrev64_p16 (poly16x4_t a) +{ + return 
__builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrev64_s8 (int8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vrev64_s16 (int16x4_t a) +{ + return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vrev64_s32 (int32x2_t a) +{ + return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrev64_u8 (uint8x8_t a) +{ + return __builtin_shuffle (a, (uint8x8_t) { 7, 6, 5, 4, 3, 2, 1, 0 }); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vrev64_u16 (uint16x4_t a) +{ + return __builtin_shuffle (a, (uint16x4_t) { 3, 2, 1, 0 }); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vrev64_u32 (uint32x2_t a) +{ + return __builtin_shuffle (a, (uint32x2_t) { 1, 0 }); +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vrev64q_f32 (float32x4_t a) +{ + return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrev64q_p8 (poly8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vrev64q_p16 (poly16x8_t a) +{ + return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrev64q_s8 (int8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vrev64q_s16 (int16x8_t a) +{ + return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vrev64q_s32 (int32x4_t a) +{ + return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrev64q_u8 (uint8x16_t a) +{ + return __builtin_shuffle (a, + (uint8x16_t) { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }); +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vrev64q_u16 (uint16x8_t a) +{ + return __builtin_shuffle (a, (uint16x8_t) { 3, 2, 1, 0, 7, 6, 5, 4 }); +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vrev64q_u32 (uint32x4_t a) +{ + return __builtin_shuffle (a, (uint32x4_t) { 1, 0, 3, 2 }); +} + /* vrnd */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) @@ -22475,6 +21236,12 @@ return __builtin_aarch64_btruncv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrnd_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_trunc (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndq_f32 (float32x4_t __a) { @@ -22495,6 +21262,12 @@ return __builtin_aarch64_roundv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrnda_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_round (vget_lane_f64 (__a, 0)), __a, 0); +} + 
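/* Illustrative sketch, not part of the patch above: how the newly added
   float64x1 rounding intrinsics behave.  vrnd_f64 truncates toward zero
   and vrnda_f64 rounds to nearest with ties away from zero, matching the
   __builtin_trunc / __builtin_round calls they wrap.  Assumes an AArch64
   target whose <arm_neon.h> already provides vdup_n_f64 and
   vget_lane_f64.  */

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  float64x1_t v = vdup_n_f64 (2.5);
  float64x1_t t = vrnd_f64 (v);    /* truncate toward zero: 2.0 */
  float64x1_t a = vrnda_f64 (v);   /* round, ties away from zero: 3.0 */
  printf ("%f %f\n", vget_lane_f64 (t, 0), vget_lane_f64 (a, 0));
  return 0;
}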
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndaq_f32 (float32x4_t __a) { @@ -22515,6 +21288,12 @@ return __builtin_aarch64_nearbyintv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndi_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_nearbyint (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndiq_f32 (float32x4_t __a) { @@ -22535,6 +21314,12 @@ return __builtin_aarch64_floorv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndm_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_floor (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndmq_f32 (float32x4_t __a) { @@ -22554,6 +21339,13 @@ { return __builtin_aarch64_frintnv2sf (__a); } + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndn_f64 (float64x1_t __a) +{ + return __builtin_aarch64_frintndf (__a); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndnq_f32 (float32x4_t __a) { @@ -22574,6 +21366,12 @@ return __builtin_aarch64_ceilv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndp_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_ceil (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndpq_f32 (float32x4_t __a) { @@ -22594,6 +21392,12 @@ return __builtin_aarch64_rintv2sf (__a); } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vrndx_f64 (float64x1_t __a) +{ + return vset_lane_f64 (__builtin_rint (vget_lane_f64 (__a, 0)), __a, 0); +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vrndxq_f32 (float32x4_t __a) { @@ -22635,25 +21439,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vrshl_u8 (uint8x8_t __a, int8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_urshlv8qi ((int8x8_t) __a, __b); + return __builtin_aarch64_urshlv8qi_uus (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vrshl_u16 (uint16x4_t __a, int16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_urshlv4hi ((int16x4_t) __a, __b); + return __builtin_aarch64_urshlv4hi_uus (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vrshl_u32 (uint32x2_t __a, int32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_urshlv2si ((int32x2_t) __a, __b); + return __builtin_aarch64_urshlv2si_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vrshl_u64 (uint64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_urshldi ((int64x1_t) __a, __b); + return __builtin_aarch64_urshldi_uus (__a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -22683,25 +21487,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vrshlq_u8 (uint8x16_t __a, int8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_urshlv16qi ((int8x16_t) __a, __b); + return __builtin_aarch64_urshlv16qi_uus (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vrshlq_u16 (uint16x8_t __a, int16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_urshlv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_urshlv8hi_uus (__a, __b); } 
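/* Illustrative sketch, not part of the patch: usage of the unsigned
   rounding shifts rewritten above to use the _uus builtins.  The
   shift-count vector is signed, so a negative count performs a rounding
   right shift (2^(n-1) is added before shifting right by n).  Assumes an
   AArch64 target with <arm_neon.h>.  */

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  uint8x8_t a = vdup_n_u8 (7);
  int8x8_t  n = vdup_n_s8 (-2);    /* shift right by 2 with rounding */
  uint8x8_t r = vrshl_u8 (a, n);   /* (7 + 2) >> 2 == 2 in each lane */
  printf ("%u\n", (unsigned) vget_lane_u8 (r, 0));
  return 0;
}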
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vrshlq_u32 (uint32x4_t __a, int32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_urshlv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_urshlv4si_uus (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vrshlq_u64 (uint64x2_t __a, int64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_urshlv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_urshlv2di_uus (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22713,7 +21517,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vrshld_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_urshldi (__a, __b); + return __builtin_aarch64_urshldi_uus (__a, __b); } /* vrshr */ @@ -22745,25 +21549,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vrshr_n_u8 (uint8x8_t __a, const int __b) { - return (uint8x8_t) __builtin_aarch64_urshr_nv8qi ((int8x8_t) __a, __b); + return __builtin_aarch64_urshr_nv8qi_uus (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vrshr_n_u16 (uint16x4_t __a, const int __b) { - return (uint16x4_t) __builtin_aarch64_urshr_nv4hi ((int16x4_t) __a, __b); + return __builtin_aarch64_urshr_nv4hi_uus (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vrshr_n_u32 (uint32x2_t __a, const int __b) { - return (uint32x2_t) __builtin_aarch64_urshr_nv2si ((int32x2_t) __a, __b); + return __builtin_aarch64_urshr_nv2si_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vrshr_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_urshr_ndi ((int64x1_t) __a, __b); + return __builtin_aarch64_urshr_ndi_uus (__a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -22793,25 +21597,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vrshrq_n_u8 (uint8x16_t __a, const int __b) { - return (uint8x16_t) __builtin_aarch64_urshr_nv16qi ((int8x16_t) __a, __b); + return __builtin_aarch64_urshr_nv16qi_uus (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vrshrq_n_u16 (uint16x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_urshr_nv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_urshr_nv8hi_uus (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vrshrq_n_u32 (uint32x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_urshr_nv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_urshr_nv4si_uus (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vrshrq_n_u64 (uint64x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_urshr_nv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_urshr_nv2di_uus (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22823,7 +21627,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vrshrd_n_u64 (uint64x1_t __a, const int __b) { - return (uint64x1_t) __builtin_aarch64_urshr_ndi (__a, __b); + return __builtin_aarch64_urshr_ndi_uus (__a, __b); } /* vrsra */ @@ -22855,29 +21659,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vrsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) { - return 
(uint8x8_t) __builtin_aarch64_ursra_nv8qi ((int8x8_t) __a, - (int8x8_t) __b, __c); + return __builtin_aarch64_ursra_nv8qi_uuus (__a, __b, __c); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vrsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) { - return (uint16x4_t) __builtin_aarch64_ursra_nv4hi ((int16x4_t) __a, - (int16x4_t) __b, __c); + return __builtin_aarch64_ursra_nv4hi_uuus (__a, __b, __c); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vrsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) { - return (uint32x2_t) __builtin_aarch64_ursra_nv2si ((int32x2_t) __a, - (int32x2_t) __b, __c); + return __builtin_aarch64_ursra_nv2si_uuus (__a, __b, __c); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vrsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (uint64x1_t) __builtin_aarch64_ursra_ndi ((int64x1_t) __a, - (int64x1_t) __b, __c); + return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -22907,29 +21707,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vrsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) { - return (uint8x16_t) __builtin_aarch64_ursra_nv16qi ((int8x16_t) __a, - (int8x16_t) __b, __c); + return __builtin_aarch64_ursra_nv16qi_uuus (__a, __b, __c); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vrsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) { - return (uint16x8_t) __builtin_aarch64_ursra_nv8hi ((int16x8_t) __a, - (int16x8_t) __b, __c); + return __builtin_aarch64_ursra_nv8hi_uuus (__a, __b, __c); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vrsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) { - return (uint32x4_t) __builtin_aarch64_ursra_nv4si ((int32x4_t) __a, - (int32x4_t) __b, __c); + return __builtin_aarch64_ursra_nv4si_uuus (__a, __b, __c); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vrsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) { - return (uint64x2_t) __builtin_aarch64_ursra_nv2di ((int64x2_t) __a, - (int64x2_t) __b, __c); + return __builtin_aarch64_ursra_nv2di_uuus (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -22941,7 +21737,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vrsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (uint64x1_t) __builtin_aarch64_ursra_ndi (__a, __b, __c); + return __builtin_aarch64_ursra_ndi_uuus (__a, __b, __c); } #ifdef __ARM_FEATURE_CRYPTO @@ -23134,109 +21930,109 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vshl_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t) __builtin_aarch64_sshlv8qi (__a, __b); + return __builtin_aarch64_sshlv8qi (__a, __b); } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vshl_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t) __builtin_aarch64_sshlv4hi (__a, __b); + return __builtin_aarch64_sshlv4hi (__a, __b); } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vshl_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t) __builtin_aarch64_sshlv2si (__a, __b); + return __builtin_aarch64_sshlv2si (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshl_s64 (int64x1_t __a, int64x1_t __b) { - 
return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); + return __builtin_aarch64_sshldi (__a, __b); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vshl_u8 (uint8x8_t __a, int8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_ushlv8qi ((int8x8_t) __a, __b); + return __builtin_aarch64_ushlv8qi_uus (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vshl_u16 (uint16x4_t __a, int16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_ushlv4hi ((int16x4_t) __a, __b); + return __builtin_aarch64_ushlv4hi_uus (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vshl_u32 (uint32x2_t __a, int32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_ushlv2si ((int32x2_t) __a, __b); + return __builtin_aarch64_ushlv2si_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshl_u64 (uint64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_ushldi ((int64x1_t) __a, __b); + return __builtin_aarch64_ushldi_uus (__a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vshlq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t) __builtin_aarch64_sshlv16qi (__a, __b); + return __builtin_aarch64_sshlv16qi (__a, __b); } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vshlq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t) __builtin_aarch64_sshlv8hi (__a, __b); + return __builtin_aarch64_sshlv8hi (__a, __b); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vshlq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t) __builtin_aarch64_sshlv4si (__a, __b); + return __builtin_aarch64_sshlv4si (__a, __b); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vshlq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t) __builtin_aarch64_sshlv2di (__a, __b); + return __builtin_aarch64_sshlv2di (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vshlq_u8 (uint8x16_t __a, int8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_ushlv16qi ((int8x16_t) __a, __b); + return __builtin_aarch64_ushlv16qi_uus (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vshlq_u16 (uint16x8_t __a, int16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_ushlv8hi ((int16x8_t) __a, __b); + return __builtin_aarch64_ushlv8hi_uus (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vshlq_u32 (uint32x4_t __a, int32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_ushlv4si ((int32x4_t) __a, __b); + return __builtin_aarch64_ushlv4si_uus (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vshlq_u64 (uint64x2_t __a, int64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_ushlv2di ((int64x2_t) __a, __b); + return __builtin_aarch64_ushlv2di_uus (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vshld_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t) __builtin_aarch64_sshldi (__a, __b); + return __builtin_aarch64_sshldi (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vshld_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_ushldi (__a, __b); + return __builtin_aarch64_ushldi_uus (__a, __b); } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) @@ -23296,19 +22092,19 @@ 
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vshll_n_u8 (uint8x8_t __a, const int __b) { - return (uint16x8_t) __builtin_aarch64_ushll_nv8qi ((int8x8_t) __a, __b); + return __builtin_aarch64_ushll_nv8qi_uus (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vshll_n_u16 (uint16x4_t __a, const int __b) { - return (uint32x4_t) __builtin_aarch64_ushll_nv4hi ((int16x4_t) __a, __b); + return __builtin_aarch64_ushll_nv4hi_uus (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vshll_n_u32 (uint32x2_t __a, const int __b) { - return (uint64x2_t) __builtin_aarch64_ushll_nv2si ((int32x2_t) __a, __b); + return __builtin_aarch64_ushll_nv2si_uus (__a, __b); } /* vshr */ @@ -23450,29 +22246,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vsli_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) { - return (uint8x8_t) __builtin_aarch64_usli_nv8qi ((int8x8_t) __a, - (int8x8_t) __b, __c); + return __builtin_aarch64_usli_nv8qi_uuus (__a, __b, __c); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vsli_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) { - return (uint16x4_t) __builtin_aarch64_usli_nv4hi ((int16x4_t) __a, - (int16x4_t) __b, __c); + return __builtin_aarch64_usli_nv4hi_uuus (__a, __b, __c); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vsli_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) { - return (uint32x2_t) __builtin_aarch64_usli_nv2si ((int32x2_t) __a, - (int32x2_t) __b, __c); + return __builtin_aarch64_usli_nv2si_uuus (__a, __b, __c); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsli_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (uint64x1_t) __builtin_aarch64_usli_ndi ((int64x1_t) __a, - (int64x1_t) __b, __c); + return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -23502,29 +22294,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vsliq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) { - return (uint8x16_t) __builtin_aarch64_usli_nv16qi ((int8x16_t) __a, - (int8x16_t) __b, __c); + return __builtin_aarch64_usli_nv16qi_uuus (__a, __b, __c); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vsliq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) { - return (uint16x8_t) __builtin_aarch64_usli_nv8hi ((int16x8_t) __a, - (int16x8_t) __b, __c); + return __builtin_aarch64_usli_nv8hi_uuus (__a, __b, __c); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vsliq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) { - return (uint32x4_t) __builtin_aarch64_usli_nv4si ((int32x4_t) __a, - (int32x4_t) __b, __c); + return __builtin_aarch64_usli_nv4si_uuus (__a, __b, __c); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vsliq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) { - return (uint64x2_t) __builtin_aarch64_usli_nv2di ((int64x2_t) __a, - (int64x2_t) __b, __c); + return __builtin_aarch64_usli_nv2di_uuus (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -23536,7 +22324,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vslid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (uint64x1_t) __builtin_aarch64_usli_ndi (__a, __b, __c); 
+ return __builtin_aarch64_usli_ndi_uuus (__a, __b, __c); } /* vsqadd */ @@ -23544,80 +22332,73 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vsqadd_u8 (uint8x8_t __a, int8x8_t __b) { - return (uint8x8_t) __builtin_aarch64_usqaddv8qi ((int8x8_t) __a, - (int8x8_t) __b); + return __builtin_aarch64_usqaddv8qi_uus (__a, __b); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vsqadd_u16 (uint16x4_t __a, int16x4_t __b) { - return (uint16x4_t) __builtin_aarch64_usqaddv4hi ((int16x4_t) __a, - (int16x4_t) __b); + return __builtin_aarch64_usqaddv4hi_uus (__a, __b); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vsqadd_u32 (uint32x2_t __a, int32x2_t __b) { - return (uint32x2_t) __builtin_aarch64_usqaddv2si ((int32x2_t) __a, - (int32x2_t) __b); + return __builtin_aarch64_usqaddv2si_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsqadd_u64 (uint64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); + return __builtin_aarch64_usqadddi_uus (__a, __b); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vsqaddq_u8 (uint8x16_t __a, int8x16_t __b) { - return (uint8x16_t) __builtin_aarch64_usqaddv16qi ((int8x16_t) __a, - (int8x16_t) __b); + return __builtin_aarch64_usqaddv16qi_uus (__a, __b); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vsqaddq_u16 (uint16x8_t __a, int16x8_t __b) { - return (uint16x8_t) __builtin_aarch64_usqaddv8hi ((int16x8_t) __a, - (int16x8_t) __b); + return __builtin_aarch64_usqaddv8hi_uus (__a, __b); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vsqaddq_u32 (uint32x4_t __a, int32x4_t __b) { - return (uint32x4_t) __builtin_aarch64_usqaddv4si ((int32x4_t) __a, - (int32x4_t) __b); + return __builtin_aarch64_usqaddv4si_uus (__a, __b); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vsqaddq_u64 (uint64x2_t __a, int64x2_t __b) { - return (uint64x2_t) __builtin_aarch64_usqaddv2di ((int64x2_t) __a, - (int64x2_t) __b); + return __builtin_aarch64_usqaddv2di_uus (__a, __b); } __extension__ static __inline uint8x1_t __attribute__ ((__always_inline__)) vsqaddb_u8 (uint8x1_t __a, int8x1_t __b) { - return (uint8x1_t) __builtin_aarch64_usqaddqi ((int8x1_t) __a, __b); + return __builtin_aarch64_usqaddqi_uus (__a, __b); } __extension__ static __inline uint16x1_t __attribute__ ((__always_inline__)) vsqaddh_u16 (uint16x1_t __a, int16x1_t __b) { - return (uint16x1_t) __builtin_aarch64_usqaddhi ((int16x1_t) __a, __b); + return __builtin_aarch64_usqaddhi_uus (__a, __b); } __extension__ static __inline uint32x1_t __attribute__ ((__always_inline__)) vsqadds_u32 (uint32x1_t __a, int32x1_t __b) { - return (uint32x1_t) __builtin_aarch64_usqaddsi ((int32x1_t) __a, __b); + return __builtin_aarch64_usqaddsi_uus (__a, __b); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsqaddd_u64 (uint64x1_t __a, int64x1_t __b) { - return (uint64x1_t) __builtin_aarch64_usqadddi ((int64x1_t) __a, __b); + return __builtin_aarch64_usqadddi_uus (__a, __b); } /* vsqrt */ @@ -23668,29 +22449,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vsra_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) { - return (uint8x8_t) __builtin_aarch64_usra_nv8qi ((int8x8_t) __a, - (int8x8_t) __b, __c); + return __builtin_aarch64_usra_nv8qi_uuus (__a, __b, __c); } __extension__ static 
__inline uint16x4_t __attribute__ ((__always_inline__)) vsra_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) { - return (uint16x4_t) __builtin_aarch64_usra_nv4hi ((int16x4_t) __a, - (int16x4_t) __b, __c); + return __builtin_aarch64_usra_nv4hi_uuus (__a, __b, __c); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vsra_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) { - return (uint32x2_t) __builtin_aarch64_usra_nv2si ((int32x2_t) __a, - (int32x2_t) __b, __c); + return __builtin_aarch64_usra_nv2si_uuus (__a, __b, __c); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsra_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (uint64x1_t) __builtin_aarch64_usra_ndi ((int64x1_t) __a, - (int64x1_t) __b, __c); + return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -23720,29 +22497,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vsraq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) { - return (uint8x16_t) __builtin_aarch64_usra_nv16qi ((int8x16_t) __a, - (int8x16_t) __b, __c); + return __builtin_aarch64_usra_nv16qi_uuus (__a, __b, __c); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vsraq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) { - return (uint16x8_t) __builtin_aarch64_usra_nv8hi ((int16x8_t) __a, - (int16x8_t) __b, __c); + return __builtin_aarch64_usra_nv8hi_uuus (__a, __b, __c); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vsraq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) { - return (uint32x4_t) __builtin_aarch64_usra_nv4si ((int32x4_t) __a, - (int32x4_t) __b, __c); + return __builtin_aarch64_usra_nv4si_uuus (__a, __b, __c); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vsraq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) { - return (uint64x2_t) __builtin_aarch64_usra_nv2di ((int64x2_t) __a, - (int64x2_t) __b, __c); + return __builtin_aarch64_usra_nv2di_uuus (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -23754,7 +22527,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsrad_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (uint64x1_t) __builtin_aarch64_usra_ndi (__a, __b, __c); + return __builtin_aarch64_usra_ndi_uuus (__a, __b, __c); } /* vsri */ @@ -23786,29 +22559,25 @@ __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vsri_n_u8 (uint8x8_t __a, uint8x8_t __b, const int __c) { - return (uint8x8_t) __builtin_aarch64_usri_nv8qi ((int8x8_t) __a, - (int8x8_t) __b, __c); + return __builtin_aarch64_usri_nv8qi_uuus (__a, __b, __c); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vsri_n_u16 (uint16x4_t __a, uint16x4_t __b, const int __c) { - return (uint16x4_t) __builtin_aarch64_usri_nv4hi ((int16x4_t) __a, - (int16x4_t) __b, __c); + return __builtin_aarch64_usri_nv4hi_uuus (__a, __b, __c); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vsri_n_u32 (uint32x2_t __a, uint32x2_t __b, const int __c) { - return (uint32x2_t) __builtin_aarch64_usri_nv2si ((int32x2_t) __a, - (int32x2_t) __b, __c); + return __builtin_aarch64_usri_nv2si_uuus (__a, __b, __c); } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsri_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { 
- return (uint64x1_t) __builtin_aarch64_usri_ndi ((int64x1_t) __a, - (int64x1_t) __b, __c); + return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) @@ -23838,29 +22607,25 @@ __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vsriq_n_u8 (uint8x16_t __a, uint8x16_t __b, const int __c) { - return (uint8x16_t) __builtin_aarch64_usri_nv16qi ((int8x16_t) __a, - (int8x16_t) __b, __c); + return __builtin_aarch64_usri_nv16qi_uuus (__a, __b, __c); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vsriq_n_u16 (uint16x8_t __a, uint16x8_t __b, const int __c) { - return (uint16x8_t) __builtin_aarch64_usri_nv8hi ((int16x8_t) __a, - (int16x8_t) __b, __c); + return __builtin_aarch64_usri_nv8hi_uuus (__a, __b, __c); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vsriq_n_u32 (uint32x4_t __a, uint32x4_t __b, const int __c) { - return (uint32x4_t) __builtin_aarch64_usri_nv4si ((int32x4_t) __a, - (int32x4_t) __b, __c); + return __builtin_aarch64_usri_nv4si_uuus (__a, __b, __c); } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vsriq_n_u64 (uint64x2_t __a, uint64x2_t __b, const int __c) { - return (uint64x2_t) __builtin_aarch64_usri_nv2di ((int64x2_t) __a, - (int64x2_t) __b, __c); + return __builtin_aarch64_usri_nv2di_uuus (__a, __b, __c); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) @@ -23872,7 +22637,7 @@ __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsrid_n_u64 (uint64x1_t __a, uint64x1_t __b, const int __c) { - return (uint64x1_t) __builtin_aarch64_usri_ndi (__a, __b, __c); + return __builtin_aarch64_usri_ndi_uuus (__a, __b, __c); } /* vst1 */ @@ -24976,6 +23741,438 @@ /* vtrn */ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vtrn1_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtrn1_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vtrn1_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtrn1_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vtrn1_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vtrn1_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, 
__b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtrn1_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtrn1_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 2, 6}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtrn1_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vtrn1q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vtrn1q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vtrn1q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vtrn1q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vtrn1q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vtrn1q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vtrn1q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vtrn1q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 
1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtrn1q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtrn1q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 1, 11, 3, 13, 5, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 2, 10, 4, 12, 6, 14}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtrn1q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 1, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 2, 6}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtrn1q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vtrn2_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vtrn2_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vtrn2_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vtrn2_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vtrn2_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vtrn2_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vtrn2_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif +} + +__extension__ 
static __inline uint16x4_t __attribute__ ((__always_inline__)) +vtrn2_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 5, 3, 7}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vtrn2_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vtrn2q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vtrn2q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vtrn2q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vtrn2q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vtrn2q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vtrn2q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vtrn2q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vtrn2q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vtrn2q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 
11, 27, 13, 29, 15, 31}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vtrn2q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 10, 2, 12, 4, 14, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 9, 3, 11, 5, 13, 7, 15}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vtrn2q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 6, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 5, 3, 7}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vtrn2q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + __extension__ static __inline float32x2x2_t __attribute__ ((__always_inline__)) vtrn_f32 (float32x2_t a, float32x2_t b) { @@ -25206,73 +24403,73 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vuqadd_s8 (int8x8_t __a, uint8x8_t __b) { - return (int8x8_t) __builtin_aarch64_suqaddv8qi (__a, (int8x8_t) __b); + return __builtin_aarch64_suqaddv8qi_ssu (__a, __b); } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vuqadd_s16 (int16x4_t __a, uint16x4_t __b) { - return (int16x4_t) __builtin_aarch64_suqaddv4hi (__a, (int16x4_t) __b); + return __builtin_aarch64_suqaddv4hi_ssu (__a, __b); } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vuqadd_s32 (int32x2_t __a, uint32x2_t __b) { - return (int32x2_t) __builtin_aarch64_suqaddv2si (__a, (int32x2_t) __b); + return __builtin_aarch64_suqaddv2si_ssu (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vuqadd_s64 (int64x1_t __a, uint64x1_t __b) { - return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); + return __builtin_aarch64_suqadddi_ssu (__a, __b); } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vuqaddq_s8 (int8x16_t __a, uint8x16_t __b) { - return (int8x16_t) __builtin_aarch64_suqaddv16qi (__a, (int8x16_t) __b); + return __builtin_aarch64_suqaddv16qi_ssu (__a, __b); } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vuqaddq_s16 (int16x8_t __a, uint16x8_t __b) { - return (int16x8_t) __builtin_aarch64_suqaddv8hi (__a, (int16x8_t) __b); + return __builtin_aarch64_suqaddv8hi_ssu (__a, __b); } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vuqaddq_s32 (int32x4_t __a, uint32x4_t __b) { - return (int32x4_t) __builtin_aarch64_suqaddv4si (__a, (int32x4_t) __b); + return __builtin_aarch64_suqaddv4si_ssu (__a, __b); } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vuqaddq_s64 (int64x2_t __a, uint64x2_t __b) { - return (int64x2_t) __builtin_aarch64_suqaddv2di (__a, (int64x2_t) __b); + return __builtin_aarch64_suqaddv2di_ssu (__a, __b); } __extension__ static __inline int8x1_t __attribute__ ((__always_inline__)) vuqaddb_s8 (int8x1_t __a, uint8x1_t __b) { - return (int8x1_t) __builtin_aarch64_suqaddqi (__a, (int8x1_t) __b); + return __builtin_aarch64_suqaddqi_ssu (__a, __b); } __extension__ static __inline int16x1_t __attribute__ ((__always_inline__)) vuqaddh_s16 (int16x1_t __a, uint16x1_t __b) { - return (int16x1_t) __builtin_aarch64_suqaddhi (__a, (int16x1_t) __b); + return 
__builtin_aarch64_suqaddhi_ssu (__a, __b); } __extension__ static __inline int32x1_t __attribute__ ((__always_inline__)) vuqadds_s32 (int32x1_t __a, uint32x1_t __b) { - return (int32x1_t) __builtin_aarch64_suqaddsi (__a, (int32x1_t) __b); + return __builtin_aarch64_suqaddsi_ssu (__a, __b); } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vuqaddd_s64 (int64x1_t __a, uint64x1_t __b) { - return (int64x1_t) __builtin_aarch64_suqadddi (__a, (int64x1_t) __b); + return __builtin_aarch64_suqadddi_ssu (__a, __b); } #define __DEFINTERLEAVE(op, rettype, intype, funcsuffix, Q) \ @@ -25306,10 +24503,880 @@ /* vuzp */ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp1_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp1_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp1_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp1_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp1_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp1_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp1_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp1_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp1_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp1q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, 
(uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vuzp1q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vuzp1q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp1q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp1q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp1q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp1q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp1q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp1q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp1q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {9, 11, 13, 15, 1, 3, 5, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 2, 4, 6, 8, 10, 12, 14}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp1q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {5, 7, 1, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 2, 4, 6}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp1q_u64 
(uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vuzp2_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vuzp2_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vuzp2_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vuzp2_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vuzp2_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vuzp2_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vuzp2_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vuzp2_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vuzp2_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vuzp2q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vuzp2q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x16_t 
__attribute__ ((__always_inline__)) +vuzp2q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vuzp2q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vuzp2q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, + (uint8x16_t) {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, + (uint8x16_t) {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vuzp2q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vuzp2q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vuzp2q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vuzp2q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vuzp2q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 10, 12, 14, 0, 2, 4, 6}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {1, 3, 5, 7, 9, 11, 13, 15}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vuzp2q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 6, 0, 2}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {1, 3, 5, 7}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vuzp2q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + __INTERLEAVE_LIST (uzp) /* vzip */ +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vzip1_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else 
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vzip1_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vzip1_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vzip1_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vzip1_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vzip1_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vzip1_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vzip1_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vzip1_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vzip1q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vzip1q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vzip1q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ 
((__always_inline__)) +vzip1q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vzip1q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vzip1q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vzip1q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vzip1q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vzip1q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vzip1q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) + {12, 4, 13, 5, 14, 6, 15, 7}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) {0, 8, 1, 9, 2, 10, 3, 11}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vzip1q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {6, 2, 7, 3}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {0, 4, 1, 5}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vzip1q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {3, 1}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {0, 2}); +#endif +} + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vzip2_f32 (float32x2_t __a, float32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vzip2_p8 (poly8x8_t __a, poly8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline poly16x4_t 
__attribute__ ((__always_inline__)) +vzip2_p16 (poly16x4_t __a, poly16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vzip2_s8 (int8x8_t __a, int8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vzip2_s16 (int16x4_t __a, int16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vzip2_s32 (int32x2_t __a, int32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vzip2_u8 (uint8x8_t __a, uint8x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint8x8_t) {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vzip2_u16 (uint16x4_t __a, uint16x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint16x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vzip2_u32 (uint32x2_t __a, uint32x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint32x2_t) {1, 3}); +#endif +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vzip2q_f32 (float32x4_t __a, float32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) +vzip2q_f64 (float64x2_t __a, float64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vzip2q_p8 (poly8x16_t __a, poly8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vzip2q_p16 (poly16x8_t __a, poly16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vzip2q_s8 (int8x16_t __a, int8x16_t __b) +{ +#ifdef __AARCH64EB__ + return 
__builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vzip2q_s16 (int16x8_t __a, int16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vzip2q_s32 (int32x4_t __a, int32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vzip2q_s64 (int64x2_t __a, int64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vzip2q_u8 (uint8x16_t __a, uint8x16_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint8x16_t) + {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7}); +#else + return __builtin_shuffle (__a, __b, (uint8x16_t) + {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); +#endif +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vzip2q_u16 (uint16x8_t __a, uint16x8_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint16x8_t) {8, 0, 9, 1, 10, 2, 11, 3}); +#else + return __builtin_shuffle (__a, __b, (uint16x8_t) + {4, 12, 5, 13, 6, 14, 7, 15}); +#endif +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vzip2q_u32 (uint32x4_t __a, uint32x4_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint32x4_t) {4, 0, 5, 1}); +#else + return __builtin_shuffle (__a, __b, (uint32x4_t) {2, 6, 3, 7}); +#endif +} + +__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) +vzip2q_u64 (uint64x2_t __a, uint64x2_t __b) +{ +#ifdef __AARCH64EB__ + return __builtin_shuffle (__a, __b, (uint64x2_t) {2, 0}); +#else + return __builtin_shuffle (__a, __b, (uint64x2_t) {1, 3}); +#endif +} + __INTERLEAVE_LIST (zip) #undef __INTERLEAVE_LIST --- a/src/gcc/config/aarch64/t-aarch64-linux +++ b/src/gcc/config/aarch64/t-aarch64-linux @@ -22,10 +22,7 @@ LIB1ASMFUNCS = _aarch64_sync_cache_range AARCH_BE = $(if $(findstring TARGET_BIG_ENDIAN_DEFAULT=1, $(tm_defines)),_be) -MULTILIB_OSDIRNAMES = .=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) +MULTILIB_OSDIRNAMES = mabi.lp64=../lib64$(call if_multiarch,:aarch64$(AARCH_BE)-linux-gnu) MULTIARCH_DIRNAME = $(call if_multiarch,aarch64$(AARCH_BE)-linux-gnu) -# Disable the multilib for linux-gnu targets for the time being; focus -# on the baremetal targets. 
-MULTILIB_OPTIONS = -MULTILIB_DIRNAMES = +MULTILIB_OSDIRNAMES += mabi.ilp32=../libilp32 --- a/src/gcc/config/aarch64/aarch64.md +++ b/src/gcc/config/aarch64/aarch64.md @@ -68,6 +68,14 @@ (define_c_enum "unspec" [ UNSPEC_CASESI UNSPEC_CLS + UNSPEC_CRC32B + UNSPEC_CRC32CB + UNSPEC_CRC32CH + UNSPEC_CRC32CW + UNSPEC_CRC32CX + UNSPEC_CRC32H + UNSPEC_CRC32W + UNSPEC_CRC32X UNSPEC_FRECPE UNSPEC_FRECPS UNSPEC_FRECPX @@ -98,15 +106,24 @@ UNSPEC_ST2 UNSPEC_ST3 UNSPEC_ST4 + UNSPEC_ST2_LANE + UNSPEC_ST3_LANE + UNSPEC_ST4_LANE UNSPEC_TLS UNSPEC_TLSDESC UNSPEC_USHL_2S UNSPEC_USHR64 UNSPEC_VSTRUCTDUMMY + UNSPEC_SP_SET + UNSPEC_SP_TEST ]) (define_c_enum "unspecv" [ UNSPECV_EH_RETURN ; Represent EH_RETURN + UNSPECV_GET_FPCR ; Represent fetch of FPCR content. + UNSPECV_SET_FPCR ; Represent assign of FPCR content. + UNSPECV_GET_FPSR ; Represent fetch of FPSR content. + UNSPECV_SET_FPSR ; Represent assign of FPSR content. ] ) @@ -514,6 +531,10 @@ (use (match_operand 2 "" ""))])] "" { + if (!REG_P (XEXP (operands[0], 0)) + && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) + XEXP (operands[0], 0) = force_reg (Pmode, XEXP (operands[0], 0)); + if (operands[2] == NULL_RTX) operands[2] = const0_rtx; } @@ -527,6 +548,10 @@ (use (match_operand 3 "" ""))])] "" { + if (!REG_P (XEXP (operands[1], 0)) + && (GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF)) + XEXP (operands[1], 0) = force_reg (Pmode, XEXP (operands[1], 0)); + if (operands[3] == NULL_RTX) operands[3] = const0_rtx; } @@ -533,25 +558,28 @@ ) (define_insn "*sibcall_insn" - [(call (mem:DI (match_operand:DI 0 "" "X")) + [(call (mem:DI (match_operand:DI 0 "aarch64_call_insn_operand" "Ucs, Usf")) (match_operand 1 "" "")) (return) (use (match_operand 2 "" ""))] - "GET_CODE (operands[0]) == SYMBOL_REF" - "b\\t%a0" - [(set_attr "type" "branch")] - + "SIBLING_CALL_P (insn)" + "@ + br\\t%0 + b\\t%a0" + [(set_attr "type" "branch, branch")] ) (define_insn "*sibcall_value_insn" [(set (match_operand 0 "" "") - (call (mem:DI (match_operand 1 "" "X")) + (call (mem:DI (match_operand 1 "aarch64_call_insn_operand" "Ucs, Usf")) (match_operand 2 "" ""))) (return) (use (match_operand 3 "" ""))] - "GET_CODE (operands[1]) == SYMBOL_REF" - "b\\t%a1" - [(set_attr "type" "branch")] + "SIBLING_CALL_P (insn)" + "@ + br\\t%1 + b\\t%a1" + [(set_attr "type" "branch, branch")] ) ;; Call subroutine returning any type. 
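
The arm_neon.h hunks above implement the new vuzp2*, vzip1* and vzip2* intrinsics purely in terms of __builtin_shuffle, picking a different index vector under __AARCH64EB__ because GCC numbers vector lanes in the opposite order on big-endian. A small host-side sketch of the little-endian index patterns, using plain GCC vector extensions (no AArch64 target or arm_neon.h needed, so this is an illustration rather than the intrinsics themselves):

    #include <stdio.h>

    typedef unsigned char v8u8 __attribute__ ((vector_size (8)));

    int
    main (void)
    {
      v8u8 a = { 0, 1, 2, 3, 4, 5, 6, 7 };
      v8u8 b = { 10, 11, 12, 13, 14, 15, 16, 17 };
      int i;

      /* Index vectors copied from the little-endian arms of vuzp2_u8
         and vzip1_u8 above.  */
      v8u8 uzp2 = __builtin_shuffle (a, b, (v8u8) { 1, 3, 5, 7, 9, 11, 13, 15 });
      v8u8 zip1 = __builtin_shuffle (a, b, (v8u8) { 0, 8, 1, 9, 2, 10, 3, 11 });

      for (i = 0; i < 8; i++)
        printf ("%d ", uzp2[i]);      /* 1 3 5 7 11 13 15 17 */
      printf ("\n");
      for (i = 0; i < 8; i++)
        printf ("%d ", zip1[i]);      /* 0 10 1 11 2 12 3 13 */
      printf ("\n");
      return 0;
    }

UZP2 keeps the odd-numbered lanes of the concatenated operands and ZIP1 interleaves their low halves; on AArch64 each of these shuffles is intended to map to a single uzp2 or zip1 instruction.
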
@@ -669,7 +697,7 @@ fmov\\t%w0, %s1 fmov\\t%s0, %s1" [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ - adr,adr,fmov,fmov,fmov") + adr,adr,f_mcr,f_mrc,fmov") (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")] ) @@ -694,7 +722,7 @@ fmov\\t%d0, %d1 movi\\t%d0, %1" [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,load1,load1,store1,store1,\ - adr,adr,fmov,fmov,fmov,fmov") + adr,adr,f_mcr,f_mrc,fmov,fmov") (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*") (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")] ) @@ -789,7 +817,7 @@ str\\t%w1, %0 mov\\t%w0, %w1" [(set_attr "type" "f_mcr,f_mrc,fmov,fconsts,\ - f_loads,f_stores,f_loads,f_stores,fmov")] + f_loads,f_stores,f_loads,f_stores,mov_reg")] ) (define_insn "*movdf_aarch64" @@ -863,6 +891,24 @@ } ) +;; 0 is dst +;; 1 is src +;; 2 is size of move in bytes +;; 3 is alignment + +(define_expand "movmemdi" + [(match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "immediate_operand") + (match_operand:DI 3 "immediate_operand")] + "!STRICT_ALIGNMENT" +{ + if (aarch64_expand_movmem (operands)) + DONE; + FAIL; +} +) + ;; Operands 1 and 3 are tied together by the final condition; so we allow ;; fairly lax checking on the second memory operation. (define_insn "load_pair" @@ -1063,16 +1109,18 @@ (define_insn "*addsi3_aarch64" [(set - (match_operand:SI 0 "register_operand" "=rk,rk,rk") + (match_operand:SI 0 "register_operand" "=rk,rk,w,rk") (plus:SI - (match_operand:SI 1 "register_operand" "%rk,rk,rk") - (match_operand:SI 2 "aarch64_plus_operand" "I,r,J")))] + (match_operand:SI 1 "register_operand" "%rk,rk,w,rk") + (match_operand:SI 2 "aarch64_plus_operand" "I,r,w,J")))] "" "@ add\\t%w0, %w1, %2 add\\t%w0, %w1, %w2 + add\\t%0.2s, %1.2s, %2.2s sub\\t%w0, %w1, #%n2" - [(set_attr "type" "alu_imm,alu_reg,alu_imm")] + [(set_attr "type" "alu_imm,alu_reg,neon_add,alu_imm") + (set_attr "simd" "*,*,yes,*")] ) ;; zero_extend version of above @@ -1106,7 +1154,26 @@ (set_attr "simd" "*,*,*,yes")] ) -(define_insn "*add3_compare0" +(define_expand "addti3" + [(set (match_operand:TI 0 "register_operand" "") + (plus:TI (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "register_operand" "")))] + "" +{ + rtx low = gen_reg_rtx (DImode); + emit_insn (gen_adddi3_compare0 (low, gen_lowpart (DImode, operands[1]), + gen_lowpart (DImode, operands[2]))); + + rtx high = gen_reg_rtx (DImode); + emit_insn (gen_adddi3_carryin (high, gen_highpart (DImode, operands[1]), + gen_highpart (DImode, operands[2]))); + + emit_move_insn (gen_lowpart (DImode, operands[0]), low); + emit_move_insn (gen_highpart (DImode, operands[0]), high); + DONE; +}) + +(define_insn "add3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ (plus:GPI (match_operand:GPI 1 "register_operand" "%r,r,r") @@ -1390,7 +1457,7 @@ [(set_attr "type" "alu_ext")] ) -(define_insn "*add3_carryin" +(define_insn "add3_carryin" [(set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI (geu:GPI (reg:CC CC_REGNUM) (const_int 0)) @@ -1558,8 +1625,26 @@ (set_attr "simd" "*,yes")] ) +(define_expand "subti3" + [(set (match_operand:TI 0 "register_operand" "") + (minus:TI (match_operand:TI 1 "register_operand" "") + (match_operand:TI 2 "register_operand" "")))] + "" +{ + rtx low = gen_reg_rtx (DImode); + emit_insn (gen_subdi3_compare0 (low, gen_lowpart (DImode, operands[1]), + gen_lowpart (DImode, operands[2]))); -(define_insn "*sub3_compare0" + rtx high = gen_reg_rtx (DImode); + emit_insn (gen_subdi3_carryin (high, gen_highpart 
(DImode, operands[1]), + gen_highpart (DImode, operands[2]))); + + emit_move_insn (gen_lowpart (DImode, operands[0]), low); + emit_move_insn (gen_highpart (DImode, operands[0]), high); + DONE; +}) + +(define_insn "sub3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ (minus:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand:GPI 2 "register_operand" "r")) @@ -1706,7 +1791,7 @@ [(set_attr "type" "alu_ext")] ) -(define_insn "*sub3_carryin" +(define_insn "sub3_carryin" [(set (match_operand:GPI 0 "register_operand" "=r") (minus:GPI (minus:GPI @@ -1935,7 +2020,7 @@ [(set_attr "type" "mul")] ) -(define_insn "*madd" +(define_insn "madd" [(set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r") (match_operand:GPI 2 "register_operand" "r")) @@ -2045,6 +2130,48 @@ [(set_attr "type" "mull")] ) +(define_expand "mulditi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI (ANY_EXTEND:TI (match_operand:DI 1 "register_operand")) + (ANY_EXTEND:TI (match_operand:DI 2 "register_operand"))))] + "" +{ + rtx low = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (low, operands[1], operands[2])); + + rtx high = gen_reg_rtx (DImode); + emit_insn (gen_muldi3_highpart (high, operands[1], operands[2])); + + emit_move_insn (gen_lowpart (DImode, operands[0]), low); + emit_move_insn (gen_highpart (DImode, operands[0]), high); + DONE; +}) + +;; The default expansion of multi3 using umuldi3_highpart will perform +;; the additions in an order that fails to combine into two madd insns. +(define_expand "multi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI (match_operand:TI 1 "register_operand") + (match_operand:TI 2 "register_operand")))] + "" +{ + rtx l0 = gen_reg_rtx (DImode); + rtx l1 = gen_lowpart (DImode, operands[1]); + rtx l2 = gen_lowpart (DImode, operands[2]); + rtx h0 = gen_reg_rtx (DImode); + rtx h1 = gen_highpart (DImode, operands[1]); + rtx h2 = gen_highpart (DImode, operands[2]); + + emit_insn (gen_muldi3 (l0, l1, l2)); + emit_insn (gen_umuldi3_highpart (h0, l1, l2)); + emit_insn (gen_madddi (h0, h1, l2, h0)); + emit_insn (gen_madddi (h0, l1, h2, h0)); + + emit_move_insn (gen_lowpart (DImode, operands[0]), l0); + emit_move_insn (gen_highpart (DImode, operands[0]), h0); + DONE; +}) + (define_insn "muldi3_highpart" [(set (match_operand:DI 0 "register_operand" "=r") (truncate:DI @@ -2345,6 +2472,42 @@ } ) +(define_expand "movcc" + [(set (match_operand:GPF 0 "register_operand" "") + (if_then_else:GPF (match_operand 1 "aarch64_comparison_operator" "") + (match_operand:GPF 2 "register_operand" "") + (match_operand:GPF 3 "register_operand" "")))] + "" + { + rtx ccreg; + enum rtx_code code = GET_CODE (operands[1]); + + if (code == UNEQ || code == LTGT) + FAIL; + + ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), + XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + } +) + + +;; CRC32 instructions. 
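
As a usage note before the pattern itself: these CRC32 instructions are exposed to C through the new arm_acle.h header added further down in this patch, which wraps the __builtin_aarch64_crc32* builtins as ACLE intrinsics. A minimal sketch, assuming a toolchain carrying this patch and a CRC-capable target (for example -march=armv8-a+crc):

    #include <stdint.h>
    #include <stddef.h>
    #include <arm_acle.h>

    #ifdef __ARM_FEATURE_CRC32
    /* Fold a byte buffer into a running CRC-32C value one byte at a
       time with the new __crc32cb intrinsic.  */
    uint32_t
    crc32c_bytes (uint32_t crc, const uint8_t *buf, size_t len)
    {
      while (len--)
        crc = __crc32cb (crc, *buf++);
      return crc;
    }
    #endif
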
+(define_insn "aarch64_" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand: 2 "register_operand" "r")] + CRC))] + "TARGET_CRC32" + { + if (GET_MODE_BITSIZE (GET_MODE (operands[2])) >= 64) + return "\\t%w0, %w1, %x2"; + else + return "\\t%w0, %w1, %w2"; + } + [(set_attr "type" "crc")] +) + (define_insn "*csinc2_insn" [(set (match_operand:GPI 0 "register_operand" "=r") (plus:GPI (match_operator:GPI 2 "aarch64_comparison_operator" @@ -2486,7 +2649,18 @@ [(set_attr "type" "logic_shift_imm")] ) -;; zero_extend version of above +(define_insn "*_rol3" + [(set (match_operand:GPI 0 "register_operand" "=r") + (LOGICAL:GPI (rotate:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r")))] + "" + "\\t%0, %3, %1, ror ( - %2)" + [(set_attr "type" "logic_shift_imm")] +) + +;; zero_extend versions of above (define_insn "*_si3_uxtw" [(set (match_operand:DI 0 "register_operand" "=r") (zero_extend:DI @@ -2499,6 +2673,18 @@ [(set_attr "type" "logic_shift_imm")] ) +(define_insn "*_rolsi3_uxtw" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (LOGICAL:SI (rotate:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r"))))] + "" + "\\t%w0, %w3, %w1, ror (32 - %2)" + [(set_attr "type" "logic_shift_imm")] +) + (define_insn "one_cmpl2" [(set (match_operand:GPI 0 "register_operand" "=r") (not:GPI (match_operand:GPI 1 "register_operand" "r")))] @@ -3179,6 +3365,38 @@ [(set_attr "type" "rev")] ) +;; There are no canonicalisation rules for the position of the lshiftrt, ashift +;; operations within an IOR/AND RTX, therefore we have two patterns matching +;; each valid permutation. + +(define_insn "rev162" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") + (const_int 8)) + (match_operand:GPI 3 "const_int_operand" "n")) + (and:GPI (lshiftrt:GPI (match_dup 1) + (const_int 8)) + (match_operand:GPI 2 "const_int_operand" "n"))))] + "aarch_rev16_shleft_mask_imm_p (operands[3], mode) + && aarch_rev16_shright_mask_imm_p (operands[2], mode)" + "rev16\\t%0, %1" + [(set_attr "type" "rev")] +) + +(define_insn "rev162_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r") + (const_int 8)) + (match_operand:GPI 2 "const_int_operand" "n")) + (and:GPI (ashift:GPI (match_dup 1) + (const_int 8)) + (match_operand:GPI 3 "const_int_operand" "n"))))] + "aarch_rev16_shleft_mask_imm_p (operands[3], mode) + && aarch_rev16_shright_mask_imm_p (operands[2], mode)" + "rev16\\t%0, %1" + [(set_attr "type" "rev")] +) + ;; zero_extend version of above (define_insn "*bswapsi2_uxtw" [(set (match_operand:DI 0 "register_operand" "=r") @@ -3193,7 +3411,7 @@ ;; ------------------------------------------------------------------- ;; frint floating-point round to integral standard patterns. -;; Expands to btrunc, ceil, floor, nearbyint, rint, round. +;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn. 
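
The pair of rev16 patterns above is needed because combine has no canonical order for the two shift halves of the IOR, so both permutations must be matched explicitly. In source terms, the idiom they now collapse into a single rev16 is the usual byte swap within 16-bit lanes; a small illustration (whether combine actually forms rev16 here depends on optimization level, and the constants must satisfy the aarch_rev16_*_mask_imm_p checks):

    #include <stdint.h>

    /* Swap the two bytes inside each 16-bit half of X.  With this patch
       the AArch64 backend can emit the whole expression as one "rev16".  */
    uint32_t
    bswap_halfwords (uint32_t x)
    {
      return ((x << 8) & 0xff00ff00u) | ((x >> 8) & 0x00ff00ffu);
    }
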
(define_insn "2" [(set (match_operand:GPF 0 "register_operand" "=w") @@ -3489,7 +3707,7 @@ (truncate:DI (match_operand:TI 1 "register_operand" "w"))))] "reload_completed || reload_in_progress" "fmov\\t%d0, %d1" - [(set_attr "type" "f_mcr") + [(set_attr "type" "fmov") (set_attr "length" "4") ]) @@ -3587,36 +3805,63 @@ [(set_attr "type" "call") (set_attr "length" "16")]) -(define_insn "tlsie_small" - [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI [(match_operand:DI 1 "aarch64_tls_ie_symref" "S")] +(define_insn "tlsie_small_" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(match_operand 1 "aarch64_tls_ie_symref" "S")] UNSPEC_GOTSMALLTLS))] "" - "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" + "adrp\\t%0, %A1\;ldr\\t%0, [%0, #%L1]" [(set_attr "type" "load1") (set_attr "length" "8")] ) -(define_insn "tlsle_small" +(define_insn "tlsie_small_sidi" [(set (match_operand:DI 0 "register_operand" "=r") - (unspec:DI [(match_operand:DI 1 "register_operand" "r") - (match_operand:DI 2 "aarch64_tls_le_symref" "S")] + (zero_extend:DI + (unspec:SI [(match_operand 1 "aarch64_tls_ie_symref" "S")] + UNSPEC_GOTSMALLTLS)))] + "" + "adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]" + [(set_attr "type" "load1") + (set_attr "length" "8")] +) + +(define_expand "tlsle_small" + [(set (match_operand 0 "register_operand" "=r") + (unspec [(match_operand 1 "register_operand" "r") + (match_operand 2 "aarch64_tls_le_symref" "S")] + UNSPEC_GOTSMALLTLS))] + "" +{ + enum machine_mode mode = GET_MODE (operands[0]); + emit_insn ((mode == DImode + ? gen_tlsle_small_di + : gen_tlsle_small_si) (operands[0], + operands[1], + operands[2])); + DONE; +}) + +(define_insn "tlsle_small_" + [(set (match_operand:P 0 "register_operand" "=r") + (unspec:P [(match_operand:P 1 "register_operand" "r") + (match_operand 2 "aarch64_tls_le_symref" "S")] UNSPEC_GOTSMALLTLS))] "" - "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" + "add\\t%0, %1, #%G2\;add\\t%0, %0, #%L2" [(set_attr "type" "alu_reg") (set_attr "length" "8")] ) -(define_insn "tlsdesc_small" - [(set (reg:DI R0_REGNUM) - (unspec:DI [(match_operand:DI 0 "aarch64_valid_symref" "S")] +(define_insn "tlsdesc_small_" + [(set (reg:PTR R0_REGNUM) + (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")] UNSPEC_TLSDESC)) (clobber (reg:DI LR_REGNUM)) (clobber (reg:CC CC_REGNUM)) (clobber (match_scratch:DI 1 "=r"))] "TARGET_TLS_DESC" - "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\tx0, x0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" + "adrp\\tx0, %A0\;ldr\\t%1, [x0, #%L0]\;add\\t0, 0, %L0\;.tlsdesccall\\t%0\;blr\\t%1" [(set_attr "type" "call") (set_attr "length" "16")]) @@ -3641,6 +3886,98 @@ DONE; }) +;; Named patterns for stack smashing protection. +(define_expand "stack_protect_set" + [(match_operand 0 "memory_operand") + (match_operand 1 "memory_operand")] + "" +{ + enum machine_mode mode = GET_MODE (operands[0]); + + emit_insn ((mode == DImode + ? 
gen_stack_protect_set_di + : gen_stack_protect_set_si) (operands[0], operands[1])); + DONE; +}) + +(define_insn "stack_protect_set_" + [(set (match_operand:PTR 0 "memory_operand" "=m") + (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m")] + UNSPEC_SP_SET)) + (set (match_scratch:PTR 2 "=&r") (const_int 0))] + "" + "ldr\\t%2, %1\;str\\t%2, %0\;mov\t%2,0" + [(set_attr "length" "12") + (set_attr "type" "multiple")]) + +(define_expand "stack_protect_test" + [(match_operand 0 "memory_operand") + (match_operand 1 "memory_operand") + (match_operand 2)] + "" +{ + rtx result; + enum machine_mode mode = GET_MODE (operands[0]); + + result = gen_reg_rtx(mode); + + emit_insn ((mode == DImode + ? gen_stack_protect_test_di + : gen_stack_protect_test_si) (result, + operands[0], + operands[1])); + + if (mode == DImode) + emit_jump_insn (gen_cbranchdi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), + result, const0_rtx, operands[2])); + else + emit_jump_insn (gen_cbranchsi4 (gen_rtx_EQ (VOIDmode, result, const0_rtx), + result, const0_rtx, operands[2])); + DONE; +}) + +(define_insn "stack_protect_test_" + [(set (match_operand:PTR 0 "register_operand") + (unspec:PTR [(match_operand:PTR 1 "memory_operand" "m") + (match_operand:PTR 2 "memory_operand" "m")] + UNSPEC_SP_TEST)) + (clobber (match_scratch:PTR 3 "=&r"))] + "" + "ldr\t%3, %x1\;ldr\t%0, %x2\;eor\t%0, %3, %0" + [(set_attr "length" "12") + (set_attr "type" "multiple")]) + +;; Write Floating-point Control Register. +(define_insn "set_fpcr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPCR)] + "" + "msr\\tfpcr, %0\;isb" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Control Register. +(define_insn "get_fpcr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPCR))] + "" + "mrs\\t%0, fpcr" + [(set_attr "type" "mrs")]) + +;; Write Floating-point Status Register. +(define_insn "set_fpsr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] UNSPECV_SET_FPSR)] + "" + "msr\\tfpsr, %0" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Status Register. +(define_insn "get_fpsr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_GET_FPSR))] + "" + "mrs\\t%0, fpsr" + [(set_attr "type" "mrs")]) + + ;; AdvSIMD Stuff (include "aarch64-simd.md") --- a/src/gcc/config/aarch64/arm_acle.h +++ b/src/gcc/config/aarch64/arm_acle.h @@ -0,0 +1,90 @@ +/* AArch64 Non-NEON ACLE intrinsics include file. + + Copyright (C) 2014 Free Software Foundation, Inc. + Contributed by ARM Ltd. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + +#ifndef _GCC_ARM_ACLE_H +#define _GCC_ARM_ACLE_H + +#include +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __ARM_FEATURE_CRC32 +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32b (uint32_t __a, uint8_t __b) +{ + return __builtin_aarch64_crc32b (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cb (uint32_t __a, uint8_t __b) +{ + return __builtin_aarch64_crc32cb (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32ch (uint32_t __a, uint16_t __b) +{ + return __builtin_aarch64_crc32ch (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cw (uint32_t __a, uint32_t __b) +{ + return __builtin_aarch64_crc32cw (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32cd (uint32_t __a, uint64_t __b) +{ + return __builtin_aarch64_crc32cx (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32h (uint32_t __a, uint16_t __b) +{ + return __builtin_aarch64_crc32h (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32w (uint32_t __a, uint32_t __b) +{ + return __builtin_aarch64_crc32w (__a, __b); +} + +__extension__ static __inline uint32_t __attribute__ ((__always_inline__)) +__crc32d (uint32_t __a, uint64_t __b) +{ + return __builtin_aarch64_crc32x (__a, __b); +} + +#endif + +#ifdef __cplusplus +} +#endif + +#endif --- a/src/gcc/config/aarch64/aarch64-builtins.c +++ b/src/gcc/config/aarch64/aarch64-builtins.c @@ -147,16 +147,44 @@ = { qualifier_unsigned, qualifier_unsigned }; #define TYPES_UNOPU (aarch64_types_unopu_qualifiers) #define TYPES_CREATE (aarch64_types_unop_qualifiers) -#define TYPES_REINTERP (aarch64_types_unop_qualifiers) +#define TYPES_REINTERP_SS (aarch64_types_unop_qualifiers) static enum aarch64_type_qualifiers +aarch64_types_unop_su_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_unsigned }; +#define TYPES_REINTERP_SU (aarch64_types_unop_su_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unop_sp_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_poly }; +#define TYPES_REINTERP_SP (aarch64_types_unop_sp_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unop_us_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_none }; +#define TYPES_REINTERP_US (aarch64_types_unop_us_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_unop_ps_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_poly, qualifier_none }; +#define TYPES_REINTERP_PS (aarch64_types_unop_ps_qualifiers) +static enum aarch64_type_qualifiers aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_maybe_immediate }; #define TYPES_BINOP (aarch64_types_binop_qualifiers) static enum aarch64_type_qualifiers +aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_none, qualifier_none }; +#define TYPES_BINOPV (aarch64_types_binopv_qualifiers) +static enum aarch64_type_qualifiers aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned }; #define TYPES_BINOPU (aarch64_types_binopu_qualifiers) static enum aarch64_type_qualifiers +aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_none }; +#define TYPES_BINOP_UUS 
(aarch64_types_binop_uus_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_unsigned }; +#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers) +static enum aarch64_type_qualifiers aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_poly, qualifier_poly, qualifier_poly }; #define TYPES_BINOPP (aarch64_types_binopp_qualifiers) @@ -183,9 +211,14 @@ #define TYPES_GETLANE (aarch64_types_getlane_qualifiers) #define TYPES_SHIFTIMM (aarch64_types_getlane_qualifiers) static enum aarch64_type_qualifiers +aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_none, qualifier_immediate }; +#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers) +static enum aarch64_type_qualifiers aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate }; #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers) + static enum aarch64_type_qualifiers aarch64_types_setlane_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate }; @@ -194,6 +227,13 @@ #define TYPES_SHIFTACC (aarch64_types_setlane_qualifiers) static enum aarch64_type_qualifiers +aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, + qualifier_immediate }; +#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers) + + +static enum aarch64_type_qualifiers aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none }; #define TYPES_COMBINE (aarch64_types_combine_qualifiers) @@ -230,6 +270,11 @@ = { qualifier_void, qualifier_pointer_map_mode, qualifier_none }; #define TYPES_STORE1 (aarch64_types_store1_qualifiers) #define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers) +static enum aarch64_type_qualifiers +aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_void, qualifier_pointer_map_mode, + qualifier_none, qualifier_none }; +#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers) #define CF0(N, X) CODE_FOR_aarch64_##N##X #define CF1(N, X) CODE_FOR_##N##X##1 @@ -311,6 +356,8 @@ VAR7 (T, N, MAP, v8qi, v16qi, v4hi, v8hi, v2si, v4si, v2di) #define BUILTIN_VDQF(T, N, MAP) \ VAR3 (T, N, MAP, v2sf, v4sf, v2df) +#define BUILTIN_VDQF_DF(T, N, MAP) \ + VAR4 (T, N, MAP, v2sf, v4sf, v2df, df) #define BUILTIN_VDQH(T, N, MAP) \ VAR2 (T, N, MAP, v4hi, v8hi) #define BUILTIN_VDQHS(T, N, MAP) \ @@ -364,6 +411,28 @@ #include "aarch64-simd-builtins.def" }; +/* There's only 8 CRC32 builtins. Probably not worth their own .def file. 
*/ +#define AARCH64_CRC32_BUILTINS \ + CRC32_BUILTIN (crc32b, QI) \ + CRC32_BUILTIN (crc32h, HI) \ + CRC32_BUILTIN (crc32w, SI) \ + CRC32_BUILTIN (crc32x, DI) \ + CRC32_BUILTIN (crc32cb, QI) \ + CRC32_BUILTIN (crc32ch, HI) \ + CRC32_BUILTIN (crc32cw, SI) \ + CRC32_BUILTIN (crc32cx, DI) + +typedef struct +{ + const char *name; + enum machine_mode mode; + const enum insn_code icode; + unsigned int fcode; +} aarch64_crc_builtin_datum; + +#define CRC32_BUILTIN(N, M) \ + AARCH64_BUILTIN_##N, + #undef VAR1 #define VAR1(T, N, MAP, A) \ AARCH64_SIMD_BUILTIN_##T##_##N##A, @@ -371,13 +440,32 @@ enum aarch64_builtins { AARCH64_BUILTIN_MIN, + + AARCH64_BUILTIN_GET_FPCR, + AARCH64_BUILTIN_SET_FPCR, + AARCH64_BUILTIN_GET_FPSR, + AARCH64_BUILTIN_SET_FPSR, + AARCH64_SIMD_BUILTIN_BASE, #include "aarch64-simd-builtins.def" AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_BUILTIN_BASE + ARRAY_SIZE (aarch64_simd_builtin_data), + AARCH64_CRC32_BUILTIN_BASE, + AARCH64_CRC32_BUILTINS + AARCH64_CRC32_BUILTIN_MAX, AARCH64_BUILTIN_MAX }; +#undef CRC32_BUILTIN +#define CRC32_BUILTIN(N, M) \ + {"__builtin_aarch64_"#N, M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N}, + +static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = { + AARCH64_CRC32_BUILTINS +}; + +#undef CRC32_BUILTIN + static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX]; #define NUM_DREG_TYPES 6 @@ -749,11 +837,49 @@ } } +static void +aarch64_init_crc32_builtins () +{ + tree usi_type = aarch64_build_unsigned_type (SImode); + unsigned int i = 0; + + for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i) + { + aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i]; + tree argtype = aarch64_build_unsigned_type (d->mode); + tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE); + tree fndecl = add_builtin_function (d->name, ftype, d->fcode, + BUILT_IN_MD, NULL, NULL_TREE); + + aarch64_builtin_decls[d->fcode] = fndecl; + } +} + void aarch64_init_builtins (void) { + tree ftype_set_fpr + = build_function_type_list (void_type_node, unsigned_type_node, NULL); + tree ftype_get_fpr + = build_function_type_list (unsigned_type_node, NULL); + + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR] + = add_builtin_function ("__builtin_aarch64_get_fpcr", ftype_get_fpr, + AARCH64_BUILTIN_GET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR] + = add_builtin_function ("__builtin_aarch64_set_fpcr", ftype_set_fpr, + AARCH64_BUILTIN_SET_FPCR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR] + = add_builtin_function ("__builtin_aarch64_get_fpsr", ftype_get_fpr, + AARCH64_BUILTIN_GET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR] + = add_builtin_function ("__builtin_aarch64_set_fpsr", ftype_set_fpr, + AARCH64_BUILTIN_SET_FPSR, BUILT_IN_MD, NULL, NULL_TREE); + if (TARGET_SIMD) aarch64_init_simd_builtins (); + if (TARGET_CRC32) + aarch64_init_crc32_builtins (); } tree @@ -953,6 +1079,41 @@ SIMD_ARG_STOP); } +rtx +aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target) +{ + rtx pat; + aarch64_crc_builtin_datum *d + = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)]; + enum insn_code icode = d->icode; + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + enum machine_mode tmode = insn_data[icode].operand[0].mode; + enum machine_mode mode0 = insn_data[icode].operand[1].mode; + enum machine_mode mode1 = 
insn_data[icode].operand[2].mode; + + if (! target + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode) + && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + pat = GEN_FCN (icode) (target, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + return target; +} + /* Expand an expression EXP that calls a built-in function, with result going to TARGET if that's convenient. */ rtx @@ -964,9 +1125,41 @@ { tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); int fcode = DECL_FUNCTION_CODE (fndecl); + int icode; + rtx pat, op0; + tree arg0; - if (fcode >= AARCH64_SIMD_BUILTIN_BASE) + switch (fcode) + { + case AARCH64_BUILTIN_GET_FPCR: + case AARCH64_BUILTIN_SET_FPCR: + case AARCH64_BUILTIN_GET_FPSR: + case AARCH64_BUILTIN_SET_FPSR: + if ((fcode == AARCH64_BUILTIN_GET_FPCR) + || (fcode == AARCH64_BUILTIN_GET_FPSR)) + { + icode = (fcode == AARCH64_BUILTIN_GET_FPSR) ? + CODE_FOR_get_fpsr : CODE_FOR_get_fpcr; + target = gen_reg_rtx (SImode); + pat = GEN_FCN (icode) (target); + } + else + { + target = NULL_RTX; + icode = (fcode == AARCH64_BUILTIN_SET_FPSR) ? + CODE_FOR_set_fpsr : CODE_FOR_set_fpcr; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + pat = GEN_FCN (icode) (op0); + } + emit_insn (pat); + return target; + } + + if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX) return aarch64_simd_expand_builtin (fcode, exp, target); + else if (fcode >= AARCH64_CRC32_BUILTIN_BASE && fcode <= AARCH64_CRC32_BUILTIN_MAX) + return aarch64_crc32_expand_builtin (fcode, exp, target); return NULL_RTX; } @@ -1086,7 +1279,29 @@ return aarch64_builtin_decls[builtin]; } - + case BUILT_IN_BSWAP16: +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Imode && out_n == C \ + && in_mode == N##Imode && in_n == C) + if (AARCH64_CHECK_BUILTIN_MODE (4, H)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; + else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; + else + return NULL_TREE; + case BUILT_IN_BSWAP32: + if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; + else + return NULL_TREE; + case BUILT_IN_BSWAP64: + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; + else + return NULL_TREE; default: return NULL_TREE; } @@ -1127,6 +1342,25 @@ return fold_build2 (NE_EXPR, type, and_node, vec_zero_node); break; } + VAR1 (REINTERP_SS, reinterpretdi, 0, df) + VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) + VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) + VAR1 (REINTERP_SS, reinterpretv2si, 0, df) + VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) + BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) + BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) + VAR1 (REINTERP_US, reinterpretdi, 0, df) + VAR1 (REINTERP_US, reinterpretv8qi, 0, df) + VAR1 (REINTERP_US, reinterpretv4hi, 0, df) + VAR1 (REINTERP_US, reinterpretv2si, 0, df) + VAR1 (REINTERP_US, reinterpretv2sf, 0, df) + BUILTIN_VD (REINTERP_SP, 
reinterpretdf, 0) + VAR1 (REINTERP_PS, reinterpretdi, 0, df) + VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) + VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) + VAR1 (REINTERP_PS, reinterpretv2si, 0, df) + VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) + return fold_build1 (VIEW_CONVERT_EXPR, type, args[0]); VAR1 (UNOP, floatv2si, 2, v2sf) VAR1 (UNOP, floatv4si, 2, v4sf) VAR1 (UNOP, floatv2di, 2, v2df) @@ -1196,6 +1430,106 @@ return changed; } +void +aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + const unsigned AARCH64_FE_INVALID = 1; + const unsigned AARCH64_FE_DIVBYZERO = 2; + const unsigned AARCH64_FE_OVERFLOW = 4; + const unsigned AARCH64_FE_UNDERFLOW = 8; + const unsigned AARCH64_FE_INEXACT = 16; + const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID + | AARCH64_FE_DIVBYZERO + | AARCH64_FE_OVERFLOW + | AARCH64_FE_UNDERFLOW + | AARCH64_FE_INEXACT); + const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8; + tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr; + tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr; + tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr, set_fpsr; + tree update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv, ld_fenv; + + /* Generate the equivalence of : + unsigned int fenv_cr; + fenv_cr = __builtin_aarch64_get_fpcr (); + + unsigned int fenv_sr; + fenv_sr = __builtin_aarch64_get_fpsr (); + + Now set all exceptions to non-stop + unsigned int mask_cr + = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT); + unsigned int masked_cr; + masked_cr = fenv_cr & mask_cr; + + And clear all exception flags + unsigned int maske_sr = ~AARCH64_FE_ALL_EXCEPT; + unsigned int masked_cr; + masked_sr = fenv_sr & mask_sr; + + __builtin_aarch64_set_cr (masked_cr); + __builtin_aarch64_set_sr (masked_sr); */ + + fenv_cr = create_tmp_var (unsigned_type_node, NULL); + fenv_sr = create_tmp_var (unsigned_type_node, NULL); + + get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]; + set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]; + get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]; + set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]; + + mask_cr = build_int_cst (unsigned_type_node, + ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT)); + mask_sr = build_int_cst (unsigned_type_node, + ~(AARCH64_FE_ALL_EXCEPT)); + + ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node, + fenv_cr, build_call_expr (get_fpcr, 0)); + ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node, + fenv_sr, build_call_expr (get_fpsr, 0)); + + masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr); + masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr); + + hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr); + hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr); + + hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr, + hold_fnclex_sr); + masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr, + masked_fenv_sr); + ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr); + + *hold = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), + hold_fnclex); + + /* Store the value of masked_fenv to clear the exceptions: + __builtin_aarch64_set_fpsr (masked_fenv_sr); */ + + *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr); + + /* Generate the equivalent of : + unsigned int new_fenv_var; + new_fenv_var = 
__builtin_aarch64_get_fpsr (); + + __builtin_aarch64_set_fpsr (fenv_sr); + + __atomic_feraiseexcept (new_fenv_var); */ + + new_fenv_var = create_tmp_var (unsigned_type_node, NULL); + reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, + new_fenv_var, build_call_expr (get_fpsr, 0)); + restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr); + atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + update_call = build_call_expr (atomic_feraiseexcept, 1, + fold_convert (integer_type_node, new_fenv_var)); + *update = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, + reload_fenv, restore_fnenv), update_call); +} + + #undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_FIND_FRINT_VARIANT #undef BUILTIN_DX --- a/src/gcc/config/aarch64/aarch64-protos.h +++ b/src/gcc/config/aarch64/aarch64-protos.h @@ -108,9 +108,22 @@ cost models and vectors for address cost calculations, register move costs and memory move costs. */ +/* Scaled addressing modes can vary cost depending on the mode of the + value to be loaded/stored. QImode values cannot use scaled + addressing modes. */ + +struct scale_addr_mode_cost +{ + const int hi; + const int si; + const int di; + const int ti; +}; + /* Additional cost for addresses. */ struct cpu_addrcost_table { + const struct scale_addr_mode_cost addr_scale_costs; const int pre_modify; const int post_modify; const int register_offset; @@ -167,6 +180,7 @@ enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context); bool aarch64_constant_address_p (rtx); +bool aarch64_expand_movmem (rtx *); bool aarch64_float_const_zero_rtx_p (rtx); bool aarch64_function_arg_regno_p (unsigned); bool aarch64_gen_movmemqi (rtx *); @@ -175,6 +189,8 @@ bool aarch64_is_long_call_p (rtx); bool aarch64_label_mentioned_p (rtx); bool aarch64_legitimate_pic_operand_p (rtx); +bool aarch64_modes_tieable_p (enum machine_mode mode1, + enum machine_mode mode2); bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode); bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context, enum machine_mode); @@ -200,6 +216,8 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); enum reg_class aarch64_regno_regclass (unsigned); int aarch64_asm_preferred_eh_data_format (int, int); +enum machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, + enum machine_mode); int aarch64_hard_regno_mode_ok (unsigned, enum machine_mode); int aarch64_hard_regno_nregs (unsigned, enum machine_mode); int aarch64_simd_attr_length_move (rtx); @@ -289,4 +307,5 @@ extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); +void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); #endif /* GCC_AARCH64_PROTOS_H */ --- a/src/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/src/gcc/config/aarch64/aarch64-simd-builtins.def @@ -51,32 +51,43 @@ VAR1 (GETLANE, get_lane, 0, di) BUILTIN_VALL (GETLANE, be_checked_get_lane, 0) - BUILTIN_VD_RE (REINTERP, reinterpretdi, 0) - BUILTIN_VDC (REINTERP, reinterpretv8qi, 0) - BUILTIN_VDC (REINTERP, reinterpretv4hi, 0) - BUILTIN_VDC (REINTERP, reinterpretv2si, 0) - BUILTIN_VDC (REINTERP, reinterpretv2sf, 0) - BUILTIN_VQ (REINTERP, reinterpretv16qi, 0) - BUILTIN_VQ (REINTERP, reinterpretv8hi, 0) - BUILTIN_VQ (REINTERP, reinterpretv4si, 0) - BUILTIN_VQ (REINTERP, reinterpretv4sf, 0) - BUILTIN_VQ (REINTERP, reinterpretv2di, 0) - BUILTIN_VQ (REINTERP, reinterpretv2df, 0) + VAR1 (REINTERP_SS, 
reinterpretdi, 0, df) + VAR1 (REINTERP_SS, reinterpretv8qi, 0, df) + VAR1 (REINTERP_SS, reinterpretv4hi, 0, df) + VAR1 (REINTERP_SS, reinterpretv2si, 0, df) + VAR1 (REINTERP_SS, reinterpretv2sf, 0, df) + BUILTIN_VD (REINTERP_SS, reinterpretdf, 0) + BUILTIN_VD (REINTERP_SU, reinterpretdf, 0) + + VAR1 (REINTERP_US, reinterpretdi, 0, df) + VAR1 (REINTERP_US, reinterpretv8qi, 0, df) + VAR1 (REINTERP_US, reinterpretv4hi, 0, df) + VAR1 (REINTERP_US, reinterpretv2si, 0, df) + VAR1 (REINTERP_US, reinterpretv2sf, 0, df) + + BUILTIN_VD (REINTERP_SP, reinterpretdf, 0) + + VAR1 (REINTERP_PS, reinterpretdi, 0, df) + VAR1 (REINTERP_PS, reinterpretv8qi, 0, df) + VAR1 (REINTERP_PS, reinterpretv4hi, 0, df) + VAR1 (REINTERP_PS, reinterpretv2si, 0, df) + VAR1 (REINTERP_PS, reinterpretv2sf, 0, df) + BUILTIN_VDQ_I (BINOP, dup_lane, 0) /* Implemented by aarch64_qshl. */ BUILTIN_VSDQ_I (BINOP, sqshl, 0) - BUILTIN_VSDQ_I (BINOP, uqshl, 0) + BUILTIN_VSDQ_I (BINOP_UUS, uqshl, 0) BUILTIN_VSDQ_I (BINOP, sqrshl, 0) - BUILTIN_VSDQ_I (BINOP, uqrshl, 0) + BUILTIN_VSDQ_I (BINOP_UUS, uqrshl, 0) /* Implemented by aarch64_. */ BUILTIN_VSDQ_I (BINOP, sqadd, 0) - BUILTIN_VSDQ_I (BINOP, uqadd, 0) + BUILTIN_VSDQ_I (BINOPU, uqadd, 0) BUILTIN_VSDQ_I (BINOP, sqsub, 0) - BUILTIN_VSDQ_I (BINOP, uqsub, 0) + BUILTIN_VSDQ_I (BINOPU, uqsub, 0) /* Implemented by aarch64_qadd. */ - BUILTIN_VSDQ_I (BINOP, suqadd, 0) - BUILTIN_VSDQ_I (BINOP, usqadd, 0) + BUILTIN_VSDQ_I (BINOP_SSU, suqadd, 0) + BUILTIN_VSDQ_I (BINOP_UUS, usqadd, 0) /* Implemented by aarch64_get_dreg. */ BUILTIN_VDC (GETLANE, get_dregoi, 0) @@ -107,6 +118,10 @@ BUILTIN_VQ (STORESTRUCT, st3, 0) BUILTIN_VQ (STORESTRUCT, st4, 0) + BUILTIN_VQ (STORESTRUCT_LANE, st2_lane, 0) + BUILTIN_VQ (STORESTRUCT_LANE, st3_lane, 0) + BUILTIN_VQ (STORESTRUCT_LANE, st4_lane, 0) + BUILTIN_VQW (BINOP, saddl2, 0) BUILTIN_VQW (BINOP, uaddl2, 0) BUILTIN_VQW (BINOP, ssubl2, 0) @@ -142,8 +157,8 @@ BUILTIN_VSQN_HSDI (UNOP, sqmovn, 0) BUILTIN_VSQN_HSDI (UNOP, uqmovn, 0) /* Implemented by aarch64_s. */ - BUILTIN_VSDQ_I_BHSI (UNOP, sqabs, 0) - BUILTIN_VSDQ_I_BHSI (UNOP, sqneg, 0) + BUILTIN_VSDQ_I (UNOP, sqabs, 0) + BUILTIN_VSDQ_I (UNOP, sqneg, 0) BUILTIN_VSD_HSI (QUADOP, sqdmlal_lane, 0) BUILTIN_VSD_HSI (QUADOP, sqdmlsl_lane, 0) @@ -186,9 +201,9 @@ BUILTIN_VSDQ_I_DI (BINOP, ashl, 3) /* Implemented by aarch64_shl. */ BUILTIN_VSDQ_I_DI (BINOP, sshl, 0) - BUILTIN_VSDQ_I_DI (BINOP, ushl, 0) + BUILTIN_VSDQ_I_DI (BINOP_UUS, ushl, 0) BUILTIN_VSDQ_I_DI (BINOP, srshl, 0) - BUILTIN_VSDQ_I_DI (BINOP, urshl, 0) + BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0) BUILTIN_VDQ_I (SHIFTIMM, ashr, 3) VAR1 (SHIFTIMM, ashr_simd, 0, di) @@ -196,15 +211,15 @@ VAR1 (USHIFTIMM, lshr_simd, 0, di) /* Implemented by aarch64_shr_n. */ BUILTIN_VSDQ_I_DI (SHIFTIMM, srshr_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTIMM, urshr_n, 0) + BUILTIN_VSDQ_I_DI (USHIFTIMM, urshr_n, 0) /* Implemented by aarch64_sra_n. */ BUILTIN_VSDQ_I_DI (SHIFTACC, ssra_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTACC, usra_n, 0) + BUILTIN_VSDQ_I_DI (USHIFTACC, usra_n, 0) BUILTIN_VSDQ_I_DI (SHIFTACC, srsra_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTACC, ursra_n, 0) + BUILTIN_VSDQ_I_DI (USHIFTACC, ursra_n, 0) /* Implemented by aarch64_shll_n. */ BUILTIN_VDW (SHIFTIMM, sshll_n, 0) - BUILTIN_VDW (SHIFTIMM, ushll_n, 0) + BUILTIN_VDW (USHIFTIMM, ushll_n, 0) /* Implemented by aarch64_shll2_n. 
*/ BUILTIN_VQW (SHIFTIMM, sshll2_n, 0) BUILTIN_VQW (SHIFTIMM, ushll2_n, 0) @@ -212,18 +227,18 @@ BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrun_n, 0) BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrun_n, 0) BUILTIN_VSQN_HSDI (SHIFTIMM, sqshrn_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, uqshrn_n, 0) + BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0) BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0) - BUILTIN_VSQN_HSDI (SHIFTIMM, uqrshrn_n, 0) + BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0) /* Implemented by aarch64_si_n. */ BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, usri_n, 0) + BUILTIN_VSDQ_I_DI (USHIFTACC, usri_n, 0) BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssli_n, 0) - BUILTIN_VSDQ_I_DI (SHIFTINSERT, usli_n, 0) + BUILTIN_VSDQ_I_DI (USHIFTACC, usli_n, 0) /* Implemented by aarch64_qshl_n. */ - BUILTIN_VSDQ_I (SHIFTIMM, sqshlu_n, 0) + BUILTIN_VSDQ_I (SHIFTIMM_USS, sqshlu_n, 0) BUILTIN_VSDQ_I (SHIFTIMM, sqshl_n, 0) - BUILTIN_VSDQ_I (SHIFTIMM, uqshl_n, 0) + BUILTIN_VSDQ_I (USHIFTIMM, uqshl_n, 0) /* Implemented by aarch64_cm. */ BUILTIN_VALLDI (BINOP, cmeq, 0) @@ -265,7 +280,7 @@ BUILTIN_VDQF (UNOP, nearbyint, 2) BUILTIN_VDQF (UNOP, rint, 2) BUILTIN_VDQF (UNOP, round, 2) - BUILTIN_VDQF (UNOP, frintn, 2) + BUILTIN_VDQF_DF (UNOP, frintn, 2) /* Implemented by l2. */ VAR1 (UNOP, lbtruncv2sf, 2, v2si) @@ -330,6 +345,8 @@ VAR1 (UNOP, floatunsv4si, 2, v4sf) VAR1 (UNOP, floatunsv2di, 2, v2df) + VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) + /* Implemented by aarch64_. */ BUILTIN_VALL (BINOP, zip1, 0) @@ -393,3 +410,6 @@ /* Implemented by aarch64_crypto_pmull. */ VAR1 (BINOPP, crypto_pmull, 0, di) VAR1 (BINOPP, crypto_pmull, 0, v2di) + + /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin. */ + VAR1 (BINOPV, im_lane_bound, 0, si) --- a/src/gcc/config/aarch64/constraints.md +++ b/src/gcc/config/aarch64/constraints.md @@ -21,6 +21,9 @@ (define_register_constraint "k" "STACK_REG" "@internal The stack register.") +(define_register_constraint "Ucs" "CALLER_SAVE_REGS" + "@internal The caller save registers.") + (define_register_constraint "w" "FP_REGS" "Floating point and SIMD vector registers.") @@ -92,6 +95,10 @@ (and (match_code "const_int") (match_test "(unsigned HOST_WIDE_INT) ival < 64"))) +(define_constraint "Usf" + "@internal Usf is a symbol reference." + (match_code "symbol_ref")) + (define_constraint "UsM" "@internal A constraint that matches the immediate constant -1." --- a/src/gcc/config/aarch64/aarch64.c +++ b/src/gcc/config/aarch64/aarch64.c @@ -63,6 +63,7 @@ #include "cfgloop.h" #include "tree-vectorizer.h" #include "config/arm/aarch-cost-tables.h" +#include "dumpfile.h" /* Defined for convenience. */ #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) @@ -141,6 +142,7 @@ static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, const unsigned char *sel); +static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool); /* The processor for which instructions should be scheduled. 
*/ enum aarch64_processor aarch64_tune = cortexa53; @@ -171,6 +173,15 @@ #endif static const struct cpu_addrcost_table generic_addrcost_table = { +#if HAVE_DESIGNATED_INITIALIZERS + .addr_scale_costs = +#endif + { + NAMED_PARAM (qi, 0), + NAMED_PARAM (hi, 0), + NAMED_PARAM (si, 0), + NAMED_PARAM (ti, 0), + }, NAMED_PARAM (pre_modify, 0), NAMED_PARAM (post_modify, 0), NAMED_PARAM (register_offset, 0), @@ -181,6 +192,27 @@ #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ #endif +static const struct cpu_addrcost_table cortexa57_addrcost_table = +{ +#if HAVE_DESIGNATED_INITIALIZERS + .addr_scale_costs = +#endif + { + NAMED_PARAM (qi, 0), + NAMED_PARAM (hi, 1), + NAMED_PARAM (si, 0), + NAMED_PARAM (ti, 1), + }, + NAMED_PARAM (pre_modify, 0), + NAMED_PARAM (post_modify, 0), + NAMED_PARAM (register_offset, 0), + NAMED_PARAM (register_extend, 0), + NAMED_PARAM (imm_offset, 0), +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif static const struct cpu_regmove_cost generic_regmove_cost = { NAMED_PARAM (GP2GP, 1), @@ -212,9 +244,29 @@ NAMED_PARAM (cond_not_taken_branch_cost, 1) }; +/* Generic costs for vector insn classes. */ #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 __extension__ #endif +static const struct cpu_vector_cost cortexa57_vector_cost = +{ + NAMED_PARAM (scalar_stmt_cost, 1), + NAMED_PARAM (scalar_load_cost, 4), + NAMED_PARAM (scalar_store_cost, 1), + NAMED_PARAM (vec_stmt_cost, 3), + NAMED_PARAM (vec_to_scalar_cost, 8), + NAMED_PARAM (scalar_to_vec_cost, 8), + NAMED_PARAM (vec_align_load_cost, 5), + NAMED_PARAM (vec_unalign_load_cost, 5), + NAMED_PARAM (vec_unalign_store_cost, 1), + NAMED_PARAM (vec_store_cost, 1), + NAMED_PARAM (cond_taken_branch_cost, 1), + NAMED_PARAM (cond_not_taken_branch_cost, 1) +}; + +#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 +__extension__ +#endif static const struct tune_params generic_tunings = { &cortexa57_extra_costs, @@ -238,9 +290,9 @@ static const struct tune_params cortexa57_tunings = { &cortexa57_extra_costs, - &generic_addrcost_table, + &cortexa57_addrcost_table, &generic_regmove_cost, - &generic_vector_cost, + &cortexa57_vector_cost, NAMED_PARAM (memmov_cost, 4), NAMED_PARAM (issue_rate, 3) }; @@ -424,6 +476,24 @@ return 0; } +/* Implement HARD_REGNO_CALLER_SAVE_MODE. */ +enum machine_mode +aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs, + enum machine_mode mode) +{ + /* Handle modes that fit within single registers. */ + if (nregs == 1 && GET_MODE_SIZE (mode) <= 16) + { + if (GET_MODE_SIZE (mode) >= 4) + return mode; + else + return SImode; + } + /* Fall back to generic for multi-reg and very large modes. */ + else + return choose_hard_reg_mode (regno, nregs, false); +} + /* Return true if calls to DECL should be treated as long-calls (ie called via a register). */ static bool @@ -444,7 +514,7 @@ represent an expression that matches an extend operation. The operands represent the paramters from - (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */ + (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). 
*/ bool aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm, rtx extract_imm) @@ -636,12 +706,24 @@ case SYMBOL_SMALL_TLSDESC: { - rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM); + enum machine_mode mode = GET_MODE (dest); + rtx x0 = gen_rtx_REG (mode, R0_REGNUM); rtx tp; - emit_insn (gen_tlsdesc_small (imm)); + gcc_assert (mode == Pmode || mode == ptr_mode); + + /* In ILP32, the got entry is always of SImode size. Unlike + small GOT, the dest is fixed at reg 0. */ + if (TARGET_ILP32) + emit_insn (gen_tlsdesc_small_si (imm)); + else + emit_insn (gen_tlsdesc_small_di (imm)); tp = aarch64_load_tp (NULL); - emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0))); + + if (mode != Pmode) + tp = gen_lowpart (mode, tp); + + emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0))); set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); return; } @@ -648,10 +730,34 @@ case SYMBOL_SMALL_GOTTPREL: { - rtx tmp_reg = gen_reg_rtx (Pmode); + /* In ILP32, the mode of dest can be either SImode or DImode, + while the got entry is always of SImode size. The mode of + dest depends on how dest is used: if dest is assigned to a + pointer (e.g. in the memory), it has SImode; it may have + DImode if dest is dereferenced to access the memeory. + This is why we have to handle three different tlsie_small + patterns here (two patterns for ILP32). */ + enum machine_mode mode = GET_MODE (dest); + rtx tmp_reg = gen_reg_rtx (mode); rtx tp = aarch64_load_tp (NULL); - emit_insn (gen_tlsie_small (tmp_reg, imm)); - emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg))); + + if (mode == ptr_mode) + { + if (mode == DImode) + emit_insn (gen_tlsie_small_di (tmp_reg, imm)); + else + { + emit_insn (gen_tlsie_small_si (tmp_reg, imm)); + tp = gen_lowpart (mode, tp); + } + } + else + { + gcc_assert (mode == Pmode); + emit_insn (gen_tlsie_small_sidi (tmp_reg, imm)); + } + + emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg))); set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); return; } @@ -1162,18 +1268,10 @@ } static bool -aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, + tree exp ATTRIBUTE_UNUSED) { - /* Indirect calls are not currently supported. */ - if (decl == NULL) - return false; - - /* Cannot tail-call to long-calls, since these are outside of the - range of a branch instruction (we could handle this if we added - support for indirect tail-calls. */ - if (aarch64_decl_is_long_call_p (decl)) - return false; - + /* Currently, always true. */ return true; } @@ -1716,8 +1814,6 @@ if (reload_completed && cfun->machine->frame.laid_out) return; - cfun->machine->frame.fp_lr_offset = 0; - /* First mark all the registers that really need to be saved... */ for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) cfun->machine->frame.reg_offset[regno] = -1; @@ -1767,7 +1863,6 @@ { cfun->machine->frame.reg_offset[R29_REGNUM] = offset; offset += UNITS_PER_WORD; - cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD; } if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1) @@ -1774,7 +1869,6 @@ { cfun->machine->frame.reg_offset[R30_REGNUM] = offset; offset += UNITS_PER_WORD; - cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD; } cfun->machine->frame.padding0 = @@ -1819,7 +1913,6 @@ rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? 
gen_frame_mem : gen_rtx_MEM; - for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) { if (aarch64_register_saved_on_entry (regno)) @@ -1837,10 +1930,12 @@ { /* Empty loop. */ } + if (regno2 <= V31_REGNUM && aarch64_register_saved_on_entry (regno2)) { rtx mem2; + /* Next highest register to be saved. */ mem2 = gen_mem_ref (DFmode, plus_constant @@ -1866,10 +1961,10 @@ gen_rtx_REG (DFmode, regno2)); } - /* The first part of a frame-related parallel insn - is always assumed to be relevant to the frame - calculations; subsequent parts, are only - frame-related if explicitly marked. */ + /* The first part of a frame-related parallel insn is + always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. */ RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; regno = regno2; start_offset += increment * 2; @@ -1882,7 +1977,7 @@ { insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem); add_reg_note (insn, REG_CFA_RESTORE, - gen_rtx_REG (DImode, regno)); + gen_rtx_REG (DFmode, regno)); } start_offset += increment; } @@ -1889,7 +1984,6 @@ RTX_FRAME_RELATED_P (insn) = 1; } } - } @@ -1897,7 +1991,7 @@ restore's have to happen. */ static void aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset, - bool restore) + bool restore) { rtx insn; rtx base_rtx = stack_pointer_rtx; @@ -1929,6 +2023,7 @@ aarch64_register_saved_on_entry (regno2)) { rtx mem2; + /* Next highest register to be saved. */ mem2 = gen_mem_ref (Pmode, plus_constant @@ -1952,12 +2047,11 @@ add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2)); } - /* The first part of a frame-related parallel insn - is always assumed to be relevant to the frame - calculations; subsequent parts, are only - frame-related if explicitly marked. */ - RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, - 1)) = 1; + /* The first part of a frame-related parallel insn is + always assumed to be relevant to the frame + calculations; subsequent parts, are only + frame-related if explicitly marked. 
*/ + RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; regno = regno2; start_offset += increment * 2; } @@ -1977,7 +2071,6 @@ } aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx); - } /* AArch64 stack frames generated by this compiler look like: @@ -1986,37 +2079,35 @@ | | | incoming stack arguments | | | - +-------------------------------+ <-- arg_pointer_rtx - | | + +-------------------------------+ + | | <-- incoming stack pointer (aligned) | callee-allocated save area | | for register varargs | | | - +-------------------------------+ <-- frame_pointer_rtx + +-------------------------------+ + | local variables | <-- frame_pointer_rtx | | - | local variables | - | | +-------------------------------+ | padding0 | \ +-------------------------------+ | - | | | - | | | | callee-saved registers | | frame.saved_regs_size - | | | +-------------------------------+ | | LR' | | +-------------------------------+ | - | FP' | / - P +-------------------------------+ <-- hard_frame_pointer_rtx + | FP' | / <- hard_frame_pointer_rtx (aligned) + +-------------------------------+ | dynamic allocation | +-------------------------------+ - | | - | outgoing stack arguments | - | | - +-------------------------------+ <-- stack_pointer_rtx + | padding | + +-------------------------------+ + | outgoing stack arguments | <-- arg_pointer + | | + +-------------------------------+ + | | <-- stack_pointer_rtx (aligned) - Dynamic stack allocations such as alloca insert data at point P. - They decrease stack_pointer_rtx but leave frame_pointer_rtx and - hard_frame_pointer_rtx unchanged. */ + Dynamic stack allocations via alloca() decrease stack_pointer_rtx + but leave frame_pointer_rtx and hard_frame_pointer_rtx + unchanged. */ /* Generate the prologue instructions for entry into a function. Establish the stack frame by decreasing the stack pointer with a @@ -2449,12 +2540,22 @@ - 2 * UNITS_PER_WORD)); } -/* Output code to build up a constant in a register. */ -static void -aarch64_build_constant (int regnum, HOST_WIDE_INT val) +/* Possibly output code to build up a constant in a register. For + the benefit of the costs infrastructure, returns the number of + instructions which would be emitted. GENERATE inhibits or + enables code generation. */ + +static int +aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate) { + int insns = 0; + if (aarch64_bitmask_imm (val, DImode)) - emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); + { + if (generate) + emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); + insns = 1; + } else { int i; @@ -2485,15 +2586,19 @@ the same. 
*/ if (ncount < zcount) { - emit_move_insn (gen_rtx_REG (Pmode, regnum), - GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); + if (generate) + emit_move_insn (gen_rtx_REG (Pmode, regnum), + GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); tval = 0xffff; + insns++; } else { - emit_move_insn (gen_rtx_REG (Pmode, regnum), - GEN_INT (val & 0xffff)); + if (generate) + emit_move_insn (gen_rtx_REG (Pmode, regnum), + GEN_INT (val & 0xffff)); tval = 0; + insns++; } val >>= 16; @@ -2501,11 +2606,17 @@ for (i = 16; i < 64; i += 16) { if ((val & 0xffff) != tval) - emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), - GEN_INT (i), GEN_INT (val & 0xffff))); + { + if (generate) + emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), + GEN_INT (i), + GEN_INT (val & 0xffff))); + insns++; + } val >>= 16; } } + return insns; } static void @@ -2520,7 +2631,7 @@ if (mdelta >= 4096 * 4096) { - aarch64_build_constant (scratchreg, delta); + (void) aarch64_build_constant (scratchreg, delta, true); emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); } else if (mdelta > 0) @@ -2594,7 +2705,7 @@ addr = plus_constant (Pmode, temp0, vcall_offset); else { - aarch64_build_constant (IP1_REGNUM, vcall_offset); + (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true); addr = gen_rtx_PLUS (Pmode, temp0, temp1); } @@ -3046,11 +3157,11 @@ enum rtx_code code = GET_CODE (x); rtx op0, op1; bool allow_reg_index_p = - outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16; - + outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16 + || aarch64_vector_mode_supported_p (mode)); /* Don't support anything other than POST_INC or REG addressing for AdvSIMD. */ - if (aarch64_vector_mode_p (mode) + if (aarch64_vect_struct_mode_p (mode) && (code != POST_INC && code != REG)) return false; @@ -3839,34 +3950,34 @@ if (addr.offset == const0_rtx) asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]); else - asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)], INTVAL (addr.offset)); return; case ADDRESS_REG_REG: if (addr.shift == 0) - asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)], reg_names [REGNO (addr.offset)]); else - asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)], reg_names [REGNO (addr.offset)], addr.shift); return; case ADDRESS_REG_UXTW: if (addr.shift == 0) - asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)], REGNO (addr.offset) - R0_REGNUM); else - asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)], REGNO (addr.offset) - R0_REGNUM, addr.shift); return; case ADDRESS_REG_SXTW: if (addr.shift == 0) - asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)], REGNO (addr.offset) - R0_REGNUM); else - asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)], REGNO (addr.offset) - R0_REGNUM, addr.shift); return; @@ -3874,27 +3985,27 @@ switch (GET_CODE (x)) { case PRE_INC: - asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)], GET_MODE_SIZE (aarch64_memory_reference_mode)); return; case POST_INC: - asm_fprintf (f, "[%s],%d", reg_names 
[REGNO (addr.base)], + asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)], GET_MODE_SIZE (aarch64_memory_reference_mode)); return; case PRE_DEC: - asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)], GET_MODE_SIZE (aarch64_memory_reference_mode)); return; case POST_DEC: - asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)], GET_MODE_SIZE (aarch64_memory_reference_mode)); return; case PRE_MODIFY: - asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)], INTVAL (addr.offset)); return; case POST_MODIFY: - asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)], + asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)], INTVAL (addr.offset)); return; default: @@ -3903,7 +4014,7 @@ break; case ADDRESS_LO_SUM: - asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]); + asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]); output_addr_const (f, addr.offset); asm_fprintf (f, "]"); return; @@ -3980,8 +4091,8 @@ { rtx x = *x_p; - /* Do not allow mem (plus (reg, const)) if vector mode. */ - if (aarch64_vector_mode_p (mode) + /* Do not allow mem (plus (reg, const)) if vector struct mode. */ + if (aarch64_vect_struct_mode_p (mode) && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) @@ -4150,32 +4261,31 @@ + crtl->outgoing_args_size + cfun->machine->saved_varargs_size); - frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); - offset = frame_size; + frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT); + offset = frame_size; - if (to == HARD_FRAME_POINTER_REGNUM) - { - if (from == ARG_POINTER_REGNUM) - return offset - crtl->outgoing_args_size; + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) + return offset - crtl->outgoing_args_size; - if (from == FRAME_POINTER_REGNUM) - return cfun->machine->frame.saved_regs_size + get_frame_size (); - } + if (from == FRAME_POINTER_REGNUM) + return cfun->machine->frame.saved_regs_size + get_frame_size (); + } - if (to == STACK_POINTER_REGNUM) - { - if (from == FRAME_POINTER_REGNUM) - { - HOST_WIDE_INT elim = crtl->outgoing_args_size - + cfun->machine->frame.saved_regs_size - + get_frame_size () - - cfun->machine->frame.fp_lr_offset; - elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); - return elim; - } - } + if (to == STACK_POINTER_REGNUM) + { + if (from == FRAME_POINTER_REGNUM) + { + HOST_WIDE_INT elim = crtl->outgoing_args_size + + cfun->machine->frame.saved_regs_size + + get_frame_size (); + elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT); + return elim; + } + } - return offset; + return offset; } @@ -4242,6 +4352,7 @@ { switch (regclass) { + case CALLER_SAVE_REGS: case CORE_REGS: case POINTER_REGS: case GENERAL_REGS: @@ -4443,9 +4554,13 @@ { rtx op = x; + /* We accept both ROTATERT and ROTATE: since the RHS must be a constant + we can convert both to ROR during final output. */ if ((GET_CODE (op) == ASHIFT || GET_CODE (op) == ASHIFTRT - || GET_CODE (op) == LSHIFTRT) + || GET_CODE (op) == LSHIFTRT + || GET_CODE (op) == ROTATERT + || GET_CODE (op) == ROTATE) && CONST_INT_P (XEXP (op, 1))) return XEXP (op, 0); @@ -4457,12 +4572,12 @@ return x; } -/* Helper function for rtx cost calculation. Strip a shift or extend +/* Helper function for rtx cost calculation. Strip an extend expression from X. 
Returns the inner operand if successful, or the original expression on failure. We deal with a number of possible canonicalization variations here. */ static rtx -aarch64_strip_shift_or_extend (rtx x) +aarch64_strip_extend (rtx x) { rtx op = x; @@ -4469,6 +4584,7 @@ /* Zero and sign extraction of a widened value. */ if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT) && XEXP (op, 2) == const0_rtx + && GET_CODE (XEXP (op, 0)) == MULT && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1), XEXP (op, 1))) return XEXP (XEXP (op, 0), 0); @@ -4497,9 +4613,316 @@ if (op != x) return op; - return aarch64_strip_shift (x); + return x; } +/* Helper function for rtx cost calculation. Calculate the cost of + a MULT, which may be part of a multiply-accumulate rtx. Return + the calculated cost of the expression, recursing manually in to + operands where needed. */ + +static int +aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed) +{ + rtx op0, op1; + const struct cpu_cost_table *extra_cost + = aarch64_tune_params->insn_extra_cost; + int cost = 0; + bool maybe_fma = (outer == PLUS || outer == MINUS); + enum machine_mode mode = GET_MODE (x); + + gcc_checking_assert (code == MULT); + + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + /* Integer multiply/fma. */ + if (GET_MODE_CLASS (mode) == MODE_INT) + { + /* The multiply will be canonicalized as a shift, cost it as such. */ + if (CONST_INT_P (op1) + && exact_log2 (INTVAL (op1)) > 0) + { + if (speed) + { + if (maybe_fma) + /* ADD (shifted register). */ + cost += extra_cost->alu.arith_shift; + else + /* LSL (immediate). */ + cost += extra_cost->alu.shift; + } + + cost += rtx_cost (op0, GET_CODE (op0), 0, speed); + + return cost; + } + + /* Integer multiplies or FMAs have zero/sign extending variants. */ + if ((GET_CODE (op0) == ZERO_EXTEND + && GET_CODE (op1) == ZERO_EXTEND) + || (GET_CODE (op0) == SIGN_EXTEND + && GET_CODE (op1) == SIGN_EXTEND)) + { + cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed) + + rtx_cost (XEXP (op1, 0), MULT, 1, speed); + + if (speed) + { + if (maybe_fma) + /* MADD/SMADDL/UMADDL. */ + cost += extra_cost->mult[0].extend_add; + else + /* MUL/SMULL/UMULL. */ + cost += extra_cost->mult[0].extend; + } + + return cost; + } + + /* This is either an integer multiply or an FMA. In both cases + we want to recurse and cost the operands. */ + cost += rtx_cost (op0, MULT, 0, speed) + + rtx_cost (op1, MULT, 1, speed); + + if (speed) + { + if (maybe_fma) + /* MADD. */ + cost += extra_cost->mult[mode == DImode].add; + else + /* MUL. */ + cost += extra_cost->mult[mode == DImode].simple; + } + + return cost; + } + else + { + if (speed) + { + /* Floating-point FMA/FMUL can also support negations of the + operands. */ + if (GET_CODE (op0) == NEG) + op0 = XEXP (op0, 0); + if (GET_CODE (op1) == NEG) + op1 = XEXP (op1, 0); + + if (maybe_fma) + /* FMADD/FNMADD/FNMSUB/FMSUB. */ + cost += extra_cost->fp[mode == DFmode].fma; + else + /* FMUL/FNMUL. 
*/ + cost += extra_cost->fp[mode == DFmode].mult; + } + + cost += rtx_cost (op0, MULT, 0, speed) + + rtx_cost (op1, MULT, 1, speed); + return cost; + } +} + +static int +aarch64_address_cost (rtx x, + enum machine_mode mode, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed) +{ + enum rtx_code c = GET_CODE (x); + const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; + struct aarch64_address_info info; + int cost = 0; + info.shift = 0; + + if (!aarch64_classify_address (&info, x, mode, c, false)) + { + if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF) + { + /* This is a CONST or SYMBOL ref which will be split + in a different way depending on the code model in use. + Cost it through the generic infrastructure. */ + int cost_symbol_ref = rtx_cost (x, MEM, 1, speed); + /* Divide through by the cost of one instruction to + bring it to the same units as the address costs. */ + cost_symbol_ref /= COSTS_N_INSNS (1); + /* The cost is then the cost of preparing the address, + followed by an immediate (possibly 0) offset. */ + return cost_symbol_ref + addr_cost->imm_offset; + } + else + { + /* This is most likely a jump table from a case + statement. */ + return addr_cost->register_offset; + } + } + + switch (info.type) + { + case ADDRESS_LO_SUM: + case ADDRESS_SYMBOLIC: + case ADDRESS_REG_IMM: + cost += addr_cost->imm_offset; + break; + + case ADDRESS_REG_WB: + if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) + cost += addr_cost->pre_modify; + else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) + cost += addr_cost->post_modify; + else + gcc_unreachable (); + + break; + + case ADDRESS_REG_REG: + cost += addr_cost->register_offset; + break; + + case ADDRESS_REG_UXTW: + case ADDRESS_REG_SXTW: + cost += addr_cost->register_extend; + break; + + default: + gcc_unreachable (); + } + + + if (info.shift > 0) + { + /* For the sake of calculating the cost of the shifted register + component, we can treat same sized modes in the same way. */ + switch (GET_MODE_BITSIZE (mode)) + { + case 16: + cost += addr_cost->addr_scale_costs.hi; + break; + + case 32: + cost += addr_cost->addr_scale_costs.si; + break; + + case 64: + cost += addr_cost->addr_scale_costs.di; + break; + + /* We can't tell, or this is a 128-bit vector. */ + default: + cost += addr_cost->addr_scale_costs.ti; + break; + } + } + + return cost; +} + +/* Return true if the RTX X in mode MODE is a zero or sign extract + usable in an ADD or SUB (extended register) instruction. */ +static bool +aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode) +{ + /* Catch add with a sign extract. + This is add__multp2. */ + if (GET_CODE (x) == SIGN_EXTRACT + || GET_CODE (x) == ZERO_EXTRACT) + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + rtx op2 = XEXP (x, 2); + + if (GET_CODE (op0) == MULT + && CONST_INT_P (op1) + && op2 == const0_rtx + && CONST_INT_P (XEXP (op0, 1)) + && aarch64_is_extend_from_extract (mode, + XEXP (op0, 1), + op1)) + { + return true; + } + } + + return false; +} + +/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)), + storing it in *COST. Result is true if the total cost of the operation + has now been calculated. 
*/ +static bool +aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) +{ + rtx inner; + rtx comparator; + enum rtx_code cmpcode; + + if (COMPARISON_P (op0)) + { + inner = XEXP (op0, 0); + comparator = XEXP (op0, 1); + cmpcode = GET_CODE (op0); + } + else + { + inner = op0; + comparator = const0_rtx; + cmpcode = NE; + } + + if (GET_CODE (op1) == PC || GET_CODE (op2) == PC) + { + /* Conditional branch. */ + if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) + return true; + else + { + if (cmpcode == NE || cmpcode == EQ) + { + if (comparator == const0_rtx) + { + /* TBZ/TBNZ/CBZ/CBNZ. */ + if (GET_CODE (inner) == ZERO_EXTRACT) + /* TBZ/TBNZ. */ + *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT, + 0, speed); + else + /* CBZ/CBNZ. */ + *cost += rtx_cost (inner, cmpcode, 0, speed); + + return true; + } + } + else if (cmpcode == LT || cmpcode == GE) + { + /* TBZ/TBNZ. */ + if (comparator == const0_rtx) + return true; + } + } + } + else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) + { + /* It's a conditional operation based on the status flags, + so it must be some flavor of CSEL. */ + + /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */ + if (GET_CODE (op1) == NEG + || GET_CODE (op1) == NOT + || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx)) + op1 = XEXP (op1, 0); + + *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed); + *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed); + return true; + } + + /* We don't know what this is, cost all operands. */ + return false; +} + /* Calculate the cost of calculating X, storing it in *COST. Result is true if the total cost of the operation has now been calculated. */ static bool @@ -4506,13 +4929,31 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, int param ATTRIBUTE_UNUSED, int *cost, bool speed) { - rtx op0, op1; + rtx op0, op1, op2; const struct cpu_cost_table *extra_cost = aarch64_tune_params->insn_extra_cost; + enum machine_mode mode = GET_MODE (x); + /* By default, assume that everything has equivalent cost to the + cheapest instruction. Any additional costs are applied as a delta + above this default. */ + *cost = COSTS_N_INSNS (1); + + /* TODO: The cost infrastructure currently does not handle + vector operations. Assume that all vector operations + are equally expensive. */ + if (VECTOR_MODE_P (mode)) + { + if (speed) + *cost += extra_cost->vect.alu; + return true; + } + switch (code) { case SET: + /* The cost depends entirely on the operands to SET. */ + *cost = 0; op0 = SET_DEST (x); op1 = SET_SRC (x); @@ -4520,25 +4961,47 @@ { case MEM: if (speed) - *cost += extra_cost->ldst.store; + { + rtx address = XEXP (op0, 0); + if (GET_MODE_CLASS (mode) == MODE_INT) + *cost += extra_cost->ldst.store; + else if (mode == SFmode) + *cost += extra_cost->ldst.storef; + else if (mode == DFmode) + *cost += extra_cost->ldst.stored; - if (op1 != const0_rtx) - *cost += rtx_cost (op1, SET, 1, speed); + *cost += + COSTS_N_INSNS (aarch64_address_cost (address, mode, + 0, speed)); + } + + *cost += rtx_cost (op1, SET, 1, speed); return true; case SUBREG: if (! REG_P (SUBREG_REG (op0))) *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed); + /* Fall through. */ case REG: - /* Cost is just the cost of the RHS of the set. */ - *cost += rtx_cost (op1, SET, 1, true); + /* const0_rtx is in general free, but we will use an + instruction to set a register to 0. */ + if (REG_P (op1) || op1 == const0_rtx) + { + /* The cost is 1 per register copied. 
*/ + int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1) + / UNITS_PER_WORD; + *cost = COSTS_N_INSNS (n_minus_1 + 1); + } + else + /* Cost is just the cost of the RHS of the set. */ + *cost += rtx_cost (op1, SET, 1, speed); return true; - case ZERO_EXTRACT: /* Bit-field insertion. */ + case ZERO_EXTRACT: case SIGN_EXTRACT: - /* Strip any redundant widening of the RHS to meet the width of - the target. */ + /* Bit-field insertion. Strip any redundant widening of + the RHS to meet the width of the target. */ if (GET_CODE (op1) == SUBREG) op1 = SUBREG_REG (op1); if ((GET_CODE (op1) == ZERO_EXTEND @@ -4547,25 +5010,139 @@ && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0))) >= INTVAL (XEXP (op0, 1)))) op1 = XEXP (op1, 0); - *cost += rtx_cost (op1, SET, 1, speed); + + if (CONST_INT_P (op1)) + { + /* MOV immediate is assumed to always be cheap. */ + *cost = COSTS_N_INSNS (1); + } + else + { + /* BFM. */ + if (speed) + *cost += extra_cost->alu.bfi; + *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed); + } + return true; default: + /* We can't make sense of this, assume default cost. */ + *cost = COSTS_N_INSNS (1); break; } return false; + case CONST_INT: + /* If an instruction can incorporate a constant within the + instruction, the instruction's expression avoids calling + rtx_cost() on the constant. If rtx_cost() is called on a + constant, then it is usually because the constant must be + moved into a register by one or more instructions. + + The exception is constant 0, which can be expressed + as XZR/WZR and is therefore free. The exception to this is + if we have (set (reg) (const0_rtx)) in which case we must cost + the move. However, we can catch that when we cost the SET, so + we don't need to consider that here. */ + if (x == const0_rtx) + *cost = 0; + else + { + /* To an approximation, building any other constant is + proportionally expensive to the number of instructions + required to build that constant. This is true whether we + are compiling for SPEED or otherwise. */ + *cost = COSTS_N_INSNS (aarch64_build_constant (0, + INTVAL (x), + false)); + } + return true; + + case CONST_DOUBLE: + if (speed) + { + /* mov[df,sf]_aarch64. */ + if (aarch64_float_const_representable_p (x)) + /* FMOV (scalar immediate). */ + *cost += extra_cost->fp[mode == DFmode].fpconst; + else if (!aarch64_float_const_zero_rtx_p (x)) + { + /* This will be a load from memory. */ + if (mode == DFmode) + *cost += extra_cost->ldst.loadd; + else + *cost += extra_cost->ldst.loadf; + } + else + /* Otherwise this is +0.0. We get this using MOVI d0, #0 + or MOV v0.s[0], wzr - neither of which are modeled by the + cost tables. Just use the default cost. */ + { + } + } + + return true; + case MEM: if (speed) - *cost += extra_cost->ldst.load; + { + /* For loads we want the base cost of a load, plus an + approximation for the additional cost of the addressing + mode. */ + rtx address = XEXP (x, 0); + if (GET_MODE_CLASS (mode) == MODE_INT) + *cost += extra_cost->ldst.load; + else if (mode == SFmode) + *cost += extra_cost->ldst.loadf; + else if (mode == DFmode) + *cost += extra_cost->ldst.loadd; + *cost += + COSTS_N_INSNS (aarch64_address_cost (address, mode, + 0, speed)); + } + return true; case NEG: - op0 = CONST0_RTX (GET_MODE (x)); - op1 = XEXP (x, 0); - goto cost_minus; + op0 = XEXP (x, 0); + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) + { + if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) + { + /* CSETM. 
*/ + *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed); + return true; + } + + /* Cost this as SUB wzr, X. */ + op0 = CONST0_RTX (GET_MODE (x)); + op1 = XEXP (x, 0); + goto cost_minus; + } + + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) + { + /* Support (neg(fma...)) as a single instruction only if + sign of zeros is unimportant. This matches the decision + making in aarch64.md. */ + if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0))) + { + /* FNMADD. */ + *cost = rtx_cost (op0, NEG, 0, speed); + return true; + } + if (speed) + /* FNEG. */ + *cost += extra_cost->fp[mode == DFmode].neg; + return false; + } + + return false; + case COMPARE: op0 = XEXP (x, 0); op1 = XEXP (x, 1); @@ -4577,96 +5154,228 @@ goto cost_logic; } - /* Comparisons can work if the order is swapped. - Canonicalization puts the more complex operation first, but - we want it in op1. */ - if (! (REG_P (op0) - || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) - { - op0 = XEXP (x, 1); - op1 = XEXP (x, 0); - } - goto cost_minus; + if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT) + { + /* TODO: A write to the CC flags possibly costs extra, this + needs encoding in the cost tables. */ + /* CC_ZESWPmode supports zero extend for free. */ + if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND) + op0 = XEXP (op0, 0); + + /* ANDS. */ + if (GET_CODE (op0) == AND) + { + x = op0; + goto cost_logic; + } + + if (GET_CODE (op0) == PLUS) + { + /* ADDS (and CMN alias). */ + x = op0; + goto cost_plus; + } + + if (GET_CODE (op0) == MINUS) + { + /* SUBS. */ + x = op0; + goto cost_minus; + } + + if (GET_CODE (op1) == NEG) + { + /* CMN. */ + if (speed) + *cost += extra_cost->alu.arith; + + *cost += rtx_cost (op0, COMPARE, 0, speed); + *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed); + return true; + } + + /* CMP. + + Compare can freely swap the order of operands, and + canonicalization puts the more complex operation first. + But the integer MINUS logic expects the shift/extend + operation in op1. */ + if (! (REG_P (op0) + || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0))))) + { + op0 = XEXP (x, 1); + op1 = XEXP (x, 0); + } + goto cost_minus; + } + + if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT) + { + /* FCMP. */ + if (speed) + *cost += extra_cost->fp[mode == DFmode].compare; + + if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) + { + /* FCMP supports constant 0.0 for no extra cost. */ + return true; + } + return false; + } + + return false; + case MINUS: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); + { + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); - cost_minus: - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT - || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC - && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) - { - if (op0 != const0_rtx) +cost_minus: + /* Detect valid immediates. */ + if ((GET_MODE_CLASS (mode) == MODE_INT + || (GET_MODE_CLASS (mode) == MODE_CC + && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)) + && CONST_INT_P (op1) + && aarch64_uimm12_shift (INTVAL (op1))) + { *cost += rtx_cost (op0, MINUS, 0, speed); - if (CONST_INT_P (op1)) - { - if (!aarch64_uimm12_shift (INTVAL (op1))) - *cost += rtx_cost (op1, MINUS, 1, speed); - } - else - { - op1 = aarch64_strip_shift_or_extend (op1); - *cost += rtx_cost (op1, MINUS, 1, speed); - } - return true; - } + if (speed) + /* SUB(S) (immediate). */ + *cost += extra_cost->alu.arith; + return true; - return false; + } + /* Look for SUB (extended register). 
*/ + if (aarch64_rtx_arith_op_extract_p (op1, mode)) + { + if (speed) + *cost += extra_cost->alu.arith_shift; + + *cost += rtx_cost (XEXP (XEXP (op1, 0), 0), + (enum rtx_code) GET_CODE (op1), + 0, speed); + return true; + } + + rtx new_op1 = aarch64_strip_extend (op1); + + /* Cost this as an FMA-alike operation. */ + if ((GET_CODE (new_op1) == MULT + || GET_CODE (new_op1) == ASHIFT) + && code != COMPARE) + { + *cost += aarch64_rtx_mult_cost (new_op1, MULT, + (enum rtx_code) code, + speed); + *cost += rtx_cost (op0, MINUS, 0, speed); + return true; + } + + *cost += rtx_cost (new_op1, MINUS, 1, speed); + + if (speed) + { + if (GET_MODE_CLASS (mode) == MODE_INT) + /* SUB(S). */ + *cost += extra_cost->alu.arith; + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + /* FSUB. */ + *cost += extra_cost->fp[mode == DFmode].addsub; + } + return true; + } + case PLUS: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); + { + rtx new_op0; - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) - { - if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) - { - *cost += rtx_cost (op0, PLUS, 0, speed); - } - else - { - rtx new_op0 = aarch64_strip_shift_or_extend (op0); + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); - if (new_op0 == op0 - && GET_CODE (op0) == MULT) - { - if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND - && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND) - || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND - && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND)) - { - *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, - speed) - + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, - speed) - + rtx_cost (op1, PLUS, 1, speed)); - if (speed) - *cost += - extra_cost->mult[GET_MODE (x) == DImode].extend_add; - return true; - } +cost_plus: + if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE + || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE) + { + /* CSINC. */ + *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed); + *cost += rtx_cost (op1, PLUS, 1, speed); + return true; + } - *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) - + rtx_cost (XEXP (op0, 1), MULT, 1, speed) - + rtx_cost (op1, PLUS, 1, speed)); + if (GET_MODE_CLASS (mode) == MODE_INT + && CONST_INT_P (op1) + && aarch64_uimm12_shift (INTVAL (op1))) + { + *cost += rtx_cost (op0, PLUS, 0, speed); - if (speed) - *cost += extra_cost->mult[GET_MODE (x) == DImode].add; + if (speed) + /* ADD (immediate). */ + *cost += extra_cost->alu.arith; + return true; + } - return true; - } + /* Look for ADD (extended register). */ + if (aarch64_rtx_arith_op_extract_p (op0, mode)) + { + if (speed) + *cost += extra_cost->alu.arith_shift; - *cost += (rtx_cost (new_op0, PLUS, 0, speed) - + rtx_cost (op1, PLUS, 1, speed)); - } - return true; - } + *cost += rtx_cost (XEXP (XEXP (op0, 0), 0), + (enum rtx_code) GET_CODE (op0), + 0, speed); + return true; + } + /* Strip any extend, leave shifts behind as we will + cost them through mult_cost. */ + new_op0 = aarch64_strip_extend (op0); + + if (GET_CODE (new_op0) == MULT + || GET_CODE (new_op0) == ASHIFT) + { + *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS, + speed); + *cost += rtx_cost (op1, PLUS, 1, speed); + return true; + } + + *cost += (rtx_cost (new_op0, PLUS, 0, speed) + + rtx_cost (op1, PLUS, 1, speed)); + + if (speed) + { + if (GET_MODE_CLASS (mode) == MODE_INT) + /* ADD. */ + *cost += extra_cost->alu.arith; + else if (GET_MODE_CLASS (mode) == MODE_FLOAT) + /* FADD. 
*/ + *cost += extra_cost->fp[mode == DFmode].addsub; + } + return true; + } + + case BSWAP: + *cost = COSTS_N_INSNS (1); + + if (speed) + *cost += extra_cost->alu.rev; + return false; case IOR: + if (aarch_rev16_p (x)) + { + *cost = COSTS_N_INSNS (1); + + if (speed) + *cost += extra_cost->alu.rev; + + return true; + } + /* Fall through. */ case XOR: case AND: cost_logic: @@ -4673,117 +5382,252 @@ op0 = XEXP (x, 0); op1 = XEXP (x, 1); + if (code == AND + && GET_CODE (op0) == MULT + && CONST_INT_P (XEXP (op0, 1)) + && CONST_INT_P (op1) + && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))), + INTVAL (op1)) != 0) + { + /* This is a UBFM/SBFM. */ + *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed); + if (speed) + *cost += extra_cost->alu.bfx; + return true; + } + if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) { + /* We possibly get the immediate for free, this is not + modelled. */ if (CONST_INT_P (op1) && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x))) { - *cost += rtx_cost (op0, AND, 0, speed); + *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); + + if (speed) + *cost += extra_cost->alu.logical; + + return true; } else { + rtx new_op0 = op0; + + /* Handle ORN, EON, or BIC. */ if (GET_CODE (op0) == NOT) op0 = XEXP (op0, 0); - op0 = aarch64_strip_shift (op0); - *cost += (rtx_cost (op0, AND, 0, speed) - + rtx_cost (op1, AND, 1, speed)); + + new_op0 = aarch64_strip_shift (op0); + + /* If we had a shift on op0 then this is a logical-shift- + by-register/immediate operation. Otherwise, this is just + a logical operation. */ + if (speed) + { + if (new_op0 != op0) + { + /* Shift by immediate. */ + if (CONST_INT_P (XEXP (op0, 1))) + *cost += extra_cost->alu.log_shift; + else + *cost += extra_cost->alu.log_shift_reg; + } + else + *cost += extra_cost->alu.logical; + } + + /* In both cases we want to cost both operands. */ + *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed) + + rtx_cost (op1, (enum rtx_code) code, 1, speed); + + return true; } - return true; } return false; + case NOT: + /* MVN. */ + if (speed) + *cost += extra_cost->alu.logical; + + /* The logical instruction could have the shifted register form, + but the cost is the same if the shift is processed as a separate + instruction, so we don't bother with it here. */ + return false; + case ZERO_EXTEND: - if ((GET_MODE (x) == DImode - && GET_MODE (XEXP (x, 0)) == SImode) - || GET_CODE (XEXP (x, 0)) == MEM) + + op0 = XEXP (x, 0); + /* If a value is written in SI mode, then zero extended to DI + mode, the operation will in general be free as a write to + a 'w' register implicitly zeroes the upper bits of an 'x' + register. However, if this is + + (set (reg) (zero_extend (reg))) + + we must cost the explicit register move. */ + if (mode == DImode + && GET_MODE (op0) == SImode + && outer == SET) { - *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); + int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed); + + if (!op_cost && speed) + /* MOV. */ + *cost += extra_cost->alu.extend; + else + /* Free, the cost is that of the SI mode operation. */ + *cost = op_cost; + return true; } + else if (MEM_P (XEXP (x, 0))) + { + /* All loads can zero extend to any size for free. */ + *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed); + return true; + } + + /* UXTB/UXTH. */ + if (speed) + *cost += extra_cost->alu.extend; + return false; case SIGN_EXTEND: - if (GET_CODE (XEXP (x, 0)) == MEM) + if (MEM_P (XEXP (x, 0))) { - *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed); + /* LDRSH. 
*/ + if (speed) + { + rtx address = XEXP (XEXP (x, 0), 0); + *cost += extra_cost->ldst.load_sign_extend; + + *cost += + COSTS_N_INSNS (aarch64_address_cost (address, mode, + 0, speed)); + } return true; } + + if (speed) + *cost += extra_cost->alu.extend; return false; + case ASHIFT: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + + if (CONST_INT_P (op1)) + { + /* LSL (immediate), UBMF, UBFIZ and friends. These are all + aliases. */ + if (speed) + *cost += extra_cost->alu.shift; + + /* We can incorporate zero/sign extend for free. */ + if (GET_CODE (op0) == ZERO_EXTEND + || GET_CODE (op0) == SIGN_EXTEND) + op0 = XEXP (op0, 0); + + *cost += rtx_cost (op0, ASHIFT, 0, speed); + return true; + } + else + { + /* LSLV. */ + if (speed) + *cost += extra_cost->alu.shift_reg; + + return false; /* All arguments need to be in registers. */ + } + case ROTATE: - if (!CONST_INT_P (XEXP (x, 1))) - *cost += COSTS_N_INSNS (2); - /* Fall through. */ case ROTATERT: case LSHIFTRT: - case ASHIFT: case ASHIFTRT: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); - /* Shifting by a register often takes an extra cycle. */ - if (speed && !CONST_INT_P (XEXP (x, 1))) - *cost += extra_cost->alu.arith_shift_reg; + if (CONST_INT_P (op1)) + { + /* ASR (immediate) and friends. */ + if (speed) + *cost += extra_cost->alu.shift; - *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed); + *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); + return true; + } + else + { + + /* ASR (register) and friends. */ + if (speed) + *cost += extra_cost->alu.shift_reg; + + return false; /* All arguments need to be in registers. */ + } + + case SYMBOL_REF: + + if (aarch64_cmodel == AARCH64_CMODEL_LARGE) + { + /* LDR. */ + if (speed) + *cost += extra_cost->ldst.load; + } + else if (aarch64_cmodel == AARCH64_CMODEL_SMALL + || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC) + { + /* ADRP, followed by ADD. */ + *cost += COSTS_N_INSNS (1); + if (speed) + *cost += 2 * extra_cost->alu.arith; + } + else if (aarch64_cmodel == AARCH64_CMODEL_TINY + || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC) + { + /* ADR. */ + if (speed) + *cost += extra_cost->alu.arith; + } + + if (flag_pic) + { + /* One extra load instruction, after accessing the GOT. */ + *cost += COSTS_N_INSNS (1); + if (speed) + *cost += extra_cost->ldst.load; + } return true; case HIGH: - if (!CONSTANT_P (XEXP (x, 0))) - *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed); - return true; - case LO_SUM: - if (!CONSTANT_P (XEXP (x, 1))) - *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed); - *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed); + /* ADRP/ADD (immediate). */ + if (speed) + *cost += extra_cost->alu.arith; return true; case ZERO_EXTRACT: case SIGN_EXTRACT: - *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed); + /* UBFX/SBFX. */ + if (speed) + *cost += extra_cost->alu.bfx; + + /* We can trust that the immediates used will be correct (there + are no by-register forms), so we need only cost op0. */ + *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed); return true; case MULT: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); + *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed); + /* aarch64_rtx_mult_cost always handles recursion to its + operands. 
*/ + return true; - *cost = COSTS_N_INSNS (1); - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) - { - if (CONST_INT_P (op1) - && exact_log2 (INTVAL (op1)) > 0) - { - *cost += rtx_cost (op0, ASHIFT, 0, speed); - return true; - } - - if ((GET_CODE (op0) == ZERO_EXTEND - && GET_CODE (op1) == ZERO_EXTEND) - || (GET_CODE (op0) == SIGN_EXTEND - && GET_CODE (op1) == SIGN_EXTEND)) - { - *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed) - + rtx_cost (XEXP (op1, 0), MULT, 1, speed)); - if (speed) - *cost += extra_cost->mult[GET_MODE (x) == DImode].extend; - return true; - } - - if (speed) - *cost += extra_cost->mult[GET_MODE (x) == DImode].simple; - } - else if (speed) - { - if (GET_MODE (x) == DFmode) - *cost += extra_cost->fp[1].mult; - else if (GET_MODE (x) == SFmode) - *cost += extra_cost->fp[0].mult; - } - - return false; /* All arguments need to be in registers. */ - case MOD: case UMOD: - *cost = COSTS_N_INSNS (2); if (speed) { if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) @@ -4800,53 +5644,179 @@ case DIV: case UDIV: - *cost = COSTS_N_INSNS (1); + case SQRT: if (speed) { - if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) - *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv; - else if (GET_MODE (x) == DFmode) - *cost += extra_cost->fp[1].div; - else if (GET_MODE (x) == SFmode) - *cost += extra_cost->fp[0].div; + if (GET_MODE_CLASS (mode) == MODE_INT) + /* There is no integer SQRT, so only DIV and UDIV can get + here. */ + *cost += extra_cost->mult[mode == DImode].idiv; + else + *cost += extra_cost->fp[mode == DFmode].div; } return false; /* All arguments need to be in registers. */ + case IF_THEN_ELSE: + return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1), + XEXP (x, 2), cost, speed); + + case EQ: + case NE: + case GT: + case GTU: + case LT: + case LTU: + case GE: + case GEU: + case LE: + case LEU: + + return false; /* All arguments must be in registers. */ + + case FMA: + op0 = XEXP (x, 0); + op1 = XEXP (x, 1); + op2 = XEXP (x, 2); + + if (speed) + *cost += extra_cost->fp[mode == DFmode].fma; + + /* FMSUB, FNMADD, and FNMSUB are free. */ + if (GET_CODE (op0) == NEG) + op0 = XEXP (op0, 0); + + if (GET_CODE (op2) == NEG) + op2 = XEXP (op2, 0); + + /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1, + and the by-element operand as operand 0. */ + if (GET_CODE (op1) == NEG) + op1 = XEXP (op1, 0); + + /* Catch vector-by-element operations. The by-element operand can + either be (vec_duplicate (vec_select (x))) or just + (vec_select (x)), depending on whether we are multiplying by + a vector or a scalar. + + Canonicalization is not very good in these cases, FMA4 will put the + by-element operand as operand 0, FNMA4 will have it as operand 1. */ + if (GET_CODE (op0) == VEC_DUPLICATE) + op0 = XEXP (op0, 0); + else if (GET_CODE (op1) == VEC_DUPLICATE) + op1 = XEXP (op1, 0); + + if (GET_CODE (op0) == VEC_SELECT) + op0 = XEXP (op0, 0); + else if (GET_CODE (op1) == VEC_SELECT) + op1 = XEXP (op1, 0); + + /* If the remaining parameters are not registers, + get the cost to put them into registers. */ + *cost += rtx_cost (op0, FMA, 0, speed); + *cost += rtx_cost (op1, FMA, 1, speed); + *cost += rtx_cost (op2, FMA, 2, speed); + return true; + + case FLOAT_EXTEND: + if (speed) + *cost += extra_cost->fp[mode == DFmode].widen; + return false; + + case FLOAT_TRUNCATE: + if (speed) + *cost += extra_cost->fp[mode == DFmode].narrow; + return false; + + case ABS: + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + { + /* FABS and FNEG are analogous. 
*/ + if (speed) + *cost += extra_cost->fp[mode == DFmode].neg; + } + else + { + /* Integer ABS will either be split to + two arithmetic instructions, or will be an ABS + (scalar), which we don't model. */ + *cost = COSTS_N_INSNS (2); + if (speed) + *cost += 2 * extra_cost->alu.arith; + } + return false; + + case SMAX: + case SMIN: + if (speed) + { + /* FMAXNM/FMINNM/FMAX/FMIN. + TODO: This may not be accurate for all implementations, but + we do not model this in the cost tables. */ + *cost += extra_cost->fp[mode == DFmode].addsub; + } + return false; + + case TRUNCATE: + + /* Decompose muldi3_highpart. */ + if (/* (truncate:DI */ + mode == DImode + /* (lshiftrt:TI */ + && GET_MODE (XEXP (x, 0)) == TImode + && GET_CODE (XEXP (x, 0)) == LSHIFTRT + /* (mult:TI */ + && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT + /* (ANY_EXTEND:TI (reg:DI)) + (ANY_EXTEND:TI (reg:DI))) */ + && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND) + || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND + && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)) + && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode + && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode + /* (const_int 64) */ + && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64) + { + /* UMULH/SMULH. */ + if (speed) + *cost += extra_cost->mult[mode == DImode].extend; + *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0), + MULT, 0, speed); + *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0), + MULT, 1, speed); + return true; + } + + /* Fall through. */ default: - break; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "\nFailed to cost RTX. Assuming default cost.\n"); + + return true; } return false; } -static int -aarch64_address_cost (rtx x ATTRIBUTE_UNUSED, - enum machine_mode mode ATTRIBUTE_UNUSED, - addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) +/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost + calculated for X. This cost is stored in *COST. Returns true + if the total cost of X was calculated. */ +static bool +aarch64_rtx_costs_wrapper (rtx x, int code, int outer, + int param, int *cost, bool speed) { - enum rtx_code c = GET_CODE (x); - const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost; + bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed); - if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY) - return addr_cost->pre_modify; - - if (c == POST_INC || c == POST_DEC || c == POST_MODIFY) - return addr_cost->post_modify; - - if (c == PLUS) + if (dump_file && (dump_flags & TDF_DETAILS)) { - if (GET_CODE (XEXP (x, 1)) == CONST_INT) - return addr_cost->imm_offset; - else if (GET_CODE (XEXP (x, 0)) == MULT - || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND - || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) - return addr_cost->register_extend; - - return addr_cost->register_offset; + print_rtl_single (dump_file, x); + fprintf (dump_file, "\n%s cost: %d (%s)\n", + speed ? "Hot" : "Cold", + *cost, result ? "final" : "partial"); } - else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) - return addr_cost->imm_offset; - return 0; + return result; } static int @@ -8038,7 +9008,145 @@ return true; } +/* Recognize patterns for the EXT insn. 
*/ + static bool +aarch64_evpc_ext (struct expand_vec_perm_d *d) +{ + unsigned int i, nelt = d->nelt; + rtx (*gen) (rtx, rtx, rtx, rtx); + rtx offset; + + unsigned int location = d->perm[0]; /* Always < nelt. */ + + /* Check if the extracted indices are increasing by one. */ + for (i = 1; i < nelt; i++) + { + unsigned int required = location + i; + if (d->one_vector_p) + { + /* We'll pass the same vector in twice, so allow indices to wrap. */ + required &= (nelt - 1); + } + if (d->perm[i] != required) + return false; + } + + switch (d->vmode) + { + case V16QImode: gen = gen_aarch64_extv16qi; break; + case V8QImode: gen = gen_aarch64_extv8qi; break; + case V4HImode: gen = gen_aarch64_extv4hi; break; + case V8HImode: gen = gen_aarch64_extv8hi; break; + case V2SImode: gen = gen_aarch64_extv2si; break; + case V4SImode: gen = gen_aarch64_extv4si; break; + case V2SFmode: gen = gen_aarch64_extv2sf; break; + case V4SFmode: gen = gen_aarch64_extv4sf; break; + case V2DImode: gen = gen_aarch64_extv2di; break; + case V2DFmode: gen = gen_aarch64_extv2df; break; + default: + return false; + } + + /* Success! */ + if (d->testing_p) + return true; + + /* The case where (location == 0) is a no-op for both big- and little-endian, + and is removed by the mid-end at optimization levels -O1 and higher. */ + + if (BYTES_BIG_ENDIAN && (location != 0)) + { + /* After setup, we want the high elements of the first vector (stored + at the LSB end of the register), and the low elements of the second + vector (stored at the MSB end of the register). So swap. */ + rtx temp = d->op0; + d->op0 = d->op1; + d->op1 = temp; + /* location != 0 (above), so safe to assume (nelt - location) < nelt. */ + location = nelt - location; + } + + offset = GEN_INT (location); + emit_insn (gen (d->target, d->op0, d->op1, offset)); + return true; +} + +/* Recognize patterns for the REV insns. */ + +static bool +aarch64_evpc_rev (struct expand_vec_perm_d *d) +{ + unsigned int i, j, diff, nelt = d->nelt; + rtx (*gen) (rtx, rtx); + + if (!d->one_vector_p) + return false; + + diff = d->perm[0]; + switch (diff) + { + case 7: + switch (d->vmode) + { + case V16QImode: gen = gen_aarch64_rev64v16qi; break; + case V8QImode: gen = gen_aarch64_rev64v8qi; break; + default: + return false; + } + break; + case 3: + switch (d->vmode) + { + case V16QImode: gen = gen_aarch64_rev32v16qi; break; + case V8QImode: gen = gen_aarch64_rev32v8qi; break; + case V8HImode: gen = gen_aarch64_rev64v8hi; break; + case V4HImode: gen = gen_aarch64_rev64v4hi; break; + default: + return false; + } + break; + case 1: + switch (d->vmode) + { + case V16QImode: gen = gen_aarch64_rev16v16qi; break; + case V8QImode: gen = gen_aarch64_rev16v8qi; break; + case V8HImode: gen = gen_aarch64_rev32v8hi; break; + case V4HImode: gen = gen_aarch64_rev32v4hi; break; + case V4SImode: gen = gen_aarch64_rev64v4si; break; + case V2SImode: gen = gen_aarch64_rev64v2si; break; + case V4SFmode: gen = gen_aarch64_rev64v4sf; break; + case V2SFmode: gen = gen_aarch64_rev64v2sf; break; + default: + return false; + } + break; + default: + return false; + } + + for (i = 0; i < nelt ; i += diff + 1) + for (j = 0; j <= diff; j += 1) + { + /* This is guaranteed to be true as the value of diff + is 7, 3, 1 and we should have enough elements in the + queue to generate this. Getting a vector mask with a + value of diff other than these values implies that + something is wrong by the time we get here. */ + gcc_assert (i + j < nelt); + if (d->perm[i + j] != i + diff - j) + return false; + } + + /* Success! 
*/ + if (d->testing_p) + return true; + + emit_insn (gen (d->target, d->op0)); + return true; +} + +static bool aarch64_evpc_dup (struct expand_vec_perm_d *d) { rtx (*gen) (rtx, rtx, rtx); @@ -8094,11 +9202,6 @@ enum machine_mode vmode = d->vmode; unsigned int i, nelt = d->nelt; - /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's - numbering of elements for big-endian, we must reverse the order. */ - if (BYTES_BIG_ENDIAN) - return false; - if (d->testing_p) return true; @@ -8109,7 +9212,15 @@ return false; for (i = 0; i < nelt; ++i) - rperm[i] = GEN_INT (d->perm[i]); + { + int nunits = GET_MODE_NUNITS (vmode); + + /* If big-endian and two vectors we end up with a weird mixed-endian + mode on NEON. Reverse the index within each word but not the word + itself. */ + rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1) + : d->perm[i]); + } sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm)); sel = force_reg (vmode, sel); @@ -8138,8 +9249,12 @@ if (TARGET_SIMD) { - if (aarch64_evpc_zip (d)) + if (aarch64_evpc_rev (d)) return true; + else if (aarch64_evpc_ext (d)) + return true; + else if (aarch64_evpc_zip (d)) + return true; else if (aarch64_evpc_uzp (d)) return true; else if (aarch64_evpc_trn (d)) @@ -8264,7 +9379,8 @@ /* Limited combinations of subregs are safe on FPREGs. Particularly, 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed. 2. Scalar to Scalar for integer modes or same size float modes. - 3. Vector to Vector modes. */ + 3. Vector to Vector modes. + 4. On little-endian only, Vector-Structure to Vector modes. */ if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to)) { if (aarch64_vector_mode_supported_p (from) @@ -8280,11 +9396,199 @@ if (aarch64_vector_mode_supported_p (from) && aarch64_vector_mode_supported_p (to)) return false; + + /* Within an vector structure straddling multiple vector registers + we are in a mixed-endian representation. As such, we can't + easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can + switch between vectors and vector structures cheaply. */ + if (!BYTES_BIG_ENDIAN) + if ((aarch64_vector_mode_supported_p (from) + && aarch64_vect_struct_mode_p (to)) + || (aarch64_vector_mode_supported_p (to) + && aarch64_vect_struct_mode_p (from))) + return false; } return true; } +/* Implement MODES_TIEABLE_P. */ + +bool +aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) +{ + if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)) + return true; + + /* We specifically want to allow elements of "structure" modes to + be tieable to the structure. This more general condition allows + other rarer situations too. */ + if (TARGET_SIMD + && aarch64_vector_mode_p (mode1) + && aarch64_vector_mode_p (mode2)) + return true; + + return false; +} + +/* Return a new RTX holding the result of moving POINTER forward by + AMOUNT bytes. */ + +static rtx +aarch64_move_pointer (rtx pointer, int amount) +{ + rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount); + + return adjust_automodify_address (pointer, GET_MODE (pointer), + next, amount); +} + +/* Return a new RTX holding the result of moving POINTER forward by the + size of the mode it points to. */ + +static rtx +aarch64_progress_pointer (rtx pointer) +{ + HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer)); + + return aarch64_move_pointer (pointer, amount); +} + +/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by + MODE bytes. 
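   In plain C terms, one call behaves roughly like the sketch below for the
   DImode case; the helper name is illustrative and the memcpy calls stand
   for the single register load and store that are actually emitted:

     #include <stdint.h>
     #include <string.h>

     // Rough equivalent of one DImode step: copy 8 bytes through a
     // temporary, then advance both pointers past the chunk.
     static void copy_one_dimode_block (unsigned char **dst,
                                        unsigned char **src)
     {
       uint64_t tmp;
       memcpy (&tmp, *src, sizeof tmp);   // emit_move_insn (reg, source)
       memcpy (*dst, &tmp, sizeof tmp);   // emit_move_insn (dest, reg)
       *src += sizeof tmp;
       *dst += sizeof tmp;
     }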
*/ + +static void +aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst, + enum machine_mode mode) +{ + rtx reg = gen_reg_rtx (mode); + + /* "Cast" the pointers to the correct mode. */ + *src = adjust_address (*src, mode, 0); + *dst = adjust_address (*dst, mode, 0); + /* Emit the memcpy. */ + emit_move_insn (reg, *src); + emit_move_insn (*dst, reg); + /* Move the pointers forward. */ + *src = aarch64_progress_pointer (*src); + *dst = aarch64_progress_pointer (*dst); +} + +/* Expand movmem, as if from a __builtin_memcpy. Return true if + we succeed, otherwise return false. */ + +bool +aarch64_expand_movmem (rtx *operands) +{ + unsigned int n; + rtx dst = operands[0]; + rtx src = operands[1]; + rtx base; + bool speed_p = !optimize_function_for_size_p (cfun); + + /* When optimizing for size, give a better estimate of the length of a + memcpy call, but use the default otherwise. */ + unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2; + + /* We can't do anything smart if the amount to copy is not constant. */ + if (!CONST_INT_P (operands[2])) + return false; + + n = UINTVAL (operands[2]); + + /* Try to keep the number of instructions low. For cases below 16 bytes we + need to make at most two moves. For cases above 16 bytes it will be one + move for each 16 byte chunk, then at most two additional moves. */ + if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions) + return false; + + base = copy_to_mode_reg (Pmode, XEXP (dst, 0)); + dst = adjust_automodify_address (dst, VOIDmode, base, 0); + + base = copy_to_mode_reg (Pmode, XEXP (src, 0)); + src = adjust_automodify_address (src, VOIDmode, base, 0); + + /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a + 1-byte chunk. */ + if (n < 4) + { + if (n >= 2) + { + aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); + n -= 2; + } + + if (n == 1) + aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); + + return true; + } + + /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second + 4-byte chunk, partially overlapping with the previously copied chunk. */ + if (n < 8) + { + aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); + n -= 4; + if (n > 0) + { + int move = n - 4; + + src = aarch64_move_pointer (src, move); + dst = aarch64_move_pointer (dst, move); + aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); + } + return true; + } + + /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of + them, then (if applicable) an 8-byte chunk. */ + while (n >= 8) + { + if (n / 16) + { + aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode); + n -= 16; + } + else + { + aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); + n -= 8; + } + } + + /* Finish the final bytes of the copy. We can always do this in one + instruction. We either copy the exact amount we need, or partially + overlap with the previous chunk we copied and copy 8-bytes. 
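   The overlapping trick is easiest to see at the source level.  For
   n == 15, for example, the loop above emits one 8-byte chunk and the tail
   code then backs both pointers up by one byte and emits a second 8-byte
   chunk, giving two moves whose ranges overlap by one byte.  A hand-written
   equivalent, with an illustrative name:

     #include <stdint.h>
     #include <string.h>

     // A 15-byte copy as two 8-byte moves; byte 7 is written twice with
     // the same value, which is harmless for memcpy semantics.
     static void copy15 (unsigned char *dst, const unsigned char *src)
     {
       uint64_t a, b;
       memcpy (&a, src, 8);       // bytes 0..7
       memcpy (&b, src + 7, 8);   // bytes 7..14
       memcpy (dst, &a, 8);
       memcpy (dst + 7, &b, 8);
     }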
*/ + if (n == 0) + return true; + else if (n == 1) + aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode); + else if (n == 2) + aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode); + else if (n == 4) + aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); + else + { + if (n == 3) + { + src = aarch64_move_pointer (src, -1); + dst = aarch64_move_pointer (dst, -1); + aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode); + } + else + { + int move = n - 8; + + src = aarch64_move_pointer (src, move); + dst = aarch64_move_pointer (dst, move); + aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode); + } + } + + return true; +} + #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST aarch64_address_cost @@ -8455,7 +9759,7 @@ #define TARGET_RETURN_IN_MSB aarch64_return_in_msb #undef TARGET_RTX_COSTS -#define TARGET_RTX_COSTS aarch64_rtx_costs +#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper #undef TARGET_SCHED_ISSUE_RATE #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate @@ -8493,6 +9797,10 @@ #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ aarch64_autovectorize_vector_sizes +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ + aarch64_atomic_assign_expand_fenv + /* Section anchor support. */ #undef TARGET_MIN_ANCHOR_OFFSET --- a/src/gcc/config/aarch64/aarch64-linux.h +++ b/src/gcc/config/aarch64/aarch64-linux.h @@ -21,7 +21,7 @@ #ifndef GCC_AARCH64_LINUX_H #define GCC_AARCH64_LINUX_H -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}.so.1" +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-aarch64%{mbig-endian:_be}%{mabi=ilp32:_ilp32}.so.1" #define CPP_SPEC "%{pthread:-D_REENTRANT}" @@ -33,7 +33,7 @@ -dynamic-linker " GNU_USER_DYNAMIC_LINKER " \ -X \ %{mbig-endian:-EB} %{mlittle-endian:-EL} \ - -maarch64linux%{mbig-endian:b}" + -maarch64linux%{mabi=ilp32:32}%{mbig-endian:b}" #define LINK_SPEC LINUX_TARGET_LINK_SPEC --- a/src/gcc/config/aarch64/iterators.md +++ b/src/gcc/config/aarch64/iterators.md @@ -150,6 +150,9 @@ ;; Vector modes for H and S types. (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) +;; Vector modes for H, S and D types. +(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) + ;; Vector modes for Q, H and S types. (define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) @@ -267,6 +270,10 @@ UNSPEC_UZP2 ; Used in vector permute patterns. UNSPEC_TRN1 ; Used in vector permute patterns. UNSPEC_TRN2 ; Used in vector permute patterns. + UNSPEC_EXT ; Used in aarch64-simd.md. + UNSPEC_REV64 ; Used in vector reverse patterns (permute). + UNSPEC_REV32 ; Used in vector reverse patterns (permute). + UNSPEC_REV16 ; Used in vector reverse patterns (permute). UNSPEC_AESE ; Used in aarch64-simd.md. UNSPEC_AESD ; Used in aarch64-simd.md. UNSPEC_AESMC ; Used in aarch64-simd.md. @@ -352,6 +359,9 @@ (V2DI "2d") (V2SF "2s") (V4SF "4s") (V2DF "2d")]) +(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32") + (V4SI "32") (V2DI "64")]) + (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") (V4HI ".4h") (V8HI ".8h") (V2SI ".2s") (V4SI ".4s") @@ -546,6 +556,32 @@ (define_mode_attr VSTRUCT_DREG [(OI "TI") (CI "EI") (XI "OI")]) +;; Mode of pair of elements for each vector mode, to define transfer +;; size for structure lane/dup loads and stores. 
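At the intrinsics level this transfer size is what the two-register lane loads and stores move; for instance, vld2_lane on 4x16-bit vectors reads exactly two 16-bit elements (one per destination register), i.e. a single 32-bit access, which is why V4HI maps to SI below.  A sketch, assuming the usual arm_neon.h declarations are available on this branch:

  #include <arm_neon.h>

  // Load lane 1 of both halves of a two-vector structure: a single
  // 32-bit memory transfer for the int16x4x2_t variant.
  int16x4x2_t load_lane1 (const int16_t *p, int16x4x2_t acc)
  {
    return vld2_lane_s16 (p, acc, 1);
  }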
+(define_mode_attr V_TWO_ELEM [(V8QI "HI") (V16QI "HI") + (V4HI "SI") (V8HI "SI") + (V2SI "V2SI") (V4SI "V2SI") + (DI "V2DI") (V2DI "V2DI") + (V2SF "V2SF") (V4SF "V2SF") + (DF "V2DI") (V2DF "V2DI")]) + +;; Similar, for three elements. +(define_mode_attr V_THREE_ELEM [(V8QI "BLK") (V16QI "BLK") + (V4HI "BLK") (V8HI "BLK") + (V2SI "BLK") (V4SI "BLK") + (DI "EI") (V2DI "EI") + (V2SF "BLK") (V4SF "BLK") + (DF "EI") (V2DF "EI")]) + +;; Similar, for four elements. +(define_mode_attr V_FOUR_ELEM [(V8QI "SI") (V16QI "SI") + (V4HI "V4HI") (V8HI "V4HI") + (V2SI "V4SI") (V4SI "V4SI") + (DI "OI") (V2DI "OI") + (V2SF "V4SF") (V4SF "V4SF") + (DF "OI") (V2DF "OI")]) + + ;; Mode for atomic operation suffixes (define_mode_attr atomic_sfx [(QI "b") (HI "h") (SI "") (DI "")]) @@ -847,6 +883,8 @@ UNSPEC_TRN1 UNSPEC_TRN2 UNSPEC_UZP1 UNSPEC_UZP2]) +(define_int_iterator REVERSE [UNSPEC_REV64 UNSPEC_REV32 UNSPEC_REV16]) + (define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM UNSPEC_FRINTN UNSPEC_FRINTI UNSPEC_FRINTX UNSPEC_FRINTA]) @@ -856,6 +894,10 @@ (define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX]) +(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W + UNSPEC_CRC32X UNSPEC_CRC32CB UNSPEC_CRC32CH + UNSPEC_CRC32CW UNSPEC_CRC32CX]) + (define_int_iterator CRYPTO_AES [UNSPEC_AESE UNSPEC_AESD]) (define_int_iterator CRYPTO_AESMC [UNSPEC_AESMC UNSPEC_AESIMC]) @@ -974,6 +1016,10 @@ (UNSPEC_TRN1 "trn") (UNSPEC_TRN2 "trn") (UNSPEC_UZP1 "uzp") (UNSPEC_UZP2 "uzp")]) +; op code for REV instructions (size within which elements are reversed). +(define_int_attr rev_op [(UNSPEC_REV64 "64") (UNSPEC_REV32 "32") + (UNSPEC_REV16 "16")]) + (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) @@ -980,6 +1026,16 @@ (define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")]) +(define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h") + (UNSPEC_CRC32W "crc32w") (UNSPEC_CRC32X "crc32x") + (UNSPEC_CRC32CB "crc32cb") (UNSPEC_CRC32CH "crc32ch") + (UNSPEC_CRC32CW "crc32cw") (UNSPEC_CRC32CX "crc32cx")]) + +(define_int_attr crc_mode [(UNSPEC_CRC32B "QI") (UNSPEC_CRC32H "HI") + (UNSPEC_CRC32W "SI") (UNSPEC_CRC32X "DI") + (UNSPEC_CRC32CB "QI") (UNSPEC_CRC32CH "HI") + (UNSPEC_CRC32CW "SI") (UNSPEC_CRC32CX "DI")]) + (define_int_attr aes_op [(UNSPEC_AESE "e") (UNSPEC_AESD "d")]) (define_int_attr aesmc_op [(UNSPEC_AESMC "mc") (UNSPEC_AESIMC "imc")]) --- a/src/gcc/config/aarch64/aarch64.h +++ b/src/gcc/config/aarch64/aarch64.h @@ -35,6 +35,9 @@ if (TARGET_SIMD) \ builtin_define ("__ARM_NEON"); \ \ + if (TARGET_CRC32) \ + builtin_define ("__ARM_FEATURE_CRC32"); \ + \ switch (aarch64_cmodel) \ { \ case AARCH64_CMODEL_TINY: \ @@ -188,6 +191,9 @@ /* Crypto is an optional extension to AdvSIMD. */ #define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO) +/* CRC instructions that can be enabled through +crc arch extension. */ +#define TARGET_CRC32 (AARCH64_ISA_CRC) + /* Standard register usage. 
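   Before the register-usage description continues, a short illustration of
   the CRC32 support enabled just above: when the architecture includes the
   +crc extension, __ARM_FEATURE_CRC32 is defined and the ACLE intrinsics
   map onto the patterns driven by the new CRC iterator.  A sketch, assuming
   arm_acle.h provides the standard __crc32 declarations on this branch:

     #include <arm_acle.h>
     #include <stdint.h>

     #ifdef __ARM_FEATURE_CRC32
     // One 32-bit step of CRC-32 and of CRC-32C, using the instructions
     // made available by the +crc architecture extension.
     uint32_t step_crc32 (uint32_t crc, uint32_t data)
     {
       return __crc32w (crc, data);
     }

     uint32_t step_crc32c (uint32_t crc, uint32_t data)
     {
       return __crc32cw (crc, data);
     }
     #endif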
*/ /* 31 64-bit general purpose registers R0-R30: @@ -365,8 +371,7 @@ #define HARD_REGNO_MODE_OK(REGNO, MODE) aarch64_hard_regno_mode_ok (REGNO, MODE) -#define MODES_TIEABLE_P(MODE1, MODE2) \ - (GET_MODE_CLASS (MODE1) == GET_MODE_CLASS (MODE2)) +#define MODES_TIEABLE_P(MODE1, MODE2) aarch64_modes_tieable_p (MODE1, MODE2) #define DWARF2_UNWIND_INFO 1 @@ -409,6 +414,7 @@ enum reg_class { NO_REGS, + CALLER_SAVE_REGS, CORE_REGS, GENERAL_REGS, STACK_REG, @@ -424,6 +430,7 @@ #define REG_CLASS_NAMES \ { \ "NO_REGS", \ + "CALLER_SAVE_REGS", \ "CORE_REGS", \ "GENERAL_REGS", \ "STACK_REG", \ @@ -436,6 +443,7 @@ #define REG_CLASS_CONTENTS \ { \ { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x0007ffff, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \ { 0x7fffffff, 0x00000000, 0x00000003 }, /* CORE_REGS */ \ { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ @@ -520,7 +528,6 @@ been saved. */ HOST_WIDE_INT padding0; HOST_WIDE_INT hardfp_offset; /* HARD_FRAME_POINTER_REGNUM */ - HOST_WIDE_INT fp_lr_offset; /* Space needed for saving fp and/or lr */ bool laid_out; }; @@ -661,12 +668,14 @@ /* The base cost overhead of a memcpy call, for MOVE_RATIO and friends. */ #define AARCH64_CALL_RATIO 8 -/* When optimizing for size, give a better estimate of the length of a memcpy - call, but use the default otherwise. But move_by_pieces_ninsns() counts - memory-to-memory moves, and we'll have to generate a load & store for each, - so halve the value to take that into account. */ +/* MOVE_RATIO dictates when we will use the move_by_pieces infrastructure. + move_by_pieces will continually copy the largest safe chunks. So a + 7-byte copy is a 4-byte + 2-byte + byte copy. This proves inefficient + for both size and speed of copy, so we will instead use the "movmem" + standard name to implement the copy. This logic does not apply when + targeting -mstrict-align, so keep a sensible default in that case. */ #define MOVE_RATIO(speed) \ - (((speed) ? 15 : AARCH64_CALL_RATIO) / 2) + (!STRICT_ALIGNMENT ? 2 : (((speed) ? 15 : AARCH64_CALL_RATIO) / 2)) /* For CLEAR_RATIO, when optimizing for size, give a better estimate of the length of a memset call, but use the default otherwise. */ @@ -826,6 +835,11 @@ #define SHIFT_COUNT_TRUNCATED !TARGET_SIMD +/* Choose appropriate mode for caller saves, so we do the minimum + required size of load/store. */ +#define HARD_REGNO_CALLER_SAVE_MODE(REGNO, NREGS, MODE) \ + aarch64_hard_regno_caller_save_mode ((REGNO), (NREGS), (MODE)) + /* Callee only saves lower 64-bits of a 128-bit register. Tell the compiler the callee clobbers the top 64-bits when restoring the bottom 64-bits. */ --- a/src/gcc/config/arm/aarch-cost-tables.h +++ b/src/gcc/config/arm/aarch-cost-tables.h @@ -39,6 +39,7 @@ 0, /* bfi. */ 0, /* bfx. */ 0, /* clz. */ + 0, /* rev. */ COSTS_N_INSNS (1), /* non_exec. */ false /* non_exec_costs_exec. */ }, @@ -139,6 +140,7 @@ COSTS_N_INSNS (1), /* bfi. */ COSTS_N_INSNS (1), /* bfx. */ 0, /* clz. */ + 0, /* rev. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -239,6 +241,7 @@ COSTS_N_INSNS (1), /* bfi. */ 0, /* bfx. */ 0, /* clz. */ + 0, /* rev. */ 0, /* non_exec. */ true /* non_exec_costs_exec. 
*/ }, --- a/src/gcc/config/arm/thumb2.md +++ b/src/gcc/config/arm/thumb2.md @@ -329,7 +329,7 @@ movw%?\\t%0, %L1\\t%@ movhi str%(h%)\\t%1, %0\\t%@ movhi ldr%(h%)\\t%0, %1\\t%@ movhi" - [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_reg,store1,load1") + [(set_attr "type" "mov_reg,mov_imm,mov_imm,mov_imm,store1,load1") (set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,no,yes,no,no,no") (set_attr "length" "2,4,2,4,4,4") @@ -1370,6 +1370,103 @@ (set_attr "type" "alu_reg")] ) +; Constants for op 2 will never be given to these patterns. +(define_insn_and_split "*iordi_notdi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (not:DI (match_operand:DI 1 "s_register_operand" "0,r")) + (match_operand:DI 2 "s_register_operand" "r,0")))] + "TARGET_THUMB2" + "#" + "TARGET_THUMB2 && reload_completed" + [(set (match_dup 0) (ior:SI (not:SI (match_dup 1)) (match_dup 2))) + (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[5] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*iordi_notzesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (not:DI (zero_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r"))) + (match_operand:DI 1 "s_register_operand" "0,?r")))] + "TARGET_THUMB2" + "#" + ; (not (zero_extend...)) means operand0 will always be 0xffffffff + "TARGET_THUMB2 && reload_completed" + [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (const_int -1))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*iordi_notdi_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "0,?r")) + (zero_extend:DI + (match_operand:SI 1 "s_register_operand" "r,r"))))] + "TARGET_THUMB2" + "#" + "TARGET_THUMB2 && reload_completed" + [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (not:SI (match_dup 4)))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[1] = gen_lowpart (SImode, operands[1]); + operands[4] = gen_highpart (SImode, operands[2]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + +(define_insn_and_split "*iordi_notsesidi_di" + [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") + (ior:DI (not:DI (sign_extend:DI + (match_operand:SI 2 "s_register_operand" "r,r"))) + (match_operand:DI 1 "s_register_operand" "0,r")))] + "TARGET_THUMB2" + "#" + "TARGET_THUMB2 && reload_completed" + [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (ior:SI (not:SI + (ashiftrt:SI (match_dup 2) (const_int 31))) + (match_dup 4)))] + " + { + operands[3] = 
gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[4] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + (define_insn "*orsi_notsi_si" [(set (match_operand:SI 0 "s_register_operand" "=r") (ior:SI (not:SI (match_operand:SI 2 "s_register_operand" "r")) --- a/src/gcc/config/arm/arm.c +++ b/src/gcc/config/arm/arm.c @@ -50,6 +50,7 @@ #include "except.h" #include "tm_p.h" #include "target.h" +#include "sched-int.h" #include "target-def.h" #include "debug.h" #include "langhooks.h" @@ -59,6 +60,7 @@ #include "params.h" #include "opts.h" #include "dumpfile.h" +#include "gimple-expr.h" /* Forward definitions of types. */ typedef struct minipool_node Mnode; @@ -94,6 +96,7 @@ static bool thumb_force_lr_save (void); static unsigned arm_size_return_regs (void); static bool arm_assemble_integer (rtx, unsigned int, int); +static void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update); static void arm_print_operand (FILE *, rtx, int); static void arm_print_operand_address (FILE *, rtx); static bool arm_print_operand_punct_valid_p (unsigned char code); @@ -585,6 +588,9 @@ #undef TARGET_MANGLE_TYPE #define TARGET_MANGLE_TYPE arm_mangle_type +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv + #undef TARGET_BUILD_BUILTIN_VA_LIST #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list #undef TARGET_EXPAND_BUILTIN_VA_START @@ -986,6 +992,7 @@ COSTS_N_INSNS (1), /* bfi. */ COSTS_N_INSNS (1), /* bfx. */ 0, /* clz. */ + 0, /* rev. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1069,7 +1076,109 @@ } }; +const struct cpu_cost_table cortexa8_extra_costs = +{ + /* ALU */ + { + 0, /* arith. */ + 0, /* logical. */ + COSTS_N_INSNS (1), /* shift. */ + 0, /* shift_reg. */ + COSTS_N_INSNS (1), /* arith_shift. */ + 0, /* arith_shift_reg. */ + COSTS_N_INSNS (1), /* log_shift. */ + 0, /* log_shift_reg. */ + 0, /* extend. */ + 0, /* extend_arith. */ + 0, /* bfi. */ + 0, /* bfx. */ + 0, /* clz. */ + 0, /* rev. */ + 0, /* non_exec. */ + true /* non_exec_costs_exec. */ + }, + { + /* MULT SImode */ + { + COSTS_N_INSNS (1), /* simple. */ + COSTS_N_INSNS (1), /* flag_setting. */ + COSTS_N_INSNS (1), /* extend. */ + COSTS_N_INSNS (1), /* add. */ + COSTS_N_INSNS (1), /* extend_add. */ + COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */ + }, + /* MULT DImode */ + { + 0, /* simple (N/A). */ + 0, /* flag_setting (N/A). */ + COSTS_N_INSNS (2), /* extend. */ + 0, /* add (N/A). */ + COSTS_N_INSNS (2), /* extend_add. */ + 0 /* idiv (N/A). */ + } + }, + /* LD/ST */ + { + COSTS_N_INSNS (1), /* load. */ + COSTS_N_INSNS (1), /* load_sign_extend. */ + COSTS_N_INSNS (1), /* ldrd. */ + COSTS_N_INSNS (1), /* ldm_1st. */ + 1, /* ldm_regs_per_insn_1st. */ + 2, /* ldm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* loadf. */ + COSTS_N_INSNS (1), /* loadd. */ + COSTS_N_INSNS (1), /* load_unaligned. */ + COSTS_N_INSNS (1), /* store. */ + COSTS_N_INSNS (1), /* strd. */ + COSTS_N_INSNS (1), /* stm_1st. */ + 1, /* stm_regs_per_insn_1st. */ + 2, /* stm_regs_per_insn_subsequent. */ + COSTS_N_INSNS (1), /* storef. */ + COSTS_N_INSNS (1), /* stored. */ + COSTS_N_INSNS (1) /* store_unaligned. */ + }, + { + /* FP SFmode */ + { + COSTS_N_INSNS (36), /* div. */ + COSTS_N_INSNS (11), /* mult. */ + COSTS_N_INSNS (20), /* mult_addsub. 
*/ + COSTS_N_INSNS (30), /* fma. */ + COSTS_N_INSNS (9), /* addsub. */ + COSTS_N_INSNS (3), /* fpconst. */ + COSTS_N_INSNS (3), /* neg. */ + COSTS_N_INSNS (6), /* compare. */ + COSTS_N_INSNS (4), /* widen. */ + COSTS_N_INSNS (4), /* narrow. */ + COSTS_N_INSNS (8), /* toint. */ + COSTS_N_INSNS (8), /* fromint. */ + COSTS_N_INSNS (8) /* roundint. */ + }, + /* FP DFmode */ + { + COSTS_N_INSNS (64), /* div. */ + COSTS_N_INSNS (16), /* mult. */ + COSTS_N_INSNS (25), /* mult_addsub. */ + COSTS_N_INSNS (30), /* fma. */ + COSTS_N_INSNS (9), /* addsub. */ + COSTS_N_INSNS (3), /* fpconst. */ + COSTS_N_INSNS (3), /* neg. */ + COSTS_N_INSNS (6), /* compare. */ + COSTS_N_INSNS (6), /* widen. */ + COSTS_N_INSNS (6), /* narrow. */ + COSTS_N_INSNS (8), /* toint. */ + COSTS_N_INSNS (8), /* fromint. */ + COSTS_N_INSNS (8) /* roundint. */ + } + }, + /* Vector */ + { + COSTS_N_INSNS (1) /* alu. */ + } +}; + + const struct cpu_cost_table cortexa7_extra_costs = { /* ALU */ @@ -1087,6 +1196,7 @@ COSTS_N_INSNS (1), /* bfi. */ COSTS_N_INSNS (1), /* bfx. */ COSTS_N_INSNS (1), /* clz. */ + COSTS_N_INSNS (1), /* rev. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1188,6 +1298,7 @@ 0, /* bfi. */ COSTS_N_INSNS (1), /* bfx. */ COSTS_N_INSNS (1), /* clz. */ + COSTS_N_INSNS (1), /* rev. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1288,6 +1399,7 @@ COSTS_N_INSNS (1), /* bfi. */ 0, /* bfx. */ 0, /* clz. */ + 0, /* rev. */ 0, /* non_exec. */ true /* non_exec_costs_exec. */ }, @@ -1388,6 +1500,7 @@ 0, /* bfi. */ 0, /* bfx. */ 0, /* clz. */ + 0, /* rev. */ COSTS_N_INSNS (1), /* non_exec. */ false /* non_exec_costs_exec. */ }, @@ -1484,7 +1597,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_fastmul_tune = @@ -1500,7 +1614,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; /* StrongARM has early execution of branches, so a sequence that is worth @@ -1519,7 +1634,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_xscale_tune = @@ -1535,7 +1651,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_9e_tune = @@ -1551,7 +1668,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_v6t2_tune = @@ -1567,7 +1685,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. 
*/ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; /* Generic Cortex tuning. Use more specific tunings if appropriate. */ @@ -1584,9 +1703,27 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; +const struct tune_params arm_cortex_a8_tune = +{ + arm_9e_rtx_costs, + &cortexa8_extra_costs, + NULL, /* Sched adj cost. */ + 1, /* Constant limit. */ + 5, /* Max cond insns. */ + ARM_PREFETCH_NOT_BENEFICIAL, + false, /* Prefer constant pool. */ + arm_default_branch_cost, + false, /* Prefer LDRD/STRD. */ + {true, true}, /* Prefer non short circuit. */ + &arm_default_vec_cost, /* Vectorizer costs. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ +}; + const struct tune_params arm_cortex_a7_tune = { arm_9e_rtx_costs, @@ -1600,7 +1737,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_cortex_a15_tune = @@ -1616,7 +1754,8 @@ true, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + true, true /* Prefer 32-bit encodings. */ }; const struct tune_params arm_cortex_a53_tune = @@ -1632,7 +1771,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_cortex_a57_tune = @@ -1648,7 +1788,8 @@ true, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + true, true /* Prefer 32-bit encodings. */ }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is @@ -1667,7 +1808,8 @@ false, /* Prefer LDRD/STRD. */ {false, false}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_cortex_a9_tune = @@ -1683,7 +1825,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_cortex_a12_tune = @@ -1699,7 +1842,8 @@ true, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single @@ -1722,7 +1866,8 @@ false, /* Prefer LDRD/STRD. */ {false, false}, /* Prefer non short circuit. 
*/ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than @@ -1740,7 +1885,8 @@ false, /* Prefer LDRD/STRD. */ {false, false}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; const struct tune_params arm_fa726te_tune = @@ -1756,7 +1902,8 @@ false, /* Prefer LDRD/STRD. */ {true, true}, /* Prefer non short circuit. */ &arm_default_vec_cost, /* Vectorizer costs. */ - false /* Prefer Neon for 64-bits bitops. */ + false, /* Prefer Neon for 64-bits bitops. */ + false, false /* Prefer 32-bit encodings. */ }; @@ -2807,7 +2954,7 @@ prefer_neon_for_64bits = true; /* Use the alternative scheduling-pressure algorithm by default. */ - maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, + maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, global_options.x_param_values, global_options_set.x_param_values); @@ -6080,11 +6227,6 @@ if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) return false; - /* Cannot tail-call to long calls, since these are out of range of - a branch instruction. */ - if (decl && arm_is_long_call_p (decl)) - return false; - /* If we are interworking and the function is not declared static then we can't tail-call it unless we know that it exists in this compilation unit (since it might be a Thumb routine). */ @@ -9338,6 +9480,47 @@ *cost = LIBCALL_COST (2); return false; + case BSWAP: + if (arm_arch6) + { + if (mode == SImode) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.rev; + + return false; + } + } + else + { + /* No rev instruction available. Look at arm_legacy_rev + and thumb_legacy_rev for the form of RTL used then. */ + if (TARGET_THUMB) + { + *cost = COSTS_N_INSNS (10); + + if (speed_p) + { + *cost += 6 * extra_cost->alu.shift; + *cost += 3 * extra_cost->alu.logical; + } + } + else + { + *cost = COSTS_N_INSNS (5); + + if (speed_p) + { + *cost += 2 * extra_cost->alu.shift; + *cost += extra_cost->alu.arith_shift; + *cost += 2 * extra_cost->alu.logical; + } + } + return true; + } + return false; + case MINUS: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) @@ -9720,8 +9903,17 @@ /* Vector mode? */ *cost = LIBCALL_COST (2); return false; + case IOR: + if (mode == SImode && arm_arch6 && aarch_rev16_p (x)) + { + *cost = COSTS_N_INSNS (1); + if (speed_p) + *cost += extra_cost->alu.rev; - case AND: case XOR: case IOR: + return true; + } + /* Fall through. */ + case AND: case XOR: if (mode == SImode) { enum rtx_code subcode = GET_CODE (XEXP (x, 0)); @@ -10620,6 +10812,36 @@ *cost = LIBCALL_COST (1); return false; + case FMA: + if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA) + { + rtx op0 = XEXP (x, 0); + rtx op1 = XEXP (x, 1); + rtx op2 = XEXP (x, 2); + + *cost = COSTS_N_INSNS (1); + + /* vfms or vfnma. */ + if (GET_CODE (op0) == NEG) + op0 = XEXP (op0, 0); + + /* vfnms or vfnma. 
*/ + if (GET_CODE (op2) == NEG) + op2 = XEXP (op2, 0); + + *cost += rtx_cost (op0, FMA, 0, speed_p); + *cost += rtx_cost (op1, FMA, 1, speed_p); + *cost += rtx_cost (op2, FMA, 2, speed_p); + + if (speed_p) + *cost += extra_cost->fp[mode ==DFmode].fma; + + return true; + } + + *cost = LIBCALL_COST (3); + return false; + case FIX: case UNSIGNED_FIX: if (TARGET_HARD_FLOAT) @@ -10670,10 +10892,16 @@ return true; case ASM_OPERANDS: - /* Just a guess. Cost one insn per input. */ - *cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x)); - return true; + { + /* Just a guess. Guess number of instructions in the asm + plus one insn per input. Always a minimum of COSTS_N_INSNS (1) + though (see PR60663). */ + int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); + int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); + *cost = COSTS_N_INSNS (asm_length + num_operands); + return true; + } default: if (mode != VOIDmode) *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); @@ -16788,9 +17016,20 @@ compute_bb_for_insn (); df_analyze (); + enum Convert_Action {SKIP, CONV, SWAP_CONV}; + FOR_EACH_BB_FN (bb, cfun) { + if (current_tune->disparage_flag_setting_t16_encodings + && optimize_bb_for_speed_p (bb)) + continue; + rtx insn; + Convert_Action action = SKIP; + Convert_Action action_for_partial_flag_setting + = (current_tune->disparage_partial_flag_setting_t16_encodings + && optimize_bb_for_speed_p (bb)) + ? SKIP : CONV; COPY_REG_SET (&live, DF_LR_OUT (bb)); df_simulate_initialize_backwards (bb, &live); @@ -16800,7 +17039,7 @@ && !REGNO_REG_SET_P (&live, CC_REGNUM) && GET_CODE (PATTERN (insn)) == SET) { - enum {SKIP, CONV, SWAP_CONV} action = SKIP; + action = SKIP; rtx pat = PATTERN (insn); rtx dst = XEXP (pat, 0); rtx src = XEXP (pat, 1); @@ -16881,10 +17120,11 @@ /* ANDS , */ if (rtx_equal_p (dst, op0) && low_register_operand (op1, SImode)) - action = CONV; + action = action_for_partial_flag_setting; else if (rtx_equal_p (dst, op1) && low_register_operand (op0, SImode)) - action = SWAP_CONV; + action = action_for_partial_flag_setting == SKIP + ? SKIP : SWAP_CONV; break; case ASHIFTRT: @@ -16895,7 +17135,7 @@ /* LSLS , */ if (rtx_equal_p (dst, op0) && low_register_operand (op1, SImode)) - action = CONV; + action = action_for_partial_flag_setting; /* ASRS ,,# */ /* LSRS ,,# */ /* LSLS ,,# */ @@ -16902,7 +17142,7 @@ else if (low_register_operand (op0, SImode) && CONST_INT_P (op1) && IN_RANGE (INTVAL (op1), 0, 31)) - action = CONV; + action = action_for_partial_flag_setting; break; case ROTATERT: @@ -16909,12 +17149,16 @@ /* RORS , */ if (rtx_equal_p (dst, op0) && low_register_operand (op1, SImode)) - action = CONV; + action = action_for_partial_flag_setting; break; case NOT: + /* MVNS , */ + if (low_register_operand (op0, SImode)) + action = action_for_partial_flag_setting; + break; + case NEG: - /* MVNS , */ /* NEGS , (a.k.a RSBS) */ if (low_register_operand (op0, SImode)) action = CONV; @@ -16924,7 +17168,7 @@ /* MOVS ,# */ if (CONST_INT_P (src) && IN_RANGE (INTVAL (src), 0, 255)) - action = CONV; + action = action_for_partial_flag_setting; break; case REG: @@ -21040,7 +21284,15 @@ } -/* If CODE is 'd', then the X is a condition operand and the instruction +/* Globally reserved letters: acln + Puncutation letters currently used: @_|?().!# + Lower case letters currently used: bcdefhimpqtvwxyz + Upper case letters currently used: ABCDFGHJKLMNOPQRSTU + Letters previously used, but now deprecated/obsolete: sVWXYZ. + + Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P. 
+ + If CODE is 'd', then the X is a condition operand and the instruction should only be executed if the condition is true. if CODE is 'D', then the X is a condition operand and the instruction should only be executed if the condition is false: however, if the mode @@ -21180,6 +21432,19 @@ } return; + case 'b': + /* Print the log2 of a CONST_INT. */ + { + HOST_WIDE_INT val; + + if (!CONST_INT_P (x) + || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0) + output_operand_lossage ("Unsupported operand for code '%c'", code); + else + fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val); + } + return; + case 'L': /* The low 16 bits of an immediate constant. */ fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff); @@ -21422,7 +21687,7 @@ register. */ case 'p': { - int mode = GET_MODE (x); + enum machine_mode mode = GET_MODE (x); int regno; if (GET_MODE_SIZE (mode) != 8 || !REG_P (x)) @@ -21446,7 +21711,7 @@ case 'P': case 'q': { - int mode = GET_MODE (x); + enum machine_mode mode = GET_MODE (x); int is_quad = (code == 'q'); int regno; @@ -21482,7 +21747,7 @@ case 'e': case 'f': { - int mode = GET_MODE (x); + enum machine_mode mode = GET_MODE (x); int regno; if ((GET_MODE_SIZE (mode) != 16 @@ -21615,7 +21880,7 @@ /* Translate an S register number into a D register number and element index. */ case 'y': { - int mode = GET_MODE (x); + enum machine_mode mode = GET_MODE (x); int regno; if (GET_MODE_SIZE (mode) != 4 || !REG_P (x)) @@ -21649,7 +21914,7 @@ number into a D register number and element index. */ case 'z': { - int mode = GET_MODE (x); + enum machine_mode mode = GET_MODE (x); int regno; if (GET_MODE_SIZE (mode) != 2 || !REG_P (x)) @@ -22610,13 +22875,20 @@ } /* We allow almost any value to be stored in the general registers. - Restrict doubleword quantities to even register pairs so that we can - use ldrd. Do not allow very large Neon structure opaque modes in - general registers; they would use too many. */ + Restrict doubleword quantities to even register pairs in ARM state + so that we can use ldrd. Do not allow very large Neon structure + opaque modes in general registers; they would use too many. */ if (regno <= LAST_ARM_REGNUM) - return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0) - && ARM_NUM_REGS (mode) <= 4; + { + if (ARM_NUM_REGS (mode) > 4) + return FALSE; + if (TARGET_THUMB2) + return TRUE; + + return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0); + } + if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM) /* We only allow integers in the fake hard registers. 
*/ @@ -22827,6 +23099,7 @@ NEON_BINOP, NEON_TERNOP, NEON_UNOP, + NEON_BSWAP, NEON_GETLANE, NEON_SETLANE, NEON_CREATE, @@ -22848,7 +23121,6 @@ NEON_FLOAT_NARROW, NEON_FIXCONV, NEON_SELECT, - NEON_RESULTPAIR, NEON_REINTERP, NEON_VTBL, NEON_VTBX, @@ -23217,6 +23489,9 @@ ARM_BUILTIN_CRC32CH, ARM_BUILTIN_CRC32CW, + ARM_BUILTIN_GET_FPSCR, + ARM_BUILTIN_SET_FPSCR, + #undef CRYPTO1 #undef CRYPTO2 #undef CRYPTO3 @@ -23294,14 +23569,19 @@ tree V8QI_type_node; tree V4HI_type_node; + tree V4UHI_type_node; tree V4HF_type_node; tree V2SI_type_node; + tree V2USI_type_node; tree V2SF_type_node; tree V16QI_type_node; tree V8HI_type_node; + tree V8UHI_type_node; tree V4SI_type_node; + tree V4USI_type_node; tree V4SF_type_node; tree V2DI_type_node; + tree V2UDI_type_node; tree intUQI_type_node; tree intUHI_type_node; @@ -23313,27 +23593,6 @@ tree intCI_type_node; tree intXI_type_node; - tree V8QI_pointer_node; - tree V4HI_pointer_node; - tree V2SI_pointer_node; - tree V2SF_pointer_node; - tree V16QI_pointer_node; - tree V8HI_pointer_node; - tree V4SI_pointer_node; - tree V4SF_pointer_node; - tree V2DI_pointer_node; - - tree void_ftype_pv8qi_v8qi_v8qi; - tree void_ftype_pv4hi_v4hi_v4hi; - tree void_ftype_pv2si_v2si_v2si; - tree void_ftype_pv2sf_v2sf_v2sf; - tree void_ftype_pdi_di_di; - tree void_ftype_pv16qi_v16qi_v16qi; - tree void_ftype_pv8hi_v8hi_v8hi; - tree void_ftype_pv4si_v4si_v4si; - tree void_ftype_pv4sf_v4sf_v4sf; - tree void_ftype_pv2di_v2di_v2di; - tree reinterp_ftype_dreg[NUM_DREG_TYPES][NUM_DREG_TYPES]; tree reinterp_ftype_qreg[NUM_QREG_TYPES][NUM_QREG_TYPES]; tree dreg_types[NUM_DREG_TYPES], qreg_types[NUM_QREG_TYPES]; @@ -23397,6 +23656,12 @@ const_intDI_pointer_node = build_pointer_type (const_intDI_node); const_float_pointer_node = build_pointer_type (const_float_node); + /* Unsigned integer types for various mode sizes. */ + intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); + intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); + intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); + intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); + neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); /* Now create vector types based on our NEON element types. */ /* 64-bit vectors. */ V8QI_type_node = @@ -23403,10 +23668,14 @@ build_vector_type_for_mode (neon_intQI_type_node, V8QImode); V4HI_type_node = build_vector_type_for_mode (neon_intHI_type_node, V4HImode); + V4UHI_type_node = + build_vector_type_for_mode (intUHI_type_node, V4HImode); V4HF_type_node = build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode); V2SI_type_node = build_vector_type_for_mode (neon_intSI_type_node, V2SImode); + V2USI_type_node = + build_vector_type_for_mode (intUSI_type_node, V2SImode); V2SF_type_node = build_vector_type_for_mode (neon_float_type_node, V2SFmode); /* 128-bit vectors. 
*/ @@ -23414,21 +23683,20 @@ build_vector_type_for_mode (neon_intQI_type_node, V16QImode); V8HI_type_node = build_vector_type_for_mode (neon_intHI_type_node, V8HImode); + V8UHI_type_node = + build_vector_type_for_mode (intUHI_type_node, V8HImode); V4SI_type_node = build_vector_type_for_mode (neon_intSI_type_node, V4SImode); + V4USI_type_node = + build_vector_type_for_mode (intUSI_type_node, V4SImode); V4SF_type_node = build_vector_type_for_mode (neon_float_type_node, V4SFmode); V2DI_type_node = build_vector_type_for_mode (neon_intDI_type_node, V2DImode); + V2UDI_type_node = + build_vector_type_for_mode (intUDI_type_node, V2DImode); - /* Unsigned integer types for various mode sizes. */ - intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode)); - intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode)); - intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode)); - intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode)); - neon_intUTI_type_node = make_unsigned_type (GET_MODE_PRECISION (TImode)); - (*lang_hooks.types.register_builtin_type) (intUQI_type_node, "__builtin_neon_uqi"); (*lang_hooks.types.register_builtin_type) (intUHI_type_node, @@ -23459,53 +23727,8 @@ (*lang_hooks.types.register_builtin_type) (intXI_type_node, "__builtin_neon_xi"); - /* Pointers to vector types. */ - V8QI_pointer_node = build_pointer_type (V8QI_type_node); - V4HI_pointer_node = build_pointer_type (V4HI_type_node); - V2SI_pointer_node = build_pointer_type (V2SI_type_node); - V2SF_pointer_node = build_pointer_type (V2SF_type_node); - V16QI_pointer_node = build_pointer_type (V16QI_type_node); - V8HI_pointer_node = build_pointer_type (V8HI_type_node); - V4SI_pointer_node = build_pointer_type (V4SI_type_node); - V4SF_pointer_node = build_pointer_type (V4SF_type_node); - V2DI_pointer_node = build_pointer_type (V2DI_type_node); - - /* Operations which return results as pairs. 
*/ - void_ftype_pv8qi_v8qi_v8qi = - build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node, - V8QI_type_node, NULL); - void_ftype_pv4hi_v4hi_v4hi = - build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node, - V4HI_type_node, NULL); - void_ftype_pv2si_v2si_v2si = - build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node, - V2SI_type_node, NULL); - void_ftype_pv2sf_v2sf_v2sf = - build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node, - V2SF_type_node, NULL); - void_ftype_pdi_di_di = - build_function_type_list (void_type_node, intDI_pointer_node, - neon_intDI_type_node, neon_intDI_type_node, NULL); - void_ftype_pv16qi_v16qi_v16qi = - build_function_type_list (void_type_node, V16QI_pointer_node, - V16QI_type_node, V16QI_type_node, NULL); - void_ftype_pv8hi_v8hi_v8hi = - build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node, - V8HI_type_node, NULL); - void_ftype_pv4si_v4si_v4si = - build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node, - V4SI_type_node, NULL); - void_ftype_pv4sf_v4sf_v4sf = - build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node, - V4SF_type_node, NULL); - void_ftype_pv2di_v2di_v2di = - build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node, - V2DI_type_node, NULL); - if (TARGET_CRYPTO && TARGET_HARD_FLOAT) { - tree V4USI_type_node = - build_vector_type_for_mode (intUSI_type_node, V4SImode); tree V16UQI_type_node = build_vector_type_for_mode (intUQI_type_node, V16QImode); @@ -23791,25 +24014,6 @@ } break; - case NEON_RESULTPAIR: - { - switch (insn_data[d->code].operand[1].mode) - { - case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break; - case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break; - case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break; - case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break; - case DImode: ftype = void_ftype_pdi_di_di; break; - case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break; - case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break; - case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break; - case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break; - case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break; - default: gcc_unreachable (); - } - } - break; - case NEON_REINTERP: { /* We iterate over NUM_DREG_TYPES doubleword types, @@ -23869,6 +24073,31 @@ ftype = build_function_type_list (return_type, eltype, NULL); break; } + case NEON_BSWAP: + { + tree eltype = NULL_TREE; + switch (insn_data[d->code].operand[1].mode) + { + case V4HImode: + eltype = V4UHI_type_node; + break; + case V8HImode: + eltype = V8UHI_type_node; + break; + case V2SImode: + eltype = V2USI_type_node; + break; + case V4SImode: + eltype = V4USI_type_node; + break; + case V2DImode: + eltype = V2UDI_type_node; + break; + default: gcc_unreachable (); + } + ftype = build_function_type_list (eltype, eltype, NULL); + break; + } default: gcc_unreachable (); } @@ -24015,6 +24244,15 @@ IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ) IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ) + +#define FP_BUILTIN(L, U) \ + {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ + UNKNOWN, 0}, + + FP_BUILTIN (set_fpscr, GET_FPSCR) + FP_BUILTIN (get_fpscr, SET_FPSCR) +#undef FP_BUILTIN + #define CRC32_BUILTIN(L, U) \ {0, CODE_FOR_##L, "__builtin_arm_"#L, ARM_BUILTIN_##U, \ UNKNOWN, 0}, @@ -24529,6 +24767,21 @@ if (TARGET_CRC32) arm_init_crc32_builtins (); + + if (TARGET_VFP) + { + tree ftype_set_fpscr + = 
build_function_type_list (void_type_node, unsigned_type_node, NULL); + tree ftype_get_fpscr + = build_function_type_list (unsigned_type_node, NULL); + + arm_builtin_decls[ARM_BUILTIN_GET_FPSCR] + = add_builtin_function ("__builtin_arm_ldfscr", ftype_get_fpscr, + ARM_BUILTIN_GET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); + arm_builtin_decls[ARM_BUILTIN_SET_FPSCR] + = add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr, + ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE); + } } /* Return the ARM builtin for CODE. */ @@ -25043,6 +25296,7 @@ case NEON_SPLIT: case NEON_FLOAT_WIDEN: case NEON_FLOAT_NARROW: + case NEON_BSWAP: case NEON_REINTERP: return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); @@ -25052,11 +25306,6 @@ return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); - case NEON_RESULTPAIR: - return arm_expand_neon_args (target, icode, 0, type_mode, exp, fcode, - NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, - NEON_ARG_STOP); - case NEON_LANEMUL: case NEON_LANEMULL: case NEON_LANEMULH: @@ -25118,24 +25367,6 @@ emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src)); } -/* Emit code to place a Neon pair result in memory locations (with equal - registers). */ -void -neon_emit_pair_result_insn (enum machine_mode mode, - rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr, - rtx op1, rtx op2) -{ - rtx mem = gen_rtx_MEM (mode, destaddr); - rtx tmp1 = gen_reg_rtx (mode); - rtx tmp2 = gen_reg_rtx (mode); - - emit_insn (intfn (tmp1, op1, op2, tmp2)); - - emit_move_insn (mem, tmp1); - mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); - emit_move_insn (mem, tmp2); -} - /* Set up OPERANDS for a register copy from SRC to DEST, taking care not to early-clobber SRC registers in the process. @@ -25256,6 +25487,25 @@ switch (fcode) { + case ARM_BUILTIN_GET_FPSCR: + case ARM_BUILTIN_SET_FPSCR: + if (fcode == ARM_BUILTIN_GET_FPSCR) + { + icode = CODE_FOR_get_fpscr; + target = gen_reg_rtx (SImode); + pat = GEN_FCN (icode) (target); + } + else + { + target = NULL_RTX; + icode = CODE_FOR_set_fpscr; + arg0 = CALL_EXPR_ARG (exp, 0); + op0 = expand_normal (arg0); + pat = GEN_FCN (icode) (op0); + } + emit_insn (pat); + return target; + case ARM_BUILTIN_TEXTRMSB: case ARM_BUILTIN_TEXTRMUB: case ARM_BUILTIN_TEXTRMSH: @@ -25889,7 +26139,7 @@ int pops_needed; unsigned available; unsigned required; - int mode; + enum machine_mode mode; int size; int restore_a4 = FALSE; @@ -29550,8 +29800,7 @@ int in_n, out_n; if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE - || !(TARGET_NEON && TARGET_FPU_ARMV8 && flag_unsafe_math_optimizations)) + || TREE_CODE (type_in) != VECTOR_TYPE) return NULL_TREE; out_mode = TYPE_MODE (TREE_TYPE (type_out)); @@ -29563,7 +29812,13 @@ decl of the vectorized builtin for the appropriate vector mode. NULL_TREE is returned if no such builtin is available. 
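   For instance, with the BSWAP entries added below, a byte-swapping loop
   such as the following becomes a candidate for the NEON byte-reversal
   builtins; whether it is actually vectorized still depends on the usual
   NEON, alignment and cost-model conditions.  The function name is
   illustrative only:

     #include <stdint.h>

     // Byte-swap an array of 32-bit words.  With the vectorized bswap
     // builtins registered, the vectorizer can use a NEON byte-reversal
     // permute (vrev32.8 style) instead of scalar rev instructions.
     void bswap32_array (uint32_t *x, int n)
     {
       for (int i = 0; i < n; i++)
         x[i] = __builtin_bswap32 (x[i]);
     }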
*/ #undef ARM_CHECK_BUILTIN_MODE -#define ARM_CHECK_BUILTIN_MODE(C) \ +#define ARM_CHECK_BUILTIN_MODE(C) \ + (TARGET_NEON && TARGET_FPU_ARMV8 \ + && flag_unsafe_math_optimizations \ + && ARM_CHECK_BUILTIN_MODE_1 (C)) + +#undef ARM_CHECK_BUILTIN_MODE_1 +#define ARM_CHECK_BUILTIN_MODE_1(C) \ (out_mode == SFmode && out_n == C \ && in_mode == SFmode && in_n == C) @@ -29588,6 +29843,30 @@ return ARM_FIND_VRINT_VARIANT (vrintz); case BUILT_IN_ROUNDF: return ARM_FIND_VRINT_VARIANT (vrinta); +#undef ARM_CHECK_BUILTIN_MODE +#define ARM_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Imode && out_n == C \ + && in_mode == N##Imode && in_n == C) + case BUILT_IN_BSWAP16: + if (ARM_CHECK_BUILTIN_MODE (4, H)) + return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false); + else if (ARM_CHECK_BUILTIN_MODE (8, H)) + return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false); + else + return NULL_TREE; + case BUILT_IN_BSWAP32: + if (ARM_CHECK_BUILTIN_MODE (2, S)) + return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false); + else if (ARM_CHECK_BUILTIN_MODE (4, S)) + return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false); + else + return NULL_TREE; + case BUILT_IN_BSWAP64: + if (ARM_CHECK_BUILTIN_MODE (2, D)) + return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false); + else + return NULL_TREE; + default: return NULL_TREE; } @@ -31167,4 +31446,73 @@ return false; } +static void +arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + const unsigned ARM_FE_INVALID = 1; + const unsigned ARM_FE_DIVBYZERO = 2; + const unsigned ARM_FE_OVERFLOW = 4; + const unsigned ARM_FE_UNDERFLOW = 8; + const unsigned ARM_FE_INEXACT = 16; + const unsigned HOST_WIDE_INT ARM_FE_ALL_EXCEPT = (ARM_FE_INVALID + | ARM_FE_DIVBYZERO + | ARM_FE_OVERFLOW + | ARM_FE_UNDERFLOW + | ARM_FE_INEXACT); + const unsigned HOST_WIDE_INT ARM_FE_EXCEPT_SHIFT = 8; + tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; + tree new_fenv_var, reload_fenv, restore_fnenv; + tree update_call, atomic_feraiseexcept, hold_fnclex; + + if (!TARGET_VFP) + return; + + /* Generate the equivalent of : + unsigned int fenv_var; + fenv_var = __builtin_arm_get_fpscr (); + + unsigned int masked_fenv; + masked_fenv = fenv_var & mask; + + __builtin_arm_set_fpscr (masked_fenv); */ + + fenv_var = create_tmp_var (unsigned_type_node, NULL); + get_fpscr = arm_builtin_decls[ARM_BUILTIN_GET_FPSCR]; + set_fpscr = arm_builtin_decls[ARM_BUILTIN_SET_FPSCR]; + mask = build_int_cst (unsigned_type_node, + ~((ARM_FE_ALL_EXCEPT << ARM_FE_EXCEPT_SHIFT) + | ARM_FE_ALL_EXCEPT)); + ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node, + fenv_var, build_call_expr (get_fpscr, 0)); + masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask); + hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); + *hold = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), + hold_fnclex); + + /* Store the value of masked_fenv to clear the exceptions: + __builtin_arm_set_fpscr (masked_fenv); */ + + *clear = build_call_expr (set_fpscr, 1, masked_fenv); + + /* Generate the equivalent of : + unsigned int new_fenv_var; + new_fenv_var = __builtin_arm_get_fpscr (); + + __builtin_arm_set_fpscr (fenv_var); + + __atomic_feraiseexcept (new_fenv_var); */ + + new_fenv_var = create_tmp_var (unsigned_type_node, NULL); + reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var, + build_call_expr (get_fpscr, 0)); + restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); + atomic_feraiseexcept = 
builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + update_call = build_call_expr (atomic_feraiseexcept, 1, + fold_convert (integer_type_node, new_fenv_var)); + *update = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, + reload_fenv, restore_fnenv), update_call); +} + #include "gt-arm.h" --- a/src/gcc/config/arm/unspecs.md +++ b/src/gcc/config/arm/unspecs.md @@ -143,6 +143,8 @@ VUNSPEC_SLX ; Represent a store-register-release-exclusive. VUNSPEC_LDA ; Represent a store-register-acquire. VUNSPEC_STL ; Represent a store-register-release. + VUNSPEC_GET_FPSCR ; Represent fetch of FPSCR content. + VUNSPEC_SET_FPSCR ; Represent assign of FPSCR content. ]) ;; Enumerators for NEON unspecs. --- a/src/gcc/config/arm/arm-modes.def +++ b/src/gcc/config/arm/arm-modes.def @@ -21,9 +21,6 @@ along with GCC; see the file COPYING3. If not see . */ -/* Extended precision floating point. - FIXME What format is this? */ -FLOAT_MODE (XF, 12, 0); /* Half-precision floating point */ FLOAT_MODE (HF, 2, 0); --- a/src/gcc/config/arm/arm-cores.def +++ b/src/gcc/config/arm/arm-cores.def @@ -141,7 +141,7 @@ ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A, FL_LDSCHED, cortex) ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A, FL_LDSCHED, cortex_a5) ARM_CORE("cortex-a7", cortexa7, cortexa7, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a7) -ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex) +ARM_CORE("cortex-a8", cortexa8, cortexa8, 7A, FL_LDSCHED, cortex_a8) ARM_CORE("cortex-a9", cortexa9, cortexa9, 7A, FL_LDSCHED, cortex_a9) ARM_CORE("cortex-a12", cortexa12, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a12) ARM_CORE("cortex-a15", cortexa15, cortexa15, 7A, FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV, cortex_a15) --- a/src/gcc/config/arm/arm-protos.h +++ b/src/gcc/config/arm/arm-protos.h @@ -272,6 +272,11 @@ const struct cpu_vec_costs* vec_costs; /* Prefer Neon for 64-bit bitops. */ bool prefer_neon_for_64bits; + /* Prefer 32-bit encoding instead of flag-setting 16-bit encoding. */ + bool disparage_flag_setting_t16_encodings; + /* Prefer 32-bit encoding instead of 16-bit encoding where subset of flags + would be set. */ + bool disparage_partial_flag_setting_t16_encodings; }; extern const struct tune_params *current_tune; --- a/src/gcc/config/arm/vfp.md +++ b/src/gcc/config/arm/vfp.md @@ -100,7 +100,7 @@ " [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no") - (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_reg,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") + (set_attr "type" "mov_reg,mov_reg,mov_reg,mvn_reg,mov_imm,load1,load1,store1,store1,f_mcr,f_mrc,fmov,f_loads,f_stores") (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4") (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*") (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")] @@ -1322,6 +1322,22 @@ (set_attr "conds" "unconditional")] ) +;; Write Floating-point Status and Control Register. +(define_insn "set_fpscr" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")] VUNSPEC_SET_FPSCR)] + "TARGET_VFP" + "mcr\\tp10, 7, %0, cr1, cr0, 0\\t @SET_FPSCR" + [(set_attr "type" "mrs")]) + +;; Read Floating-point Status and Control Register. 
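The set_fpscr pattern above and the get_fpscr pattern that follows give arm_atomic_assign_expand_fenv (added earlier in arm.c) its access to the FPSCR.  The language-level case that hook serves is a C11 atomic compound assignment on a floating-point object, where only the exceptions of the finally-stored result may be raised.  A sketch, assuming C11 <stdatomic.h>:

  #include <stdatomic.h>

  _Atomic float total;

  // The FP environment is held before the compare-and-swap loop, cleared
  // on each retry, and updated once the final result is stored, so only
  // the exceptions of the successful iteration are raised.
  void add_sample (float x)
  {
    total += x;
  }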
+(define_insn "get_fpscr" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(const_int 0)] VUNSPEC_GET_FPSCR))] + "TARGET_VFP" + "mrc\\tp10, 7, %0, cr1, cr0, 0\\t @GET_FPSCR" + [(set_attr "type" "mrs")]) + + ;; Unimplemented insns: ;; fldm* ;; fstm* --- a/src/gcc/config/arm/neon.md +++ b/src/gcc/config/arm/neon.md @@ -1842,9 +1842,9 @@ ; good for plain vadd, vaddq. (define_expand "neon_vadd" - [(match_operand:VDQX 0 "s_register_operand" "=w") - (match_operand:VDQX 1 "s_register_operand" "w") - (match_operand:VDQX 2 "s_register_operand" "w") + [(match_operand:VCVTF 0 "s_register_operand" "=w") + (match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_NEON" { @@ -1869,9 +1869,9 @@ ; Used for intrinsics when flag_unsafe_math_optimizations is false. (define_insn "neon_vadd_unspec" - [(set (match_operand:VDQX 0 "s_register_operand" "=w") - (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") - (match_operand:VDQX 2 "s_register_operand" "w")] + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w")] UNSPEC_VADD))] "TARGET_NEON" "vadd.\t%0, %1, %2" @@ -2132,9 +2132,9 @@ ) (define_expand "neon_vsub" - [(match_operand:VDQX 0 "s_register_operand" "=w") - (match_operand:VDQX 1 "s_register_operand" "w") - (match_operand:VDQX 2 "s_register_operand" "w") + [(match_operand:VCVTF 0 "s_register_operand" "=w") + (match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w") (match_operand:SI 3 "immediate_operand" "i")] "TARGET_NEON" { @@ -2149,9 +2149,9 @@ ; Used for intrinsics when flag_unsafe_math_optimizations is false. 
(define_insn "neon_vsub_unspec" - [(set (match_operand:VDQX 0 "s_register_operand" "=w") - (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") - (match_operand:VDQX 2 "s_register_operand" "w")] + [(set (match_operand:VCVTF 0 "s_register_operand" "=w") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w") + (match_operand:VCVTF 2 "s_register_operand" "w")] UNSPEC_VSUB))] "TARGET_NEON" "vsub.\t%0, %1, %2" @@ -2547,6 +2547,14 @@ [(set_attr "type" "neon_qabs")] ) +(define_insn "neon_bswap" + [(set (match_operand:VDQHSD 0 "register_operand" "=w") + (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] + "TARGET_NEON" + "vrev.8\\t%0, %1" + [(set_attr "type" "neon_rev")] +) + (define_expand "neon_vneg" [(match_operand:VDQW 0 "s_register_operand" "") (match_operand:VDQW 1 "s_register_operand" "") @@ -4140,17 +4148,6 @@ [(set_attr "type" "neon_permute")] ) -(define_expand "neon_vtrn" - [(match_operand:SI 0 "s_register_operand" "r") - (match_operand:VDQW 1 "s_register_operand" "w") - (match_operand:VDQW 2 "s_register_operand" "w")] - "TARGET_NEON" -{ - neon_emit_pair_result_insn (mode, gen_neon_vtrn_internal, - operands[0], operands[1], operands[2]); - DONE; -}) - (define_expand "neon_vzip_internal" [(parallel [(set (match_operand:VDQW 0 "s_register_operand" "") @@ -4177,17 +4174,6 @@ [(set_attr "type" "neon_zip")] ) -(define_expand "neon_vzip" - [(match_operand:SI 0 "s_register_operand" "r") - (match_operand:VDQW 1 "s_register_operand" "w") - (match_operand:VDQW 2 "s_register_operand" "w")] - "TARGET_NEON" -{ - neon_emit_pair_result_insn (mode, gen_neon_vzip_internal, - operands[0], operands[1], operands[2]); - DONE; -}) - (define_expand "neon_vuzp_internal" [(parallel [(set (match_operand:VDQW 0 "s_register_operand" "") @@ -4214,17 +4200,6 @@ [(set_attr "type" "neon_zip")] ) -(define_expand "neon_vuzp" - [(match_operand:SI 0 "s_register_operand" "r") - (match_operand:VDQW 1 "s_register_operand" "w") - (match_operand:VDQW 2 "s_register_operand" "w")] - "TARGET_NEON" -{ - neon_emit_pair_result_insn (mode, gen_neon_vuzp_internal, - operands[0], operands[1], operands[2]); - DONE; -}) - (define_expand "neon_vreinterpretv8qi" [(match_operand:V8QI 0 "s_register_operand" "") (match_operand:VDX 1 "s_register_operand" "")] @@ -5357,61 +5332,6 @@ [(set_attr "type" "neon_store4_4reg")] ) -(define_expand "neon_vand" - [(match_operand:VDQX 0 "s_register_operand" "") - (match_operand:VDQX 1 "s_register_operand" "") - (match_operand:VDQX 2 "neon_inv_logic_op2" "") - (match_operand:SI 3 "immediate_operand" "")] - "TARGET_NEON" -{ - emit_insn (gen_and3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "neon_vorr" - [(match_operand:VDQX 0 "s_register_operand" "") - (match_operand:VDQX 1 "s_register_operand" "") - (match_operand:VDQX 2 "neon_logic_op2" "") - (match_operand:SI 3 "immediate_operand" "")] - "TARGET_NEON" -{ - emit_insn (gen_ior3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "neon_veor" - [(match_operand:VDQX 0 "s_register_operand" "") - (match_operand:VDQX 1 "s_register_operand" "") - (match_operand:VDQX 2 "s_register_operand" "") - (match_operand:SI 3 "immediate_operand" "")] - "TARGET_NEON" -{ - emit_insn (gen_xor3 (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "neon_vbic" - [(match_operand:VDQX 0 "s_register_operand" "") - (match_operand:VDQX 1 "s_register_operand" "") - (match_operand:VDQX 2 "neon_logic_op2" "") - (match_operand:SI 3 "immediate_operand" "")] - "TARGET_NEON" -{ - emit_insn 
(gen_bic3_neon (operands[0], operands[1], operands[2])); - DONE; -}) - -(define_expand "neon_vorn" - [(match_operand:VDQX 0 "s_register_operand" "") - (match_operand:VDQX 1 "s_register_operand" "") - (match_operand:VDQX 2 "neon_inv_logic_op2" "") - (match_operand:SI 3 "immediate_operand" "")] - "TARGET_NEON" -{ - emit_insn (gen_orn3_neon (operands[0], operands[1], operands[2])); - DONE; -}) - (define_insn "neon_vec_unpack_lo_" [(set (match_operand: 0 "register_operand" "=w") (SE: (vec_select: --- a/src/gcc/config/arm/arm_neon_builtins.def +++ b/src/gcc/config/arm/arm_neon_builtins.def @@ -18,8 +18,7 @@ along with GCC; see the file COPYING3. If not see . */ -VAR10 (BINOP, vadd, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR2 (BINOP, vadd, v2sf, v4sf), VAR3 (BINOP, vaddl, v8qi, v4hi, v2si), VAR3 (BINOP, vaddw, v8qi, v4hi, v2si), VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si), @@ -54,7 +53,7 @@ VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si), VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), -VAR10 (BINOP, vsub, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), +VAR2 (BINOP, vsub, v2sf, v4sf), VAR3 (BINOP, vsubl, v8qi, v4hi, v2si), VAR3 (BINOP, vsubw, v8qi, v4hi, v2si), VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di), @@ -89,6 +88,7 @@ VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si), VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si), VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si), +VAR5 (BSWAP, bswap, v4hi, v8hi, v2si, v4si, v2di), VAR2 (UNOP, vcnt, v8qi, v16qi), VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf), VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf), @@ -149,9 +149,6 @@ VAR1 (VTBX, vtbx2, v8qi), VAR1 (VTBX, vtbx3, v8qi), VAR1 (VTBX, vtbx4, v8qi), -VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), -VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), -VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di), VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di), VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di), @@ -199,14 +196,4 @@ VAR9 (STORESTRUCT, vst4, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf), VAR7 (STORESTRUCTLANE, vst4_lane, - v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf), -VAR10 (LOGICBINOP, vand, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -VAR10 (LOGICBINOP, vorr, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -VAR10 (BINOP, veor, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -VAR10 (LOGICBINOP, vbic, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), -VAR10 (LOGICBINOP, vorn, - v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) + v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) --- a/src/gcc/config/arm/aarch-common-protos.h +++ b/src/gcc/config/arm/aarch-common-protos.h @@ -24,6 +24,9 @@ #define GCC_AARCH_COMMON_PROTOS_H extern int aarch_crypto_can_dual_issue (rtx, rtx); +extern bool aarch_rev16_p (rtx); +extern bool aarch_rev16_shleft_mask_imm_p (rtx, enum machine_mode); +extern bool aarch_rev16_shright_mask_imm_p (rtx, enum machine_mode); extern int arm_early_load_addr_dep (rtx, rtx); extern int arm_early_store_addr_dep (rtx, rtx); extern int arm_mac_accumulator_is_mul_result (rtx, rtx); @@ -54,6 +57,7 @@ const int bfi; /* Bit-field insert. */ const int bfx; /* Bit-field extraction. 
*/ const int clz; /* Count Leading Zeros. */ + const int rev; /* Reverse bits/bytes. */ const int non_exec; /* Extra cost when not executing insn. */ const bool non_exec_costs_exec; /* True if non-execution must add the exec cost. */ --- a/src/gcc/config/arm/predicates.md +++ b/src/gcc/config/arm/predicates.md @@ -291,6 +291,15 @@ || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) (match_test "mode == GET_MODE (op)"))) +(define_special_predicate "shift_nomul_operator" + (and (ior (and (match_code "rotate") + (match_test "CONST_INT_P (XEXP (op, 1)) + && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32")) + (and (match_code "ashift,ashiftrt,lshiftrt,rotatert") + (match_test "!CONST_INT_P (XEXP (op, 1)) + || ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) < 32"))) + (match_test "mode == GET_MODE (op)"))) + ;; True for shift operators which can be used with saturation instructions. (define_special_predicate "sat_shift_operator" (and (ior (and (match_code "mult") @@ -681,5 +690,6 @@ (match_code "reg" "0"))) (define_predicate "call_insn_operand" - (ior (match_code "symbol_ref") + (ior (and (match_code "symbol_ref") + (match_test "!arm_is_long_call_p (SYMBOL_REF_DECL (op))")) (match_operand 0 "s_register_operand"))) --- a/src/gcc/config/arm/arm_neon.h +++ b/src/gcc/config/arm/arm_neon.h @@ -452,114 +452,121 @@ } poly64x2x4_t; #endif - - +/* vadd */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vadd_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_neon_vaddv8qi (__a, __b, 1); + return __a + __b; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vadd_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t)__builtin_neon_vaddv4hi (__a, __b, 1); + return __a + __b; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vadd_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t)__builtin_neon_vaddv2si (__a, __b, 1); + return __a + __b; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vadd_f32 (float32x2_t __a, float32x2_t __b) { - return (float32x2_t)__builtin_neon_vaddv2sf (__a, __b, 3); +#ifdef __FAST_MATH__ + return __a + __b; +#else + return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b, 3); +#endif } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vadd_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vaddv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return __a + __b; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vadd_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vaddv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return __a + __b; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vadd_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vaddv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return __a + __b; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vadd_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t)__builtin_neon_vadddi (__a, __b, 1); + return __a + __b; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vadd_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t)__builtin_neon_vadddi ((int64x1_t) __a, (int64x1_t) __b, 0); + return __a + __b; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vaddq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t)__builtin_neon_vaddv16qi (__a, __b, 1); + 
return __a + __b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vaddq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t)__builtin_neon_vaddv8hi (__a, __b, 1); + return __a + __b; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vaddq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t)__builtin_neon_vaddv4si (__a, __b, 1); + return __a + __b; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vaddq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t)__builtin_neon_vaddv2di (__a, __b, 1); + return __a + __b; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vaddq_f32 (float32x4_t __a, float32x4_t __b) { - return (float32x4_t)__builtin_neon_vaddv4sf (__a, __b, 3); +#ifdef __FAST_MATH + return __a + __b; +#else + return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b, 3); +#endif } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vaddq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vaddv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return __a + __b; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vaddq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vaddv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return __a + __b; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vaddq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vaddv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return __a + __b; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vaddq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t)__builtin_neon_vaddv2di ((int64x2_t) __a, (int64x2_t) __b, 0); + return __a + __b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) @@ -949,93 +956,102 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vmul_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_neon_vmulv8qi (__a, __b, 1); + return __a * __b; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vmul_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t)__builtin_neon_vmulv4hi (__a, __b, 1); + return __a * __b; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vmul_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t)__builtin_neon_vmulv2si (__a, __b, 1); + return __a * __b; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmul_f32 (float32x2_t __a, float32x2_t __b) { - return (float32x2_t)__builtin_neon_vmulv2sf (__a, __b, 3); +#ifdef __FAST_MATH + return __a * __b; +#else + return (float32x2_t) __builtin_neon_vmulv2sf (__a, __b, 3); +#endif + } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vmul_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return __a * __b; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vmul_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vmulv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return __a * __b; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vmul_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vmulv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return __a * __b; } -__extension__ static __inline poly8x8_t 
__attribute__ ((__always_inline__)) -vmul_p8 (poly8x8_t __a, poly8x8_t __b) -{ - return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); -} - __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vmulq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t)__builtin_neon_vmulv16qi (__a, __b, 1); + return __a * __b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vmulq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t)__builtin_neon_vmulv8hi (__a, __b, 1); + return __a * __b; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vmulq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t)__builtin_neon_vmulv4si (__a, __b, 1); + return __a * __b; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmulq_f32 (float32x4_t __a, float32x4_t __b) { - return (float32x4_t)__builtin_neon_vmulv4sf (__a, __b, 3); +#ifdef __FAST_MATH + return __a * __b; +#else + return (float32x4_t) __builtin_neon_vmulv4sf (__a, __b, 3); +#endif } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vmulq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vmulv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return __a * __b; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vmulq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vmulv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return __a * __b; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vmulq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vmulv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return __a * __b; } +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vmul_p8 (poly8x8_t __a, poly8x8_t __b) +{ + return (poly8x8_t)__builtin_neon_vmulv8qi ((int8x8_t) __a, (int8x8_t) __b, 2); +} + __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vmulq_p8 (poly8x16_t __a, poly8x16_t __b) { @@ -1520,112 +1536,121 @@ } #endif + __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vsub_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_neon_vsubv8qi (__a, __b, 1); + return __a - __b; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vsub_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t)__builtin_neon_vsubv4hi (__a, __b, 1); + return __a - __b; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vsub_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t)__builtin_neon_vsubv2si (__a, __b, 1); + return __a - __b; } __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vsub_f32 (float32x2_t __a, float32x2_t __b) { - return (float32x2_t)__builtin_neon_vsubv2sf (__a, __b, 3); +#ifdef __FAST_MATH + return __a - __b; +#else + return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b, 3); +#endif } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vsub_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vsubv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return __a - __b; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vsub_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vsubv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return __a - __b; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vsub_u32 
(uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vsubv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return __a - __b; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vsub_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t)__builtin_neon_vsubdi (__a, __b, 1); + return __a - __b; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vsub_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t)__builtin_neon_vsubdi ((int64x1_t) __a, (int64x1_t) __b, 0); + return __a - __b; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vsubq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t)__builtin_neon_vsubv16qi (__a, __b, 1); + return __a - __b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vsubq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t)__builtin_neon_vsubv8hi (__a, __b, 1); + return __a - __b; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vsubq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t)__builtin_neon_vsubv4si (__a, __b, 1); + return __a - __b; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vsubq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t)__builtin_neon_vsubv2di (__a, __b, 1); + return __a - __b; } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vsubq_f32 (float32x4_t __a, float32x4_t __b) { - return (float32x4_t)__builtin_neon_vsubv4sf (__a, __b, 3); +#ifdef __FAST_MATH + return __a - __b; +#else + return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b, 3); +#endif } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vsubq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vsubv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return __a - __b; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vsubq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vsubv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return __a - __b; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vsubq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vsubv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return __a - __b; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vsubq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t)__builtin_neon_vsubv2di ((int64x2_t) __a, (int64x2_t) __b, 0); + return __a - __b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) @@ -11295,484 +11320,483 @@ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vand_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_neon_vandv8qi (__a, __b, 1); + return __a & __b; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vand_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t)__builtin_neon_vandv4hi (__a, __b, 1); + return __a & __b; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vand_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t)__builtin_neon_vandv2si (__a, __b, 1); + return __a & __b; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vand_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vandv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return __a & __b; } __extension__ static __inline uint16x4_t __attribute__ 
((__always_inline__)) vand_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vandv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return __a & __b; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vand_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vandv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return __a & __b; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vand_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t)__builtin_neon_vanddi (__a, __b, 1); + return __a & __b; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vand_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t)__builtin_neon_vanddi ((int64x1_t) __a, (int64x1_t) __b, 0); + return __a & __b; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vandq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t)__builtin_neon_vandv16qi (__a, __b, 1); + return __a & __b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vandq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t)__builtin_neon_vandv8hi (__a, __b, 1); + return __a & __b; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vandq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t)__builtin_neon_vandv4si (__a, __b, 1); + return __a & __b; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vandq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t)__builtin_neon_vandv2di (__a, __b, 1); + return __a & __b; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vandq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vandv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return __a & __b; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vandq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vandv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return __a & __b; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vandq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vandv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return __a & __b; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vandq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t)__builtin_neon_vandv2di ((int64x2_t) __a, (int64x2_t) __b, 0); + return __a & __b; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vorr_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_neon_vorrv8qi (__a, __b, 1); + return __a | __b; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vorr_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t)__builtin_neon_vorrv4hi (__a, __b, 1); + return __a | __b; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vorr_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t)__builtin_neon_vorrv2si (__a, __b, 1); + return __a | __b; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vorr_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vorrv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return __a | __b; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vorr_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vorrv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + 
return __a | __b; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vorr_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vorrv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return __a | __b; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vorr_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t)__builtin_neon_vorrdi (__a, __b, 1); + return __a | __b; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vorr_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t)__builtin_neon_vorrdi ((int64x1_t) __a, (int64x1_t) __b, 0); + return __a | __b; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vorrq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t)__builtin_neon_vorrv16qi (__a, __b, 1); + return __a | __b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vorrq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t)__builtin_neon_vorrv8hi (__a, __b, 1); + return __a | __b; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vorrq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t)__builtin_neon_vorrv4si (__a, __b, 1); + return __a | __b; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vorrq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t)__builtin_neon_vorrv2di (__a, __b, 1); + return __a | __b; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vorrq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vorrv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return __a | __b; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vorrq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vorrv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return __a | __b; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vorrq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vorrv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return __a | __b; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vorrq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t)__builtin_neon_vorrv2di ((int64x2_t) __a, (int64x2_t) __b, 0); + return __a | __b; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) veor_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_neon_veorv8qi (__a, __b, 1); + return __a ^ __b; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) veor_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t)__builtin_neon_veorv4hi (__a, __b, 1); + return __a ^ __b; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) veor_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t)__builtin_neon_veorv2si (__a, __b, 1); + return __a ^ __b; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) veor_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_veorv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return __a ^ __b; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) veor_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_veorv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return __a ^ __b; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) veor_u32 (uint32x2_t __a, uint32x2_t __b) { - return 
(uint32x2_t)__builtin_neon_veorv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return __a ^ __b; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) veor_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t)__builtin_neon_veordi (__a, __b, 1); + return __a ^ __b; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) veor_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t)__builtin_neon_veordi ((int64x1_t) __a, (int64x1_t) __b, 0); + return __a ^ __b; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) veorq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t)__builtin_neon_veorv16qi (__a, __b, 1); + return __a ^ __b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) veorq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t)__builtin_neon_veorv8hi (__a, __b, 1); + return __a ^ __b; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) veorq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t)__builtin_neon_veorv4si (__a, __b, 1); + return __a ^ __b; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) veorq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t)__builtin_neon_veorv2di (__a, __b, 1); + return __a ^ __b; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) veorq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_veorv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return __a ^ __b; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) veorq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_veorv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return __a ^ __b; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) veorq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_veorv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return __a ^ __b; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) veorq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t)__builtin_neon_veorv2di ((int64x2_t) __a, (int64x2_t) __b, 0); + return __a ^ __b; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vbic_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_neon_vbicv8qi (__a, __b, 1); + return __a & ~__b; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vbic_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t)__builtin_neon_vbicv4hi (__a, __b, 1); + return __a & ~__b; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vbic_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t)__builtin_neon_vbicv2si (__a, __b, 1); + return __a & ~__b; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vbic_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vbicv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return __a & ~__b; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vbic_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vbicv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return __a & ~__b; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vbic_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vbicv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return __a & ~__b; } __extension__ static __inline int64x1_t 
__attribute__ ((__always_inline__)) vbic_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t)__builtin_neon_vbicdi (__a, __b, 1); + return __a & ~__b; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vbic_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t)__builtin_neon_vbicdi ((int64x1_t) __a, (int64x1_t) __b, 0); + return __a & ~__b; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vbicq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t)__builtin_neon_vbicv16qi (__a, __b, 1); + return __a & ~__b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vbicq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t)__builtin_neon_vbicv8hi (__a, __b, 1); + return __a & ~__b; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vbicq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t)__builtin_neon_vbicv4si (__a, __b, 1); + return __a & ~__b; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vbicq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t)__builtin_neon_vbicv2di (__a, __b, 1); + return __a & ~__b; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vbicq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vbicv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return __a & ~__b; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vbicq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vbicv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return __a & ~__b; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vbicq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vbicv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return __a & ~__b; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vbicq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t)__builtin_neon_vbicv2di ((int64x2_t) __a, (int64x2_t) __b, 0); + return __a & ~__b; } __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vorn_s8 (int8x8_t __a, int8x8_t __b) { - return (int8x8_t)__builtin_neon_vornv8qi (__a, __b, 1); + return __a | ~__b; } __extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) vorn_s16 (int16x4_t __a, int16x4_t __b) { - return (int16x4_t)__builtin_neon_vornv4hi (__a, __b, 1); + return __a | ~__b; } __extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) vorn_s32 (int32x2_t __a, int32x2_t __b) { - return (int32x2_t)__builtin_neon_vornv2si (__a, __b, 1); + return __a | ~__b; } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vorn_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vornv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return __a | ~__b; } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vorn_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vornv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return __a | ~__b; } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vorn_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vornv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return __a | ~__b; } __extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) vorn_s64 (int64x1_t __a, int64x1_t __b) { - return (int64x1_t)__builtin_neon_vorndi (__a, __b, 1); + 
return __a | ~__b; } __extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) vorn_u64 (uint64x1_t __a, uint64x1_t __b) { - return (uint64x1_t)__builtin_neon_vorndi ((int64x1_t) __a, (int64x1_t) __b, 0); + return __a | ~__b; } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vornq_s8 (int8x16_t __a, int8x16_t __b) { - return (int8x16_t)__builtin_neon_vornv16qi (__a, __b, 1); + return __a | ~__b; } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vornq_s16 (int16x8_t __a, int16x8_t __b) { - return (int16x8_t)__builtin_neon_vornv8hi (__a, __b, 1); + return __a | ~__b; } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vornq_s32 (int32x4_t __a, int32x4_t __b) { - return (int32x4_t)__builtin_neon_vornv4si (__a, __b, 1); + return __a | ~__b; } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vornq_s64 (int64x2_t __a, int64x2_t __b) { - return (int64x2_t)__builtin_neon_vornv2di (__a, __b, 1); + return __a | ~__b; } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vornq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vornv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return __a | ~__b; } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vornq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vornv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return __a | ~__b; } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vornq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vornv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return __a | ~__b; } __extension__ static __inline uint64x2_t __attribute__ ((__always_inline__)) vornq_u64 (uint64x2_t __a, uint64x2_t __b) { - return (uint64x2_t)__builtin_neon_vornv2di ((int64x2_t) __a, (int64x2_t) __b, 0); + return __a | ~__b; } - __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) vreinterpret_p8_p16 (poly16x4_t __a) { --- a/src/gcc/config/arm/aarch-common.c +++ b/src/gcc/config/arm/aarch-common.c @@ -191,6 +191,83 @@ return 0; } +bool +aarch_rev16_shright_mask_imm_p (rtx val, enum machine_mode mode) +{ + return CONST_INT_P (val) + && INTVAL (val) + == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), + mode); +} + +bool +aarch_rev16_shleft_mask_imm_p (rtx val, enum machine_mode mode) +{ + return CONST_INT_P (val) + && INTVAL (val) + == trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), + mode); +} + + +static bool +aarch_rev16_p_1 (rtx lhs, rtx rhs, enum machine_mode mode) +{ + if (GET_CODE (lhs) == AND + && GET_CODE (XEXP (lhs, 0)) == ASHIFT + && CONST_INT_P (XEXP (XEXP (lhs, 0), 1)) + && INTVAL (XEXP (XEXP (lhs, 0), 1)) == 8 + && REG_P (XEXP (XEXP (lhs, 0), 0)) + && CONST_INT_P (XEXP (lhs, 1)) + && GET_CODE (rhs) == AND + && GET_CODE (XEXP (rhs, 0)) == LSHIFTRT + && REG_P (XEXP (XEXP (rhs, 0), 0)) + && CONST_INT_P (XEXP (XEXP (rhs, 0), 1)) + && INTVAL (XEXP (XEXP (rhs, 0), 1)) == 8 + && CONST_INT_P (XEXP (rhs, 1)) + && REGNO (XEXP (XEXP (rhs, 0), 0)) == REGNO (XEXP (XEXP (lhs, 0), 0))) + + { + rtx lhs_mask = XEXP (lhs, 1); + rtx rhs_mask = XEXP (rhs, 1); + + return aarch_rev16_shright_mask_imm_p (rhs_mask, mode) + && aarch_rev16_shleft_mask_imm_p (lhs_mask, mode); + } + + return false; +} + +/* Recognise a sequence of bitwise operations corresponding to a rev16 operation. 
+ These will be of the form: + ((x >> 8) & 0x00ff00ff) + | ((x << 8) & 0xff00ff00) + for SImode and with similar but wider bitmasks for DImode. + The two sub-expressions of the IOR can appear on either side so check both + permutations with the help of aarch_rev16_p_1 above. */ + +bool +aarch_rev16_p (rtx x) +{ + rtx left_sub_rtx, right_sub_rtx; + bool is_rev = false; + + if (GET_CODE (x) != IOR) + return false; + + left_sub_rtx = XEXP (x, 0); + right_sub_rtx = XEXP (x, 1); + + /* There are no canonicalisation rules for the position of the two shifts + involved in a rev, so try both permutations. */ + is_rev = aarch_rev16_p_1 (left_sub_rtx, right_sub_rtx, GET_MODE (x)); + + if (!is_rev) + is_rev = aarch_rev16_p_1 (right_sub_rtx, left_sub_rtx, GET_MODE (x)); + + return is_rev; +} + /* Return nonzero if the CONSUMER instruction (a load) does need PRODUCER's value to calculate the address. */ int --- a/src/gcc/config/arm/iterators.md +++ b/src/gcc/config/arm/iterators.md @@ -116,6 +116,9 @@ ;; Vector modes including 64-bit integer elements, but no floats. (define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI]) +;; Vector modes for H, S and D types. +(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) + ;; Vector modes for float->int conversions. (define_mode_iterator VCVTF [V2SF V4SF]) @@ -191,6 +194,20 @@ ;; Right shifts (define_code_iterator rshifts [ashiftrt lshiftrt]) +;; Binary operators whose second operand can be shifted. +(define_code_iterator shiftable_ops [plus minus ior xor and]) + +;; plus and minus are the only shiftable_ops for which Thumb2 allows +;; a stack pointer opoerand. The minus operation is a candidate for an rsub +;; and hence only plus is supported. +(define_code_attr t2_binop0 + [(plus "rk") (minus "r") (ior "r") (xor "r") (and "r")]) + +;; The instruction to use when a shiftable_ops has a shift operation as +;; its first operand. +(define_code_attr arith_shift_insn + [(plus "add") (minus "rsb") (ior "orr") (xor "eor") (and "and")]) + ;;---------------------------------------------------------------------------- ;; Int iterators ;;---------------------------------------------------------------------------- --- a/src/gcc/config/arm/arm.md +++ b/src/gcc/config/arm/arm.md @@ -200,17 +200,9 @@ (const_string "yes")] (const_string "no"))) -; Allows an insn to disable certain alternatives for reasons other than -; arch support. -(define_attr "insn_enabled" "no,yes" - (const_string "yes")) - ; Enable all alternatives that are both arch_enabled and insn_enabled. 
(define_attr "enabled" "no,yes" - (cond [(eq_attr "insn_enabled" "no") - (const_string "no") - - (and (eq_attr "predicable_short_it" "no") + (cond [(and (eq_attr "predicable_short_it" "no") (and (eq_attr "predicated" "yes") (match_test "arm_restrict_it"))) (const_string "no") @@ -2863,6 +2855,28 @@ (set_attr "type" "multiple")] ) +(define_insn_and_split "*anddi_notdi_zesidi" + [(set (match_operand:DI 0 "s_register_operand" "=r") + (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "r")) + (zero_extend:DI + (match_operand:SI 1 "s_register_operand" "r"))))] + "TARGET_32BIT" + "#" + "TARGET_32BIT && reload_completed" + [(set (match_dup 0) (and:SI (not:SI (match_dup 2)) (match_dup 1))) + (set (match_dup 3) (const_int 0))] + " + { + operands[3] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[2] = gen_lowpart (SImode, operands[2]); + }" + [(set_attr "length" "8") + (set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "type" "multiple")] +) + (define_insn_and_split "*anddi_notsesidi_di" [(set (match_operand:DI 0 "s_register_operand" "=&r,&r") (and:DI (not:DI (sign_extend:DI @@ -9345,8 +9359,10 @@ "TARGET_32BIT" " { - if (!REG_P (XEXP (operands[0], 0)) - && (GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF)) + if ((!REG_P (XEXP (operands[0], 0)) + && GET_CODE (XEXP (operands[0], 0)) != SYMBOL_REF) + || (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF + && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[0], 0))))) XEXP (operands[0], 0) = force_reg (SImode, XEXP (operands[0], 0)); if (operands[2] == NULL_RTX) @@ -9363,8 +9379,10 @@ "TARGET_32BIT" " { - if (!REG_P (XEXP (operands[1], 0)) && - (GET_CODE (XEXP (operands[1],0)) != SYMBOL_REF)) + if ((!REG_P (XEXP (operands[1], 0)) + && GET_CODE (XEXP (operands[1], 0)) != SYMBOL_REF) + || (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF + && arm_is_long_call_p (SYMBOL_REF_DECL (XEXP (operands[1], 0))))) XEXP (operands[1], 0) = force_reg (SImode, XEXP (operands[1], 0)); if (operands[3] == NULL_RTX) @@ -9850,39 +9868,35 @@ ;; Patterns to allow combination of arithmetic, cond code and shifts -(define_insn "*arith_shiftsi" - [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r") - (match_operator:SI 1 "shiftable_operator" - [(match_operator:SI 3 "shift_operator" - [(match_operand:SI 4 "s_register_operand" "r,r,r,r") - (match_operand:SI 5 "shift_amount_operand" "M,M,M,r")]) - (match_operand:SI 2 "s_register_operand" "rk,rk,r,rk")]))] +(define_insn "*_multsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r") + (shiftable_ops:SI + (mult:SI (match_operand:SI 2 "s_register_operand" "r,r") + (match_operand:SI 3 "power_of_two_operand" "")) + (match_operand:SI 1 "s_register_operand" "rk,")))] "TARGET_32BIT" - "%i1%?\\t%0, %2, %4%S3" + "%?\\t%0, %1, %2, lsl %b3" [(set_attr "predicable" "yes") (set_attr "predicable_short_it" "no") (set_attr "shift" "4") - (set_attr "arch" "a,t2,t2,a") - ;; Thumb2 doesn't allow the stack pointer to be used for - ;; operand1 for all operations other than add and sub. In this case - ;; the minus operation is a candidate for an rsub and hence needs - ;; to be disabled. - ;; We have to make sure to disable the fourth alternative if - ;; the shift_operator is MULT, since otherwise the insn will - ;; also match a multiply_accumulate pattern and validate_change - ;; will allow a replacement of the constant with a register - ;; despite the checks done in shift_operator. 
- (set_attr_alternative "insn_enabled" - [(const_string "yes") - (if_then_else - (match_operand:SI 1 "add_operator" "") - (const_string "yes") (const_string "no")) - (const_string "yes") - (if_then_else - (match_operand:SI 3 "mult_operator" "") - (const_string "no") (const_string "yes"))]) - (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_imm,alu_shift_reg")]) + (set_attr "arch" "a,t2") + (set_attr "type" "alu_shift_imm")]) +(define_insn "*_shiftsi" + [(set (match_operand:SI 0 "s_register_operand" "=r,r,r") + (shiftable_ops:SI + (match_operator:SI 2 "shift_nomul_operator" + [(match_operand:SI 3 "s_register_operand" "r,r,r") + (match_operand:SI 4 "shift_amount_operand" "M,M,r")]) + (match_operand:SI 1 "s_register_operand" "rk,,rk")))] + "TARGET_32BIT && GET_CODE (operands[3]) != MULT" + "%?\\t%0, %1, %3%S2" + [(set_attr "predicable" "yes") + (set_attr "predicable_short_it" "no") + (set_attr "shift" "4") + (set_attr "arch" "a,t2,a") + (set_attr "type" "alu_shift_imm,alu_shift_imm,alu_shift_reg")]) + (define_split [(set (match_operand:SI 0 "s_register_operand" "") (match_operator:SI 1 "shiftable_operator" @@ -12669,6 +12683,44 @@ (set_attr "type" "rev")] ) +;; There are no canonicalisation rules for the position of the lshiftrt, ashift +;; operations within an IOR/AND RTX, therefore we have two patterns matching +;; each valid permutation. + +(define_insn "arm_rev16si2" + [(set (match_operand:SI 0 "register_operand" "=l,l,r") + (ior:SI (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "l,l,r") + (const_int 8)) + (match_operand:SI 3 "const_int_operand" "n,n,n")) + (and:SI (lshiftrt:SI (match_dup 1) + (const_int 8)) + (match_operand:SI 2 "const_int_operand" "n,n,n"))))] + "arm_arch6 + && aarch_rev16_shleft_mask_imm_p (operands[3], SImode) + && aarch_rev16_shright_mask_imm_p (operands[2], SImode)" + "rev16\\t%0, %1" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] +) + +(define_insn "arm_rev16si2_alt" + [(set (match_operand:SI 0 "register_operand" "=l,l,r") + (ior:SI (and:SI (lshiftrt:SI (match_operand:SI 1 "register_operand" "l,l,r") + (const_int 8)) + (match_operand:SI 2 "const_int_operand" "n,n,n")) + (and:SI (ashift:SI (match_dup 1) + (const_int 8)) + (match_operand:SI 3 "const_int_operand" "n,n,n"))))] + "arm_arch6 + && aarch_rev16_shleft_mask_imm_p (operands[3], SImode) + && aarch_rev16_shright_mask_imm_p (operands[2], SImode)" + "rev16\\t%0, %1" + [(set_attr "arch" "t1,t2,32") + (set_attr "length" "2,2,4") + (set_attr "type" "rev")] +) + (define_expand "bswaphi2" [(set (match_operand:HI 0 "s_register_operand" "=r") (bswap:HI (match_operand:HI 1 "s_register_operand" "r")))] --- a/src/libobjc/ChangeLog.linaro +++ b/src/libobjc/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libvtv/ChangeLog.linaro +++ b/src/libvtv/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. 
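Illustration of the new rev16 idiom recognition above: the aarch-common.c hunk documents the bitwise form that aarch_rev16_p looks for, ((x >> 8) & 0x00ff00ff) | ((x << 8) & 0xff00ff00), and the two arm_rev16si2 patterns in arm.md match both operand orders of that IOR. A minimal C sketch of source code that should now combine into a single rev16 on arm_arch6 targets (hypothetical example, not taken from the patch; the function name is made up):

#include <stdint.h>

/* Byte-swap each 16-bit halfword of x.  This is exactly the
   ((x >> 8) & 0x00ff00ff) | ((x << 8) & 0xff00ff00) shape that
   aarch_rev16_p recognises, so with this patch applied combine should
   be able to turn the whole expression into "rev16 %0, %1".  */
uint32_t swap_halfword_bytes (uint32_t x)
{
  return ((x >> 8) & 0x00ff00ffU) | ((x << 8) & 0xff00ff00U);
}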
--- a/src/libgfortran/configure +++ b/src/libgfortran/configure @@ -25935,7 +25935,7 @@ # test is copied from libgomp, and modified to not link in -lrt as # libgfortran calls clock_gettime via a weak reference if it's found # in librt. -if test $ac_cv_func_clock_gettime = no; then +if test "$ac_cv_func_clock_gettime" = no; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking for clock_gettime in -lrt" >&5 $as_echo_n "checking for clock_gettime in -lrt... " >&6; } if test "${ac_cv_lib_rt_clock_gettime+set}" = set; then : --- a/src/libgfortran/configure.ac +++ b/src/libgfortran/configure.ac @@ -510,7 +510,7 @@ # test is copied from libgomp, and modified to not link in -lrt as # libgfortran calls clock_gettime via a weak reference if it's found # in librt. -if test $ac_cv_func_clock_gettime = no; then +if test "$ac_cv_func_clock_gettime" = no; then AC_CHECK_LIB(rt, clock_gettime, [AC_DEFINE(HAVE_CLOCK_GETTIME_LIBRT, 1, [Define to 1 if you have the `clock_gettime' function in librt.])]) --- a/src/libgfortran/ChangeLog.linaro +++ b/src/libgfortran/ChangeLog.linaro @@ -0,0 +1,27 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-25 Yvan Roux + + Backport from trunk r209747. + 2014-04-24 Kyrylo Tkachov + + * configure.ac: Quote usage of ac_cv_func_clock_gettime in if test. + * configure: Regenerate. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libada/ChangeLog.linaro +++ b/src/libada/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libffi/ChangeLog.linaro +++ b/src/libffi/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libssp/ChangeLog.linaro +++ b/src/libssp/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libcilkrts/ChangeLog.linaro +++ b/src/libcilkrts/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/libcpp/ChangeLog.linaro +++ b/src/libcpp/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. 
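On the arm_neon.h hunks earlier in this diff: the integer vadd/vsub/vmul intrinsics and the vand/vorr/veor/vbic/vorn intrinsics are rewritten as plain GNU C vector operators, and only the float32 variants keep the __builtin_neon_* call, selected at preprocessing time. Note that the guard macro is spelled __FAST_MATH__ in the vadd_f32 hunk but __FAST_MATH (no trailing underscores) in the vaddq_f32, vmul_f32, vmulq_f32, vsub_f32 and vsubq_f32 hunks; only __FAST_MATH__ is predefined by GCC under -ffast-math, so as written those other branches keep calling the builtin unless the user defines __FAST_MATH by hand. A small sketch of the user-visible effect (hypothetical example, not part of the patch):

#include <arm_neon.h>

/* With this change vadd_s32 is simply the vector "+" operator, so the
   call below goes through normal GIMPLE optimisation (constant folding,
   reassociation, ...) instead of staying an opaque builtin; it should
   still emit a single vadd.i32 when built with -mfpu=neon.  */
int32x2_t add_pairs (int32x2_t a, int32x2_t b)
{
  return vadd_s32 (a, b);      /* expands to  a + b  */
}

/* vadd_f32 only becomes "__a + __b" when __FAST_MATH__ is defined
   (e.g. via -ffast-math); otherwise it still calls
   __builtin_neon_vaddv2sf, preserving the non-fast-math semantics.  */
float32x2_t add_float_pairs (float32x2_t a, float32x2_t b)
{
  return vadd_f32 (a, b);
}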
--- a/src/libcpp/po/ChangeLog.linaro +++ b/src/libcpp/po/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released. --- a/src/fixincludes/ChangeLog.linaro +++ b/src/fixincludes/ChangeLog.linaro @@ -0,0 +1,19 @@ +2014-07-17 Yvan Roux + + GCC Linaro 4.9-2014.07 released. + +2014-06-25 Yvan Roux + + GCC Linaro 4.9-2014.06-1 released. + +2014-06-12 Yvan Roux + + GCC Linaro 4.9-2014.06 released. + +2014-05-14 Yvan Roux + + GCC Linaro 4.9-2014.05 released. + +2014-04-22 Yvan Roux + + GCC Linaro 4.9-2014.04 released.
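Returning to the arm.c hunk near the top of this section: the new arm_atomic_assign_expand_fenv hook fills in the hold/clear/update triple that GCC wraps around C11 atomic compound assignments on floating-point types, using the FPSCR get/set builtins and VUNSPEC_GET_FPSCR/VUNSPEC_SET_FPSCR insns added elsewhere in this diff. The comments inside that hunk already spell out the generated sequence; written out as ordinary C it is roughly the following (illustrative sketch only, assuming a VFP target; __atomic_feraiseexcept is the libatomic helper the hook calls and is declared here by hand):

/* Roughly what *hold, *clear and *update expand to around an atomic
   floating-point compound assignment, following the comments in
   arm_atomic_assign_expand_fenv.  ARM_FE_ALL_EXCEPT is 0x1f and the
   exception-enable bits sit ARM_FE_EXCEPT_SHIFT (8) bits above the
   cumulative exception flags.  */
extern void __atomic_feraiseexcept (int);   /* provided by libatomic */

void fenv_protected_update (void)
{
  /* *hold: read FPSCR, then mask out trap-enable bits and sticky flags.  */
  unsigned int fenv_var = __builtin_arm_get_fpscr ();
  unsigned int masked_fenv = fenv_var & ~((0x1fU << 8) | 0x1fU);
  __builtin_arm_set_fpscr (masked_fenv);

  /* ... the compare-and-exchange loop of the atomic update runs here ... */

  /* *clear: re-clear the exception flags between retries.  */
  __builtin_arm_set_fpscr (masked_fenv);

  /* *update: pick up exceptions raised by the update, restore the
     caller's FP environment, then re-raise those exceptions.  */
  unsigned int new_fenv_var = __builtin_arm_get_fpscr ();
  __builtin_arm_set_fpscr (fenv_var);
  __atomic_feraiseexcept ((int) new_fenv_var);
}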