Diffstat (limited to 'gcc-4.6/gcc/config/i386')
-rw-r--r--  gcc-4.6/gcc/config/i386/avxintrin.h | 2
-rw-r--r--  gcc-4.6/gcc/config/i386/constraints.md | 6
-rw-r--r--  gcc-4.6/gcc/config/i386/cygwin.h | 2
-rw-r--r--  gcc-4.6/gcc/config/i386/darwin.h | 2
-rw-r--r--  gcc-4.6/gcc/config/i386/freebsd.h | 3
-rw-r--r--  gcc-4.6/gcc/config/i386/i386-builtin-types.def | 1
-rw-r--r--  gcc-4.6/gcc/config/i386/i386.c | 1112
-rw-r--r--  gcc-4.6/gcc/config/i386/i386.md | 177
-rw-r--r--  gcc-4.6/gcc/config/i386/i386.opt | 4
-rw-r--r--  gcc-4.6/gcc/config/i386/libgcc-glibc.ver | 12
-rw-r--r--  gcc-4.6/gcc/config/i386/linux.h | 2
-rw-r--r--  gcc-4.6/gcc/config/i386/linux64.h | 2
-rw-r--r--  gcc-4.6/gcc/config/i386/mingw32.h | 8
-rw-r--r--  gcc-4.6/gcc/config/i386/predicates.md | 2
-rw-r--r--  gcc-4.6/gcc/config/i386/sse.md | 40
15 files changed, 1252 insertions, 123 deletions
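The headline change in the i386.c hunks below is a family of CPU introspection builtins (__builtin_cpu_init, __builtin_cpu_is_*, __builtin_cpu_supports_*) that fold into bit-field loads from __cpu_model/__cpu_features in libgcc, plus a multi-versioning hook driven by them. A minimal usage sketch — hypothetical user code, assuming a compiler with this patch applied; use_sse42_path and use_generic_path are placeholder names:

/* Hypothetical dispatch routine using the builtins added below.  */
extern void use_sse42_path (void);   /* placeholder */
extern void use_generic_path (void); /* placeholder */

void
dispatch (void)
{
  /* Fill in __cpu_model/__cpu_features.  Called explicitly because,
     per the comments in the patch, IFUNC resolvers may run before
     the library constructor does.  */
  __builtin_cpu_init ();

  if (__builtin_cpu_is_intel_core2 ())
    use_generic_path ();   /* e.g. avoid movdqu-heavy paths on Core 2 */
  else if (__builtin_cpu_supports_sse4_2 ())
    use_sse42_path ();
  else
    use_generic_path ();
}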
diff --git a/gcc-4.6/gcc/config/i386/avxintrin.h b/gcc-4.6/gcc/config/i386/avxintrin.h index 8055cc6..6d4213d 100644 --- a/gcc-4.6/gcc/config/i386/avxintrin.h +++ b/gcc-4.6/gcc/config/i386/avxintrin.h @@ -759,7 +759,7 @@ _mm256_insert_epi8 (__m256i __X, int __D, int const __N) #ifdef __x86_64__ extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_insert_epi64 (__m256i __X, int __D, int const __N) +_mm256_insert_epi64 (__m256i __X, long long __D, int const __N) { __m128i __Y = _mm256_extractf128_si256 (__X, __N >> 1); __Y = _mm_insert_epi64 (__Y, __D, __N % 2); diff --git a/gcc-4.6/gcc/config/i386/constraints.md b/gcc-4.6/gcc/config/i386/constraints.md index 4bc3ed6..6233b79 100644 --- a/gcc-4.6/gcc/config/i386/constraints.md +++ b/gcc-4.6/gcc/config/i386/constraints.md @@ -19,7 +19,7 @@ ;;; Unused letters: ;;; B H T W -;;; h jk vw z +;;; h jk vw ;; Integer register constraints. ;; It is not necessary to define 'r' here. @@ -105,6 +105,10 @@ "TARGET_MMX && TARGET_INTER_UNIT_MOVES ? MMX_REGS : NO_REGS" "@internal Any MMX register, when inter-unit moves are enabled.") +(define_constraint "z" + "@internal Constant call address operand." + (match_operand 0 "constant_call_address_operand")) + ;; Integer constant constraints. (define_constraint "I" "Integer constant in the range 0 @dots{} 31, for 32-bit shifts." diff --git a/gcc-4.6/gcc/config/i386/cygwin.h b/gcc-4.6/gcc/config/i386/cygwin.h index bedf1e7..f8daeec 100644 --- a/gcc-4.6/gcc/config/i386/cygwin.h +++ b/gcc-4.6/gcc/config/i386/cygwin.h @@ -38,7 +38,7 @@ along with GCC; see the file COPYING3. If not see #undef ENDFILE_SPEC #define ENDFILE_SPEC \ - "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}\ crtend.o%s" /* Normally, -lgcc is not needed since everything in it is in the DLL, but we diff --git a/gcc-4.6/gcc/config/i386/darwin.h b/gcc-4.6/gcc/config/i386/darwin.h index 934ab4b..08b6c52 100644 --- a/gcc-4.6/gcc/config/i386/darwin.h +++ b/gcc-4.6/gcc/config/i386/darwin.h @@ -131,7 +131,7 @@ extern int darwin_emit_branch_islands; #undef ENDFILE_SPEC #define ENDFILE_SPEC \ - "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ %{mpc32:crtprec32.o%s} \ %{mpc64:crtprec64.o%s} \ %{mpc80:crtprec80.o%s}" diff --git a/gcc-4.6/gcc/config/i386/freebsd.h b/gcc-4.6/gcc/config/i386/freebsd.h index 61592d4..6d2c559 100644 --- a/gcc-4.6/gcc/config/i386/freebsd.h +++ b/gcc-4.6/gcc/config/i386/freebsd.h @@ -147,3 +147,6 @@ along with GCC; see the file COPYING3. If not see #if FBSD_MAJOR >= 6 #define SUBTARGET32_DEFAULT_CPU "i486" #endif + +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack + diff --git a/gcc-4.6/gcc/config/i386/i386-builtin-types.def b/gcc-4.6/gcc/config/i386/i386-builtin-types.def index 05a7f54..e3422dd 100644 --- a/gcc-4.6/gcc/config/i386/i386-builtin-types.def +++ b/gcc-4.6/gcc/config/i386/i386-builtin-types.def @@ -131,6 +131,7 @@ DEF_FUNCTION_TYPE (UINT64) DEF_FUNCTION_TYPE (UNSIGNED) DEF_FUNCTION_TYPE (VOID) DEF_FUNCTION_TYPE (PVOID) +DEF_FUNCTION_TYPE (INT) DEF_FUNCTION_TYPE (FLOAT, FLOAT) DEF_FUNCTION_TYPE (FLOAT128, FLOAT128) diff --git a/gcc-4.6/gcc/config/i386/i386.c b/gcc-4.6/gcc/config/i386/i386.c index 16d977e..6117e7d 100644 --- a/gcc-4.6/gcc/config/i386/i386.c +++ b/gcc-4.6/gcc/config/i386/i386.c @@ -58,6 +58,13 @@ along with GCC; see the file COPYING3. 
If not see #include "sched-int.h" #include "sbitmap.h" #include "fibheap.h" +#include "tree-flow.h" +#include "tree-pass.h" +#include "tree-dump.h" +#include "gimple-pretty-print.h" +#include "cfgloop.h" +#include "tree-scalar-evolution.h" +#include "tree-vectorizer.h" enum upper_128bits_state { @@ -2350,6 +2357,8 @@ enum processor_type ix86_tune; /* Which instruction set architecture to use. */ enum processor_type ix86_arch; +char ix86_varch[PROCESSOR_max]; + /* true if sse prefetch instruction is not NOOP. */ int x86_prefetch_sse; @@ -2489,6 +2498,7 @@ static enum calling_abi ix86_function_abi (const_tree); /* Whether -mtune= or -march= were specified */ static int ix86_tune_defaulted; static int ix86_arch_specified; +static int ix86_varch_specified; /* A mask of ix86_isa_flags that includes bit X if X was set or cleared on the command line. */ @@ -3412,6 +3422,11 @@ ix86_option_override_internal (bool main_args_p) PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL}, + {"core-avx-i", PROCESSOR_COREI7_64, CPU_COREI7, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX + | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE + | PTA_RDRND | PTA_F16C}, {"atom", PROCESSOR_ATOM, CPU_ATOM, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE}, @@ -4308,6 +4323,36 @@ ix86_option_override_internal (bool main_args_p) /* Disable vzeroupper pass if TARGET_AVX is disabled. */ target_flags &= ~MASK_VZEROUPPER; } + + /* Handle ix86_mv_arch_string. The values allowed are the same as + -march=<>. More than one value is allowed and values must be + comma separated. */ + if (ix86_mv_arch_string) + { + char *token; + char *varch; + int i; + + ix86_varch_specified = 1; + memset (ix86_varch, 0, sizeof (ix86_varch)); + token = XNEWVEC (char, strlen (ix86_mv_arch_string) + 1); + strcpy (token, ix86_mv_arch_string); + varch = strtok ((char *)token, ","); + while (varch != NULL) + { + for (i = 0; i < pta_size; i++) + if (!strcmp (varch, processor_alias_table[i].name)) + { + ix86_varch[processor_alias_table[i].processor] = 1; + break; + } + if (i == pta_size) + error ("bad value (%s) for %sv-arch=%s %s", + varch, prefix, suffix, sw); + varch = strtok (NULL, ","); + } + free (token); + } } /* Return TRUE if VAL is passed in register with 256bit AVX modes. 
*/ @@ -12697,7 +12742,7 @@ legitimize_tls_address (rtx x, enum tls_model model, int for_mov) { dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); - set_unique_reg_note (get_last_insn (), REG_EQUIV, x); + set_unique_reg_note (get_last_insn (), REG_EQUAL, x); } break; @@ -12728,7 +12773,7 @@ legitimize_tls_address (rtx x, enum tls_model model, int for_mov) { rtx x = ix86_tls_module_base (); - set_unique_reg_note (get_last_insn (), REG_EQUIV, + set_unique_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_MINUS (Pmode, x, tp)); } @@ -12741,7 +12786,7 @@ legitimize_tls_address (rtx x, enum tls_model model, int for_mov) { dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); - set_unique_reg_note (get_last_insn (), REG_EQUIV, x); + set_unique_reg_note (get_last_insn (), REG_EQUAL, x); } break; @@ -16324,7 +16369,6 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2, basic_block bb = BLOCK_FOR_INSN (insn); int distance = 0; df_ref *def_rec; - enum attr_type insn_type; if (insn != BB_HEAD (bb)) { @@ -16340,8 +16384,8 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2, && (regno1 == DF_REF_REGNO (*def_rec) || regno2 == DF_REF_REGNO (*def_rec))) { - insn_type = get_attr_type (prev); - if (insn_type != TYPE_LEA) + if (recog_memoized (prev) < 0 + || get_attr_type (prev) != TYPE_LEA) goto done; } } @@ -16380,8 +16424,8 @@ distance_non_agu_define (unsigned int regno1, unsigned int regno2, && (regno1 == DF_REF_REGNO (*def_rec) || regno2 == DF_REF_REGNO (*def_rec))) { - insn_type = get_attr_type (prev); - if (insn_type != TYPE_LEA) + if (recog_memoized (prev) < 0 + || get_attr_type (prev) != TYPE_LEA) goto done; } } @@ -18679,6 +18723,11 @@ ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, { rtx tmp; + /* AVX supports all the needed comparisons, no need to swap arguments + nor help reload. 
*/ + if (TARGET_AVX) + return code; + switch (code) { case LTGT: @@ -18829,11 +18878,15 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) } else if (TARGET_XOP) { - rtx pcmov = gen_rtx_SET (mode, dest, - gen_rtx_IF_THEN_ELSE (mode, cmp, - op_true, - op_false)); - emit_insn (pcmov); + op_true = force_reg (mode, op_true); + + if (!nonimmediate_operand (op_false, mode)) + op_false = force_reg (mode, op_false); + + emit_insn (gen_rtx_SET (mode, dest, + gen_rtx_IF_THEN_ELSE (mode, cmp, + op_true, + op_false))); } else { @@ -18927,7 +18980,32 @@ ix86_expand_fp_vcond (rtx operands[]) code = ix86_prepare_sse_fp_compare_args (operands[0], code, &operands[4], &operands[5]); if (code == UNKNOWN) - return false; + { + rtx temp; + switch (GET_CODE (operands[3])) + { + case LTGT: + temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4], + operands[5], operands[1], operands[2]); + code = AND; + break; + case UNEQ: + temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4], + operands[5], operands[0], operands[0]); + cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4], + operands[5], operands[1], operands[2]); + code = IOR; + break; + default: + gcc_unreachable (); + } + cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1, + OPTAB_DIRECT); + ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); + return true; + } if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], operands[5], operands[1], operands[2])) @@ -22147,7 +22225,7 @@ assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) for (s = ix86_stack_locals; s; s = s->next) if (s->mode == mode && s->n == n) - return copy_rtx (s->rtl); + return validize_mem (copy_rtx (s->rtl)); s = ggc_alloc_stack_local_entry (); s->n = n; @@ -22156,7 +22234,7 @@ assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) s->next = ix86_stack_locals; ix86_stack_locals = s; - return s->rtl; + return validize_mem (s->rtl); } /* Construct the SYMBOL_REF for the tls_get_addr function. */ @@ -24443,6 +24521,33 @@ enum ix86_builtins /* CFString built-in for darwin */ IX86_BUILTIN_CFSTRING, + /* Builtins to get CPU features. */ + IX86_BUILTIN_CPU_SUPPORTS_CMOV, + IX86_BUILTIN_CPU_SUPPORTS_MMX, + IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT, + IX86_BUILTIN_CPU_SUPPORTS_SSE, + IX86_BUILTIN_CPU_SUPPORTS_SSE2, + IX86_BUILTIN_CPU_SUPPORTS_SSE3, + IX86_BUILTIN_CPU_SUPPORTS_SSSE3, + IX86_BUILTIN_CPU_SUPPORTS_SSE4_1, + IX86_BUILTIN_CPU_SUPPORTS_SSE4_2, + /* Builtins to get CPU type. */ + IX86_BUILTIN_CPU_INIT, + IX86_BUILTIN_CPU_IS_AMD, + IX86_BUILTIN_CPU_IS_INTEL, + IX86_BUILTIN_CPU_IS_INTEL_ATOM, + IX86_BUILTIN_CPU_IS_INTEL_CORE2, + IX86_BUILTIN_CPU_IS_INTEL_COREI7, + IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM, + IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE, + IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE, + IX86_BUILTIN_CPU_IS_AMDFAM10H, + IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA, + IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI, + IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL, + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1, + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2, + + IX86_BUILTIN_MAX }; @@ -25809,6 +25914,848 @@ ix86_init_mmx_sse_builtins (void) } }
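The helpers that follow build tree-level mirrors of two objects defined in libgcc/config/i386/i386-cpuinfo.c. That file is not part of this diff, so the following C shape is an assumption, shown only to make the field-offset bookkeeping below concrete:

/* Assumed libgcc definitions (i386-cpuinfo.c is not shown in this diff).
   Field order must match the processor_features enum inside
   fold_builtin_cpu below; __cpu_model is analogous.  */
struct __processor_features
{
  unsigned int cmov : 1;
  unsigned int mmx : 1;
  unsigned int popcnt : 1;
  unsigned int sse : 1;
  unsigned int sse2 : 1;
  unsigned int sse3 : 1;
  unsigned int ssse3 : 1;
  unsigned int sse4_1 : 1;
  unsigned int sse4_2 : 1;
};

extern struct __processor_features __cpu_features;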
+/* Returns a struct type with name NAME and number of fields equal to + NUM_FIELDS. Each field is an unsigned int bit field of length 1 bit. */ + +static tree +build_struct_with_one_bit_fields (int num_fields, const char *name) +{ + int i; + char field_name [10]; + tree field = NULL_TREE, field_chain = NULL_TREE; + tree type = make_node (RECORD_TYPE); + + strcpy (field_name, "k_field"); + + for (i = 0; i < num_fields; i++) + { + /* Name the fields, 0_field, 1_field, ... */ + field_name [0] = '0' + i; + field = build_decl (UNKNOWN_LOCATION, FIELD_DECL, + get_identifier (field_name), unsigned_type_node); + DECL_BIT_FIELD (field) = 1; + DECL_SIZE (field) = bitsize_one_node; + if (field_chain != NULL_TREE) + DECL_CHAIN (field) = field_chain; + field_chain = field; + } + finish_builtin_struct (type, name, field_chain, NULL_TREE); + return type; +} + +/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */ + +static tree +make_var_decl (tree type, const char *name) +{ + tree new_decl; + struct varpool_node *vnode; + + new_decl = build_decl (UNKNOWN_LOCATION, + VAR_DECL, + get_identifier(name), + type); + + DECL_EXTERNAL (new_decl) = 1; + TREE_STATIC (new_decl) = 1; + TREE_PUBLIC (new_decl) = 1; + DECL_INITIAL (new_decl) = 0; + DECL_ARTIFICIAL (new_decl) = 0; + DECL_PRESERVE_P (new_decl) = 1; + + make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl)); + assemble_variable (new_decl, 0, 0, 0); + + vnode = varpool_node (new_decl); + gcc_assert (vnode != NULL); + /* Set finalized to 1, otherwise it asserts in function "write_symbol" in + lto-streamer-out.c. */ + vnode->finalized = 1; + + return new_decl; +} + +/* Traverses the chain of fields in STRUCT_TYPE and returns the FIELD_NUM + numbered field. */ + +static tree +get_field_from_struct (tree struct_type, int field_num) +{ + int i; + tree field = TYPE_FIELDS (struct_type); + + for (i = 0; i < field_num; i++, field = DECL_CHAIN(field)) + { + gcc_assert (field != NULL_TREE); + } + + return field; +} + +/* FNDECL is a __builtin_cpu_* call that is folded into an integer defined + in libgcc/config/i386/i386-cpuinfo.c */ + +static tree +fold_builtin_cpu (enum ix86_builtins fn_code) +{ + /* This is the order of bit-fields in __processor_features in + i386-cpuinfo.c */ + enum processor_features + { + F_CMOV = 0, + F_MMX, + F_POPCNT, + F_SSE, + F_SSE2, + F_SSE3, + F_SSSE3, + F_SSE4_1, + F_SSE4_2, + F_MAX + }; + + /* This is the order of bit-fields in __processor_model in + i386-cpuinfo.c */ + enum processor_model + { + M_AMD = 0, + M_INTEL, + M_INTEL_ATOM, + M_INTEL_CORE2, + M_INTEL_COREI7, + M_INTEL_COREI7_NEHALEM, + M_INTEL_COREI7_WESTMERE, + M_INTEL_COREI7_SANDYBRIDGE, + M_AMDFAM10H, + M_AMDFAM10H_BARCELONA, + M_AMDFAM10H_SHANGHAI, + M_AMDFAM10H_ISTANBUL, + M_AMDFAM15H_BDVER1, + M_AMDFAM15H_BDVER2, + M_MAX + }; + + static tree __processor_features_type = NULL_TREE; + static tree __cpu_features_var = NULL_TREE; + static tree __processor_model_type = NULL_TREE; + static tree __cpu_model_var = NULL_TREE; + static tree field; + static tree which_struct; + + if (__processor_features_type == NULL_TREE) + __processor_features_type = build_struct_with_one_bit_fields (F_MAX, + "__processor_features"); + + if (__processor_model_type == NULL_TREE) + __processor_model_type = build_struct_with_one_bit_fields (M_MAX, + "__processor_model"); + + if (__cpu_features_var == NULL_TREE) + __cpu_features_var = make_var_decl (__processor_features_type, + "__cpu_features"); + + if (__cpu_model_var == NULL_TREE) + __cpu_model_var = make_var_decl (__processor_model_type, + "__cpu_model"); + + /* Look at the code to identify the field requested.
*/ + switch (fn_code) + { + case IX86_BUILTIN_CPU_SUPPORTS_CMOV: + field = get_field_from_struct (__processor_features_type, F_CMOV); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_SUPPORTS_MMX: + field = get_field_from_struct (__processor_features_type, F_MMX); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT: + field = get_field_from_struct (__processor_features_type, F_POPCNT); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_SUPPORTS_SSE: + field = get_field_from_struct (__processor_features_type, F_SSE); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_SUPPORTS_SSE2: + field = get_field_from_struct (__processor_features_type, F_SSE2); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_SUPPORTS_SSE3: + field = get_field_from_struct (__processor_features_type, F_SSE3); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_SUPPORTS_SSSE3: + field = get_field_from_struct (__processor_features_type, F_SSSE3); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_1: + field = get_field_from_struct (__processor_features_type, F_SSE4_1); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_2: + field = get_field_from_struct (__processor_features_type, F_SSE4_2); + which_struct = __cpu_features_var; + break; + case IX86_BUILTIN_CPU_IS_AMD: + field = get_field_from_struct (__processor_model_type, M_AMD); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_INTEL: + field = get_field_from_struct (__processor_model_type, M_INTEL); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_INTEL_ATOM: + field = get_field_from_struct (__processor_model_type, M_INTEL_ATOM); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_INTEL_CORE2: + field = get_field_from_struct (__processor_model_type, M_INTEL_CORE2); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_INTEL_COREI7: + field = get_field_from_struct (__processor_model_type, + M_INTEL_COREI7); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM: + field = get_field_from_struct (__processor_model_type, + M_INTEL_COREI7_NEHALEM); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE: + field = get_field_from_struct (__processor_model_type, + M_INTEL_COREI7_WESTMERE); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE: + field = get_field_from_struct (__processor_model_type, + M_INTEL_COREI7_SANDYBRIDGE); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_AMDFAM10H: + field = get_field_from_struct (__processor_model_type, + M_AMDFAM10H); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA: + field = get_field_from_struct (__processor_model_type, + M_AMDFAM10H_BARCELONA); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI: + field = get_field_from_struct (__processor_model_type, + M_AMDFAM10H_SHANGHAI); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL: + field = get_field_from_struct (__processor_model_type, + M_AMDFAM10H_ISTANBUL); + which_struct = __cpu_model_var; + break; + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1: + field = get_field_from_struct (__processor_model_type, + M_AMDFAM15H_BDVER1); + which_struct = __cpu_model_var; + 
break; + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2: + field = get_field_from_struct (__processor_model_type, + M_AMDFAM15H_BDVER2); + which_struct = __cpu_model_var; + break; + default: + return NULL_TREE; + } + + return build3 (COMPONENT_REF, TREE_TYPE (field), which_struct, field, NULL_TREE); +} + +static tree +ix86_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, + tree *args ATTRIBUTE_UNUSED, bool ignore ATTRIBUTE_UNUSED) +{ + const char* decl_name = IDENTIFIER_POINTER (DECL_NAME (fndecl)); + if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD + && strstr(decl_name, "__builtin_cpu") != NULL) + { + enum ix86_builtins code = (enum ix86_builtins) + DECL_FUNCTION_CODE (fndecl); + return fold_builtin_cpu (code); + } + return NULL_TREE; +} + +/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL + to return integer VERSION_NUM if the outcome of the function PREDICATE_DECL + is true (or false if INVERT_CHECK is true). This function will be called + during version dispatch to decide which function version to execute. */ + +static basic_block +add_condition_to_bb (tree function_decl, int version_num, + basic_block new_bb, tree predicate_decl, + bool invert_check) +{ + gimple return_stmt; + gimple call_cond_stmt; + gimple if_else_stmt; + + basic_block bb1, bb2, bb3; + edge e12, e23; + + tree cond_var; + gimple_seq gseq; + + tree old_current_function_decl; + + old_current_function_decl = current_function_decl; + push_cfun (DECL_STRUCT_FUNCTION (function_decl)); + current_function_decl = function_decl; + + gcc_assert (new_bb != NULL); + gseq = bb_seq (new_bb); + + if (predicate_decl == NULL_TREE) + { + return_stmt = gimple_build_return (build_int_cst (NULL, version_num)); + gimple_seq_add_stmt (&gseq, return_stmt); + set_bb_seq (new_bb, gseq); + gimple_set_bb (return_stmt, new_bb); + pop_cfun (); + current_function_decl = old_current_function_decl; + return new_bb; + } + + cond_var = create_tmp_var (integer_type_node, NULL); + call_cond_stmt = gimple_build_call (predicate_decl, 0); + gimple_call_set_lhs (call_cond_stmt, cond_var); + add_referenced_var (cond_var); + mark_symbols_for_renaming (call_cond_stmt); + + gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl)); + gimple_set_bb (call_cond_stmt, new_bb); + gimple_seq_add_stmt (&gseq, call_cond_stmt); + + if (!invert_check) + if_else_stmt = gimple_build_cond (GT_EXPR, cond_var, + integer_zero_node, + NULL_TREE, NULL_TREE); + else + if_else_stmt = gimple_build_cond (LE_EXPR, cond_var, + integer_zero_node, + NULL_TREE, NULL_TREE); + + mark_symbols_for_renaming (if_else_stmt); + gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl)); + gimple_set_bb (if_else_stmt, new_bb); + gimple_seq_add_stmt (&gseq, if_else_stmt); + + return_stmt = gimple_build_return (build_int_cst (NULL, version_num)); + gimple_seq_add_stmt (&gseq, return_stmt); + + + set_bb_seq (new_bb, gseq); + + bb1 = new_bb; + e12 = split_block (bb1, if_else_stmt); + bb2 = e12->dest; + e12->flags &= ~EDGE_FALLTHRU; + e12->flags |= EDGE_TRUE_VALUE; + + e23 = split_block (bb2, return_stmt); + gimple_set_bb (return_stmt, bb2); + bb3 = e23->dest; + make_edge (bb1, bb3, EDGE_FALSE_VALUE); + + remove_edge (e23); + make_edge (bb2, EXIT_BLOCK_PTR, 0); + + free_dominance_info (CDI_DOMINATORS); + free_dominance_info (CDI_POST_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + calculate_dominance_info (CDI_POST_DOMINATORS); + rebuild_cgraph_edges (); + update_ssa (TODO_update_ssa); + if (dump_file) + dump_function_to_file (current_function_decl,
dump_file, TDF_BLOCKS); + + pop_cfun (); + current_function_decl = old_current_function_decl; + + return bb3; +} + +/* This makes an empty function with one empty basic block *CREATED_BB + apart from the ENTRY and EXIT blocks. */ + +static tree +make_empty_function (basic_block *created_bb) +{ + tree decl, type, t; + basic_block new_bb; + tree old_current_function_decl; + tree decl_name; + char name[1000]; + static int num = 0; + + /* The condition function should return an integer. */ + type = build_function_type_list (integer_type_node, NULL_TREE); + + sprintf (name, "cond_%d", num); + num++; + decl = build_fn_decl (name, type); + + decl_name = get_identifier (name); + SET_DECL_ASSEMBLER_NAME (decl, decl_name); + DECL_NAME (decl) = decl_name; + gcc_assert (cgraph_node (decl) != NULL); + + TREE_USED (decl) = 1; + DECL_ARTIFICIAL (decl) = 1; + DECL_IGNORED_P (decl) = 0; + TREE_PUBLIC (decl) = 0; + DECL_UNINLINABLE (decl) = 1; + DECL_EXTERNAL (decl) = 0; + DECL_CONTEXT (decl) = NULL_TREE; + DECL_INITIAL (decl) = make_node (BLOCK); + DECL_STATIC_CONSTRUCTOR (decl) = 0; + TREE_READONLY (decl) = 0; + DECL_PURE_P (decl) = 0; + + /* Build result decl and add to function_decl. */ + t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node); + DECL_ARTIFICIAL (t) = 1; + DECL_IGNORED_P (t) = 1; + DECL_RESULT (decl) = t; + + gimplify_function_tree (decl); + + old_current_function_decl = current_function_decl; + push_cfun (DECL_STRUCT_FUNCTION (decl)); + current_function_decl = decl; + init_empty_tree_cfg_for_function (DECL_STRUCT_FUNCTION (decl)); + + cfun->curr_properties |= + (PROP_gimple_lcf | PROP_gimple_leh | PROP_cfg | PROP_referenced_vars | + PROP_ssa); + + new_bb = create_empty_bb (ENTRY_BLOCK_PTR); + make_edge (ENTRY_BLOCK_PTR, new_bb, EDGE_FALLTHRU); + make_edge (new_bb, EXIT_BLOCK_PTR, 0); + + /* This call is very important if this pass runs when the IR is in + SSA form. It breaks things in strange ways otherwise. */ + init_tree_ssa (DECL_STRUCT_FUNCTION (decl)); + init_ssa_operands (); + + cgraph_add_new_function (decl, true); + cgraph_call_function_insertion_hooks (cgraph_node (decl)); + cgraph_mark_needed_node (cgraph_node (decl)); + + if (dump_file) + dump_function_to_file (decl, dump_file, TDF_BLOCKS); + + pop_cfun (); + current_function_decl = old_current_function_decl; + *created_bb = new_bb; + return decl; +} + +/* This function conservatively checks if loop LOOP is tree vectorizable. + The code is adapted from tree-vectorizer.c and tree-vect-stmts.c. */ + +static bool +is_loop_form_vectorizable (struct loop *loop) +{ + /* Inner most loops should have 2 basic blocks. */ + if (!loop->inner) + { + /* This is inner most. */ + if (loop->num_nodes != 2) + return false; + /* Empty loop. */ + if (empty_block_p (loop->header)) + return false; + } + else + { + /* Bail if there are multiple nested loops. */ + if ((loop->inner)->inner || (loop->inner)->next) + return false; + /* Recursive call for the inner loop. */ + if (!is_loop_form_vectorizable (loop->inner)) + return false; + if (loop->num_nodes != 5) + return false; + /* The loop has 0 iterations. */ + if (TREE_INT_CST_LOW (number_of_latch_executions (loop)) == 0) + return false; + } + + return true; +}
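For reference, the loop shape the form check above and the statement check below are screening for — illustrative only:

/* An inner-most counted loop with exactly two basic blocks and an
   array store: passes is_loop_form_vectorizable, and the ARRAY_REF
   on the left-hand side gives is_loop_stmts_vectorizable the
   vectorizable access it looks for.  */
void
scale (float *a, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = 2.0f * a[i];
}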
+ +/* This function checks if there is at least one vectorizable + load/store in loop LOOP. Code adapted from tree-vect-stmts.c. */ + +static bool +is_loop_stmts_vectorizable (struct loop *loop) +{ + basic_block *body; + unsigned int i; + bool vect_load_store = false; + + body = get_loop_body (loop); + + for (i = 0; i < loop->num_nodes; i++) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple stmt = gsi_stmt (gsi); + enum gimple_code code = gimple_code (stmt); + + if (gimple_has_volatile_ops (stmt)) + return false; + + /* Does it have a vectorizable store or load in a hot bb? */ + if (code == GIMPLE_ASSIGN) + { + enum tree_code lhs_code = TREE_CODE (gimple_assign_lhs (stmt)); + enum tree_code rhs_code = gimple_assign_rhs_code (stmt); + + /* Only look at hot vectorizable loads/stores. */ + if (profile_status == PROFILE_READ + && !maybe_hot_bb_p (body[i])) + continue; + + if (lhs_code == ARRAY_REF + || lhs_code == INDIRECT_REF + || lhs_code == COMPONENT_REF + || lhs_code == IMAGPART_EXPR + || lhs_code == REALPART_EXPR + || lhs_code == MEM_REF) + vect_load_store = true; + else if (rhs_code == ARRAY_REF + || rhs_code == INDIRECT_REF + || rhs_code == COMPONENT_REF + || rhs_code == IMAGPART_EXPR + || rhs_code == REALPART_EXPR + || rhs_code == MEM_REF) + vect_load_store = true; + } + } + } + + return vect_load_store; +} + +/* This function checks if there are any vectorizable loops present + in CURRENT_FUNCTION_DECL. This function is called before the + loop optimization passes and is therefore very conservative in + checking for vectorizable loops. Also, all the checks used in the + vectorizer pass cannot be used here since many loop optimizations + have not occurred which could change the loop structure and the + stmts. + + The conditions for a loop being vectorizable are adapted from + tree-vectorizer.c, tree-vect-stmts.c. */ + +static bool +any_loops_vectorizable_with_load_store (void) +{ + unsigned int vect_loops_num; + loop_iterator li; + struct loop *loop; + bool vectorizable_loop_found = false; + + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); + + vect_loops_num = number_of_loops (); + + /* Bail out if there are no loops. */ + if (vect_loops_num <= 1) + { + loop_optimizer_finalize (); + return false; + } + + scev_initialize (); + + /* This is iterating over all loops. */ + FOR_EACH_LOOP (li, loop, 0) + if (optimize_loop_nest_for_speed_p (loop)) + { + if (!is_loop_form_vectorizable (loop)) + continue; + if (!is_loop_stmts_vectorizable (loop)) + continue; + vectorizable_loop_found = true; + break; + } + + + loop_optimizer_finalize (); + scev_finalize (); + + return vectorizable_loop_found; +} + +/* This makes the function that chooses the version to execute based + on the condition. This condition function will decide which version + of the function to execute. It should look like this: + + int cond_i () + { + __builtin_cpu_init (); // Get the cpu type. + a = __builtin_cpu_is_<type1> (); + if (a) + return 1; // first version created. + a = __builtin_cpu_is_<type2> (); + if (a) + return 2; // second version created. + ... + return 0; // the default version. + } + + NEW_BB is the new last basic block of this function and to which more + conditions can be added. It is updated by this function.
*/ + +static tree +make_condition_function (basic_block *new_bb) +{ + gimple ifunc_cpu_init_stmt; + gimple_seq gseq; + tree cond_func_decl; + tree old_current_function_decl; + + + cond_func_decl = make_empty_function (new_bb); + + old_current_function_decl = current_function_decl; + push_cfun (DECL_STRUCT_FUNCTION (cond_func_decl)); + current_function_decl = cond_func_decl; + + gseq = bb_seq (*new_bb); + + /* Since this is possibly dispatched with IFUNC, call builtin_cpu_init + explicitly, as the constructor will only fire after IFUNC + initializers. */ + ifunc_cpu_init_stmt = gimple_build_call_vec ( + ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], NULL); + gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt); + gimple_set_bb (ifunc_cpu_init_stmt, *new_bb); + set_bb_seq (*new_bb, gseq); + + pop_cfun (); + current_function_decl = old_current_function_decl; + return cond_func_decl; +} + +/* Create a new target optimization node with tune set to ARCH_TUNE. */ +static tree +create_mtune_target_opt_node (const char *arch_tune) +{ + struct cl_target_option target_options; + const char *old_tune_string; + tree optimization_node; + + /* Build an optimization node that is the same as the current one except with + "tune=arch_tune". */ + cl_target_option_save (&target_options, &global_options); + old_tune_string = ix86_tune_string; + + ix86_tune_string = arch_tune; + ix86_option_override_internal (false); + + optimization_node = build_target_option_node (); + + ix86_tune_string = old_tune_string; + cl_target_option_restore (&global_options, &target_options); + + return optimization_node; +} + +/* Should a version of this function be specially optimized for core2? + + This function should have checks to see if there are any opportunities for + core2 specific optimizations, otherwise do not create a clone. The + following opportunities are checked. + + * Check if this function has vectorizable loads/stores as it is known that + unaligned 128-bit movs to/from memory (movdqu) are very expensive on + core2 whereas the later generations like corei7 have no additional + overhead. + + This versioning is triggered only when -ftree-vectorize is turned on + and when multi-versioning for core2 is requested using -mvarch=core2. + + Return false if no versioning is required. Return true if a version must + be created. Generate the *OPTIMIZATION_NODE that must be used to optimize + the newly created version, that is tag "tune=core2" on the new version. */ + +static bool +mversionable_for_core2_p (tree *optimization_node, + tree *cond_func_decl, basic_block *new_bb) +{ + tree predicate_decl; + bool is_mversion_target_core2 = false; + bool create_version = false; + + if (ix86_varch_specified + && ix86_varch[PROCESSOR_CORE2_64]) + is_mversion_target_core2 = true; + + /* Check for criteria to create a new version for core2. */ + + /* If -ftree-vectorize is not used or MV is not requested, bail. */ + if (flag_tree_vectorize && is_mversion_target_core2) + { + /* Check if there is at least one loop that has a vectorizable load/store. + These are the ones that can generate the unaligned mov which is known + to be very slow on core2. */ + if (any_loops_vectorizable_with_load_store ()) + create_version = true; + } + /* else if XXX: Add more criteria to version for core2. */ + + if (!create_version) + return false; + + /* If the condition function's body has not been created, create it now.
*/ + if (*cond_func_decl == NULL) + *cond_func_decl = make_condition_function (new_bb); + + *optimization_node = create_mtune_target_opt_node ("core2"); + + predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS_INTEL_CORE2]; + *new_bb = add_condition_to_bb (*cond_func_decl, 0, *new_bb, + predicate_decl, false); + return true; +} + +/* Should this function CURRENT_FUNCTION_DECL be multi-versioned, if so + the number of versions to be created (other than the original) is + returned. The outcome of COND_FUNC_DECL will decide the version to be + executed. The OPTIMIZATION_NODE_CHAIN has a unique node for each + version to be created. */ + +static int +ix86_mversion_function (tree fndecl ATTRIBUTE_UNUSED, + tree *optimization_node_chain, + tree *cond_func_decl) +{ + basic_block new_bb; + tree optimization_node; + int num_versions_created = 0; + + if (ix86_mv_arch_string == NULL) + return 0; + + if (mversionable_for_core2_p (&optimization_node, cond_func_decl, &new_bb)) + num_versions_created++; + + if (!num_versions_created) + return 0; + + *optimization_node_chain = tree_cons (optimization_node, + NULL_TREE, *optimization_node_chain); + + /* Return the default version as the last stmt in cond_func_decl. */ + if (*cond_func_decl != NULL) + new_bb = add_condition_to_bb (*cond_func_decl, num_versions_created, + new_bb, NULL_TREE, false); + + return num_versions_created; +} + +/* A builtin to init/return the cpu type or feature. Returns an + integer and the type is a const if IS_CONST is set. */ + +static void +make_platform_builtin (const char* name, int code, int is_const) +{ + tree decl; + tree type; + + type = ix86_get_builtin_func_type (INT_FTYPE_VOID); + decl = add_builtin_function (name, type, code, BUILT_IN_MD, + NULL, NULL_TREE); + gcc_assert (decl != NULL_TREE); + ix86_builtins[(int) code] = decl; + if (is_const) + TREE_READONLY (decl) = 1; +} + +/* Builtins to get CPU type and features supported. 
*/ + +static void +ix86_init_platform_type_builtins (void) +{ + make_platform_builtin ("__builtin_cpu_init", + IX86_BUILTIN_CPU_INIT, 0); + make_platform_builtin ("__builtin_cpu_supports_cmov", + IX86_BUILTIN_CPU_SUPPORTS_CMOV, 1); + make_platform_builtin ("__builtin_cpu_supports_mmx", + IX86_BUILTIN_CPU_SUPPORTS_MMX, 1); + make_platform_builtin ("__builtin_cpu_supports_popcount", + IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT, 1); + make_platform_builtin ("__builtin_cpu_supports_sse", + IX86_BUILTIN_CPU_SUPPORTS_SSE, 1); + make_platform_builtin ("__builtin_cpu_supports_sse2", + IX86_BUILTIN_CPU_SUPPORTS_SSE2, 1); + make_platform_builtin ("__builtin_cpu_supports_sse3", + IX86_BUILTIN_CPU_SUPPORTS_SSE3, 1); + make_platform_builtin ("__builtin_cpu_supports_ssse3", + IX86_BUILTIN_CPU_SUPPORTS_SSSE3, 1); + make_platform_builtin ("__builtin_cpu_supports_sse4_1", + IX86_BUILTIN_CPU_SUPPORTS_SSE4_1, 1); + make_platform_builtin ("__builtin_cpu_supports_sse4_2", + IX86_BUILTIN_CPU_SUPPORTS_SSE4_2, 1); + make_platform_builtin ("__builtin_cpu_is_amd", + IX86_BUILTIN_CPU_IS_AMD, 1); + make_platform_builtin ("__builtin_cpu_is_intel_atom", + IX86_BUILTIN_CPU_IS_INTEL_ATOM, 1); + make_platform_builtin ("__builtin_cpu_is_intel_core2", + IX86_BUILTIN_CPU_IS_INTEL_CORE2, 1); + make_platform_builtin ("__builtin_cpu_is_intel", + IX86_BUILTIN_CPU_IS_INTEL, 1); + make_platform_builtin ("__builtin_cpu_is_intel_corei7", + IX86_BUILTIN_CPU_IS_INTEL_COREI7, 1); + make_platform_builtin ("__builtin_cpu_is_intel_corei7_nehalem", + IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM, 1); + make_platform_builtin ("__builtin_cpu_is_intel_corei7_westmere", + IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE, 1); + make_platform_builtin ("__builtin_cpu_is_intel_corei7_sandybridge", + IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE, 1); + make_platform_builtin ("__builtin_cpu_is_amdfam10", + IX86_BUILTIN_CPU_IS_AMDFAM10H, 1); + make_platform_builtin ("__builtin_cpu_is_amdfam10_barcelona", + IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA, 1); + make_platform_builtin ("__builtin_cpu_is_amdfam10_shanghai", + IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI, 1); + make_platform_builtin ("__builtin_cpu_is_amdfam10_istanbul", + IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL, 1); + make_platform_builtin ("__builtin_cpu_is_amdfam15_bdver1", + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1, 1); + make_platform_builtin ("__builtin_cpu_is_amdfam15_bdver2", + IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2, 1); +} + +/* Detect if this unaligned vectorizable load/stores should be + considered slow. This is true for core2 where the movdqu insn + is slow, ~5x slower than the movdqa. */ + +static bool +ix86_slow_unaligned_vector_memop (void) +{ + /* This is known to be slow on core2. */ + if (ix86_tune == PROCESSOR_CORE2_64 + || ix86_tune == PROCESSOR_CORE2_32) + return true; + + return false; +} + /* Internal method for ix86_init_builtins. */ static void @@ -25892,6 +26839,9 @@ ix86_init_builtins (void) ix86_init_builtin_types (); + /* Builtins to get CPU type and features. */ + ix86_init_platform_type_builtins (); + /* TFmode support builtins. */ def_builtin_const (0, "__builtin_infq", FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ); @@ -27351,6 +28301,48 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, enum machine_mode mode0, mode1, mode2; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + /* For CPU builtins that can be folded, fold first and expand the fold. 
*/ + switch (fcode) + { + case IX86_BUILTIN_CPU_SUPPORTS_CMOV: + case IX86_BUILTIN_CPU_SUPPORTS_MMX: + case IX86_BUILTIN_CPU_SUPPORTS_POPCOUNT: + case IX86_BUILTIN_CPU_SUPPORTS_SSE: + case IX86_BUILTIN_CPU_SUPPORTS_SSE2: + case IX86_BUILTIN_CPU_SUPPORTS_SSE3: + case IX86_BUILTIN_CPU_SUPPORTS_SSSE3: + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_1: + case IX86_BUILTIN_CPU_SUPPORTS_SSE4_2: + case IX86_BUILTIN_CPU_IS_AMD: + case IX86_BUILTIN_CPU_IS_INTEL: + case IX86_BUILTIN_CPU_IS_INTEL_ATOM: + case IX86_BUILTIN_CPU_IS_INTEL_CORE2: + case IX86_BUILTIN_CPU_IS_INTEL_COREI7: + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_NEHALEM: + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_WESTMERE: + case IX86_BUILTIN_CPU_IS_INTEL_COREI7_SANDYBRIDGE: + case IX86_BUILTIN_CPU_IS_AMDFAM10H: + case IX86_BUILTIN_CPU_IS_AMDFAM10H_BARCELONA: + case IX86_BUILTIN_CPU_IS_AMDFAM10H_SHANGHAI: + case IX86_BUILTIN_CPU_IS_AMDFAM10H_ISTANBUL: + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER1: + case IX86_BUILTIN_CPU_IS_AMDFAM15H_BDVER2: + { + tree fold_expr = fold_builtin_cpu ((enum ix86_builtins) fcode); + gcc_assert (fold_expr != NULL_TREE); + return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); + } + case IX86_BUILTIN_CPU_INIT: + { + /* Make it call __cpu_indicator_init in libgcc. */ + tree call_expr, fndecl, type; + type = build_function_type_list (integer_type_node, NULL_TREE); + fndecl = build_fn_decl ("__cpu_indicator_init", type); + call_expr = build_call_expr (fndecl, 0); + return expand_expr (call_expr, target, mode, EXPAND_NORMAL); + } + } + /* Determine whether the builtin function is available under the current ISA. Originally the builtin was not created if it wasn't applicable to the current ISA based on the command line switches. With function specific @@ -34933,6 +35925,82 @@ ix86_autovectorize_vector_sizes (void) return (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0; } +/* If LOOP contains a possible LCP stalling instruction on corei7, + calculate new number of times to unroll instead of NUNROLL so that + the unrolled loop will still likely fit into the loop stream detector. */ +static unsigned +ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop) +{ + basic_block *body, bb; + unsigned i; + rtx insn; + bool found = false; + unsigned newunroll; + + if (ix86_tune != PROCESSOR_COREI7_64 && + ix86_tune != PROCESSOR_COREI7_32) + return nunroll; + + /* Look for instructions that store a constant into HImode (16-bit) + memory. These require a length-changing prefix and on corei7 are + prone to LCP stalls. These stalls can be avoided if the loop + is streamed from the loop stream detector. */ + body = get_loop_body (loop); + for (i = 0; i < loop->num_nodes; i++) + { + bb = body[i]; + + FOR_BB_INSNS (bb, insn) + { + rtx set_expr, dest; + set_expr = single_set (insn); + if (!set_expr) + continue; + + dest = SET_DEST (set_expr); + + /* Don't reduce unroll factor in loops with floating point + computation, which tend to benefit more heavily from + larger unroll factors and are less likely to bottleneck + at the decoder. */ + if (FLOAT_MODE_P (GET_MODE (dest))) + { + free (body); + return nunroll; + } + + if (!found + && GET_MODE (dest) == HImode + && CONST_INT_P (SET_SRC (set_expr)) + && MEM_P (dest)) + { + found = true; + /* Keep walking loop body to look for FP computations above. 
*/ + } + } + } + free (body); + + if (!found) + return nunroll; + + if (dump_file) + { + fprintf (dump_file, + ";; Loop contains HImode store of const (possible LCP stalls),\n"); + fprintf (dump_file, + " reduce unroll factor to fit into Loop Stream Detector\n"); + } + + /* On corei7 the loop stream detector can hold 28 uops, so + don't allow unrolling to exceed that many instructions. */ + newunroll = 28 / loop->av_ninsns; + if (newunroll < nunroll) + return newunroll; + + return nunroll; +} + /* Initialize the GCC target structure. */ #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory @@ -35097,6 +36165,15 @@ ix86_autovectorize_vector_sizes (void) #undef TARGET_BUILD_BUILTIN_VA_LIST #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN ix86_fold_builtin + +#undef TARGET_MVERSION_FUNCTION +#define TARGET_MVERSION_FUNCTION ix86_mversion_function + +#undef TARGET_SLOW_UNALIGNED_VECTOR_MEMOP +#define TARGET_SLOW_UNALIGNED_VECTOR_MEMOP ix86_slow_unaligned_vector_memop + #undef TARGET_ENUM_VA_LIST_P #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list @@ -35253,6 +36330,9 @@ ix86_autovectorize_vector_sizes (void) #define TARGET_INIT_LIBFUNCS darwin_rename_builtins #endif +#undef TARGET_LOOP_UNROLL_ADJUST +#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-i386.h" diff --git a/gcc-4.6/gcc/config/i386/i386.md b/gcc-4.6/gcc/config/i386/i386.md index 3bfa0b3..b1d7e5e 100644 --- a/gcc-4.6/gcc/config/i386/i386.md +++ b/gcc-4.6/gcc/config/i386/i386.md @@ -1960,7 +1960,7 @@ (define_insn "*movdi_internal_rex64" [(set (match_operand:DI 0 "nonimmediate_operand" - "=r,r ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") + "=r,r ,r,m ,!o,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym") (match_operand:DI 1 "general_operand" "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r ,m ,C ,*x,*Yi,*x,r ,m ,*Ym,*x"))] "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))" @@ -2905,7 +2905,7 @@ (define_insn "*movdf_internal_rex64" [(set (match_operand:DF 0 "nonimmediate_operand" - "=f,m,f,r ,m,!r,!m,Y2*x,Y2*x,Y2*x,m ,Yi,r ") + "=f,m,f,r ,m,!r,!o,Y2*x,Y2*x,Y2*x,m ,Yi,r ") (match_operand:DF 1 "general_operand" "fm,f,G,rm,r,F ,F ,C ,Y2*x,m ,Y2*x,r ,Yi"))] "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1])) @@ -5103,7 +5103,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && SSE_REG_P (SUBREG_REG (operands[0]))))" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_split @@ -5116,7 +5116,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && SSE_REG_P (SUBREG_REG (operands[0]))))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:MODEF (match_dup 2)))]) @@ -5207,7 +5207,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && SSE_REG_P (SUBREG_REG (operands[0]))))" [(const_int 0)] { rtx op1 = operands[1]; @@ -5248,7 +5248,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && SSE_REG_P (SUBREG_REG (operands[0]))))" [(const_int 0)] { operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0], @@ -5270,7 +5270,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && 
SSE_REG_P (SUBREG_REG (operands[0]))))" [(const_int 0)] { rtx op1 = operands[1]; @@ -5280,11 +5280,20 @@ if (GET_CODE (op1) == SUBREG) op1 = SUBREG_REG (op1); - if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES) + if (GENERAL_REG_P (op1)) { operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0); - emit_insn (gen_sse2_loadld (operands[4], - CONST0_RTX (V4SImode), operands[1])); + if (TARGET_INTER_UNIT_MOVES) + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[1])); + else + { + operands[5] = ix86_force_to_memory (GET_MODE (operands[1]), + operands[1]); + emit_insn (gen_sse2_loadld (operands[4], + CONST0_RTX (V4SImode), operands[5])); + ix86_free_from_memory (GET_MODE (operands[1])); + } } /* We can ignore possible trapping value in the high part of SSE register for non-trapping math. */ @@ -5305,7 +5314,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && SSE_REG_P (SUBREG_REG (operands[0]))))" [(const_int 0)] { operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0], @@ -5366,7 +5375,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && SSE_REG_P (SUBREG_REG (operands[0]))))" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit" @@ -5401,7 +5410,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && SSE_REG_P (SUBREG_REG (operands[0]))))" [(set (match_dup 2) (match_dup 1)) (set (match_dup 0) (float:MODEF (match_dup 2)))]) @@ -5414,7 +5423,7 @@ && reload_completed && (SSE_REG_P (operands[0]) || (GET_CODE (operands[0]) == SUBREG - && SSE_REG_P (operands[0])))" + && SSE_REG_P (SUBREG_REG (operands[0]))))" [(set (match_dup 0) (float:MODEF (match_dup 1)))]) (define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387_with_temp" @@ -11341,7 +11350,7 @@ (define_insn_and_split "*call_pop_1_vzeroupper" [(parallel - [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) (match_operand:SI 1 "" "")) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) @@ -11356,7 +11365,7 @@ [(set_attr "type" "call")]) (define_insn "*call_pop_1" - [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) (match_operand:SI 1 "" "")) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) @@ -11371,7 +11380,7 @@ (define_insn_and_split "*sibcall_pop_1_vzeroupper" [(parallel - [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U")) (match_operand:SI 1 "" "")) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) @@ -11386,7 +11395,7 @@ [(set_attr "type" "call")]) (define_insn "*sibcall_pop_1" - [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U")) (match_operand:SI 1 "" "")) (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) @@ -11437,7 +11446,7 @@ [(set_attr "type" "call")]) (define_insn_and_split "*call_1_vzeroupper" - [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) (match_operand 1 "" "")) (unspec [(match_operand 2 "const_int_operand" "")] UNSPEC_CALL_NEEDS_VZEROUPPER)] @@ -11449,14 +11458,14 @@ [(set_attr "type" "call")]) (define_insn "*call_1" 
- [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lsm")) + [(call (mem:QI (match_operand:SI 0 "call_insn_operand" "lzm")) (match_operand 1 "" ""))] "!TARGET_64BIT && !SIBLING_CALL_P (insn)" { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) (define_insn_and_split "*sibcall_1_vzeroupper" - [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U")) (match_operand 1 "" "")) (unspec [(match_operand 2 "const_int_operand" "")] UNSPEC_CALL_NEEDS_VZEROUPPER)] @@ -11468,14 +11477,14 @@ [(set_attr "type" "call")]) (define_insn "*sibcall_1" - [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "s,U")) + [(call (mem:QI (match_operand:SI 0 "sibcall_insn_operand" "z,U")) (match_operand 1 "" ""))] "!TARGET_64BIT && SIBLING_CALL_P (insn)" { return ix86_output_call_insn (insn, operands[0], 0); } [(set_attr "type" "call")]) (define_insn_and_split "*call_1_rex64_vzeroupper" - [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm")) (match_operand 1 "" "")) (unspec [(match_operand 2 "const_int_operand" "")] UNSPEC_CALL_NEEDS_VZEROUPPER)] @@ -11488,7 +11497,7 @@ [(set_attr "type" "call")]) (define_insn "*call_1_rex64" - [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm")) (match_operand 1 "" ""))] "TARGET_64BIT && !SIBLING_CALL_P (insn) && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC" @@ -11497,7 +11506,7 @@ (define_insn_and_split "*call_1_rex64_ms_sysv_vzeroupper" [(parallel - [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm")) (match_operand 1 "" "")) (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) (clobber (reg:TI XMM6_REG)) @@ -11522,7 +11531,7 @@ [(set_attr "type" "call")]) (define_insn "*call_1_rex64_ms_sysv" - [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm")) + [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rzm")) (match_operand 1 "" "")) (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL) (clobber (reg:TI XMM6_REG)) @@ -11561,7 +11570,7 @@ [(set_attr "type" "call")]) (define_insn_and_split "*sibcall_1_rex64_vzeroupper" - [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U")) + [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "z,U")) (match_operand 1 "" "")) (unspec [(match_operand 2 "const_int_operand" "")] UNSPEC_CALL_NEEDS_VZEROUPPER)] @@ -11573,7 +11582,7 @@ [(set_attr "type" "call")]) (define_insn "*sibcall_1_rex64" - [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "s,U")) + [(call (mem:QI (match_operand:DI 0 "sibcall_insn_operand" "z,U")) (match_operand 1 "" ""))] "TARGET_64BIT && SIBLING_CALL_P (insn)" { return ix86_output_call_insn (insn, operands[0], 0); } @@ -14616,7 +14625,7 @@ emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1], GEN_INT (0x04))); else - ix86_expand_rint (operand0, operand1); + ix86_expand_rint (operands[0], operands[1]); } else { @@ -14640,9 +14649,9 @@ if (optimize_insn_for_size_p ()) FAIL; if (TARGET_64BIT || (<MODE>mode != DFmode)) - ix86_expand_round (operand0, operand1); + ix86_expand_round (operands[0], operands[1]); else - ix86_expand_rounddf_32 (operand0, operand1); + ix86_expand_rounddf_32 (operands[0], operands[1]); DONE; }) @@ -14787,7 +14796,7 @@ { if (optimize_insn_for_size_p ()) FAIL; - ix86_expand_lround (operand0, operand1); + ix86_expand_lround 
(operands[0], operands[1]); DONE; }) @@ -14862,9 +14871,9 @@ emit_insn (gen_sse4_1_round<mode>2 (operands[0], operands[1], GEN_INT (0x01))); else if (TARGET_64BIT || (<MODE>mode != DFmode)) - ix86_expand_floorceil (operand0, operand1, true); + ix86_expand_floorceil (operands[0], operands[1], true); else - ix86_expand_floorceildf_32 (operand0, operand1, true); + ix86_expand_floorceildf_32 (operands[0], operands[1], true); } else { @@ -15044,7 +15053,7 @@ { if (TARGET_64BIT && optimize_insn_for_size_p ()) FAIL; - ix86_expand_lfloorceil (operand0, operand1, true); + ix86_expand_lfloorceil (operands[0], operands[1], true); DONE; }) @@ -15119,9 +15128,9 @@ else if (optimize_insn_for_size_p ()) FAIL; else if (TARGET_64BIT || (<MODE>mode != DFmode)) - ix86_expand_floorceil (operand0, operand1, false); + ix86_expand_floorceil (operands[0], operands[1], false); else - ix86_expand_floorceildf_32 (operand0, operand1, false); + ix86_expand_floorceildf_32 (operands[0], operands[1], false); } else { @@ -15299,7 +15308,7 @@ "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH && !flag_trapping_math" { - ix86_expand_lfloorceil (operand0, operand1, false); + ix86_expand_lfloorceil (operands[0], operands[1], false); DONE; }) @@ -15374,9 +15383,9 @@ else if (optimize_insn_for_size_p ()) FAIL; else if (TARGET_64BIT || (<MODE>mode != DFmode)) - ix86_expand_trunc (operand0, operand1); + ix86_expand_trunc (operands[0], operands[1]); else - ix86_expand_truncdf_32 (operand0, operand1); + ix86_expand_truncdf_32 (operands[0], operands[1]); } else { @@ -15699,7 +15708,8 @@ (set (match_operand:DI 1 "register_operand" "=S") (plus:DI (match_dup 3) (const_int 8)))] - "TARGET_64BIT" + "TARGET_64BIT + && !(fixed_regs[SI_REG] || fixed_regs[DI_REG])" "movsq" [(set_attr "type" "str") (set_attr "memory" "both") @@ -15714,7 +15724,7 @@ (set (match_operand:P 1 "register_operand" "=S") (plus:P (match_dup 3) (const_int 4)))] - "" + "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])" "movs{l|d}" [(set_attr "type" "str") (set_attr "memory" "both") @@ -15729,7 +15739,7 @@ (set (match_operand:P 1 "register_operand" "=S") (plus:P (match_dup 3) (const_int 2)))] - "" + "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])" "movsw" [(set_attr "type" "str") (set_attr "memory" "both") @@ -15744,7 +15754,7 @@ (set (match_operand:P 1 "register_operand" "=S") (plus:P (match_dup 3) (const_int 1)))] - "" + "!(fixed_regs[SI_REG] || fixed_regs[DI_REG])" "movsb" [(set_attr "type" "str") (set_attr "memory" "both") @@ -15779,7 +15789,8 @@ (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) (use (match_dup 5))] - "TARGET_64BIT" + "TARGET_64BIT + && !(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" "rep{%;} movsq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -15798,7 +15809,7 @@ (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) (use (match_dup 5))] - "" + "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" "rep{%;} movs{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -15815,7 +15826,7 @@ (set (mem:BLK (match_dup 3)) (mem:BLK (match_dup 4))) (use (match_dup 5))] - "" + "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])" "rep{%;} movsb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -15858,7 +15869,9 @@ operands[3] = gen_rtx_PLUS (Pmode, operands[0], GEN_INT (GET_MODE_SIZE (GET_MODE (operands[2])))); - if (TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) + /* Can't use this if the user has appropriated eax or edi. 
*/ + if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ()) + && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])) { emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2], operands[3])); @@ -15880,7 +15893,8 @@ (set (match_operand:DI 0 "register_operand" "=D") (plus:DI (match_dup 1) (const_int 8)))] - "TARGET_64BIT" + "TARGET_64BIT + && !(fixed_regs[AX_REG] || fixed_regs[DI_REG])" "stosq" [(set_attr "type" "str") (set_attr "memory" "store") @@ -15892,7 +15906,7 @@ (set (match_operand:P 0 "register_operand" "=D") (plus:P (match_dup 1) (const_int 4)))] - "" + "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])" "stos{l|d}" [(set_attr "type" "str") (set_attr "memory" "store") @@ -15904,7 +15918,7 @@ (set (match_operand:P 0 "register_operand" "=D") (plus:P (match_dup 1) (const_int 2)))] - "" + "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])" "stosw" [(set_attr "type" "str") (set_attr "memory" "store") @@ -15916,7 +15930,7 @@ (set (match_operand:P 0 "register_operand" "=D") (plus:P (match_dup 1) (const_int 1)))] - "" + "!(fixed_regs[AX_REG] || fixed_regs[DI_REG])" "stosb" [(set_attr "type" "str") (set_attr "memory" "store") @@ -15947,7 +15961,8 @@ (const_int 0)) (use (match_operand:DI 2 "register_operand" "a")) (use (match_dup 4))] - "TARGET_64BIT" + "TARGET_64BIT + && !(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" "rep{%;} stosq" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -15964,7 +15979,7 @@ (const_int 0)) (use (match_operand:SI 2 "register_operand" "a")) (use (match_dup 4))] - "" + "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" "rep{%;} stos{l|d}" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -15980,7 +15995,7 @@ (const_int 0)) (use (match_operand:QI 2 "register_operand" "a")) (use (match_dup 4))] - "" + "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])" "rep{%;} stosb" [(set_attr "type" "str") (set_attr "prefix_rep" "1") @@ -16005,8 +16020,8 @@ if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS) FAIL; - /* Can't use this if the user has appropriated esi or edi. */ - if (fixed_regs[SI_REG] || fixed_regs[DI_REG]) + /* Can't use this if the user has appropriated ecx, esi or edi. 
@@ -16101,7 +16116,7 @@
    (clobber (match_operand:P 0 "register_operand" "=S"))
    (clobber (match_operand:P 1 "register_operand" "=D"))
    (clobber (match_operand:P 2 "register_operand" "=c"))]
-  ""
+  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
   "repz{%;} cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
@@ -16141,7 +16156,7 @@
    (clobber (match_operand:P 0 "register_operand" "=S"))
    (clobber (match_operand:P 1 "register_operand" "=D"))
    (clobber (match_operand:P 2 "register_operand" "=c"))]
-  ""
+  "!(fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])"
   "repz{%;} cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
@@ -16153,11 +16168,11 @@
    (set_attr "prefix_rep" "1")])
 
 (define_expand "strlen<mode>"
-  [(set (match_operand:SWI48x 0 "register_operand" "")
-	(unspec:SWI48x [(match_operand:BLK 1 "general_operand" "")
-			(match_operand:QI 2 "immediate_operand" "")
-			(match_operand 3 "immediate_operand" "")]
-		       UNSPEC_SCAS))]
+  [(set (match_operand:P 0 "register_operand" "")
+	(unspec:P [(match_operand:BLK 1 "general_operand" "")
+		   (match_operand:QI 2 "immediate_operand" "")
+		   (match_operand 3 "immediate_operand" "")]
+		  UNSPEC_SCAS))]
   ""
 {
   if (ix86_expand_strlen (operands[0], operands[1], operands[2], operands[3]))
@@ -16182,7 +16197,7 @@
 		    (match_operand:P 4 "register_operand" "0")] UNSPEC_SCAS))
    (clobber (match_operand:P 1 "register_operand" "=D"))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "!(fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])"
   "repnz{%;} scasb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
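Switching the strlen expander from SWI48x to the Pmode iterator P ties the result mode to the pointer width, and the repnz scasb pattern it can reach is now fenced off from appropriated registers as well. A sketch of code that can take this path (the flag choice is illustrative, not mandated by the patch):

    /* Under -minline-all-stringops, __builtin_strlen may expand through
       strlen<mode> to "repnz scasb"; the result is pointer-width, which
       matches size_t on both ILP32 and LP64.  */
    __SIZE_TYPE__
    length (const char *s)
    {
      return __builtin_strlen (s);
    }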
@@ -17567,7 +17582,7 @@
 (define_insn_and_split "*call_value_pop_1_vzeroupper"
   [(parallel
     [(set (match_operand 0 "" "")
-	  (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+	  (call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
 		(match_operand:SI 2 "" "")))
      (set (reg:SI SP_REG)
 	  (plus:SI (reg:SI SP_REG)
@@ -17583,7 +17598,7 @@
 
 (define_insn "*call_value_pop_1"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
 	      (match_operand:SI 2 "" "")))
    (set (reg:SI SP_REG)
 	(plus:SI (reg:SI SP_REG)
@@ -17595,7 +17610,7 @@
 (define_insn_and_split "*sibcall_value_pop_1_vzeroupper"
   [(parallel
     [(set (match_operand 0 "" "")
-	  (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+	  (call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U"))
 		(match_operand:SI 2 "" "")))
      (set (reg:SI SP_REG)
 	  (plus:SI (reg:SI SP_REG)
@@ -17611,7 +17626,7 @@
 
 (define_insn "*sibcall_value_pop_1"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U"))
 	      (match_operand:SI 2 "" "")))
    (set (reg:SI SP_REG)
 	(plus:SI (reg:SI SP_REG)
@@ -17712,7 +17727,7 @@
 
 (define_insn_and_split "*call_value_1_vzeroupper"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
 	      (match_operand:SI 2 "" "")))
    (unspec [(match_operand 3 "const_int_operand" "")]
 	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
@@ -17725,7 +17740,7 @@
 
 (define_insn "*call_value_1"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lsm"))
+	(call (mem:QI (match_operand:SI 1 "call_insn_operand" "lzm"))
 	      (match_operand:SI 2 "" "")))]
   "!TARGET_64BIT && !SIBLING_CALL_P (insn)"
 { return ix86_output_call_insn (insn, operands[1], 1); }
@@ -17733,7 +17748,7 @@
 
 (define_insn_and_split "*sibcall_value_1_vzeroupper"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U"))
 	      (match_operand:SI 2 "" "")))
    (unspec [(match_operand 3 "const_int_operand" "")]
 	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
@@ -17746,7 +17761,7 @@
 
 (define_insn "*sibcall_value_1"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "s,U"))
+	(call (mem:QI (match_operand:SI 1 "sibcall_insn_operand" "z,U"))
 	      (match_operand:SI 2 "" "")))]
   "!TARGET_64BIT && SIBLING_CALL_P (insn)"
 { return ix86_output_call_insn (insn, operands[1], 1); }
@@ -17754,7 +17769,7 @@
 
 (define_insn_and_split "*call_value_1_rex64_vzeroupper"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm"))
 	      (match_operand:DI 2 "" "")))
    (unspec [(match_operand 3 "const_int_operand" "")]
 	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
@@ -17768,7 +17783,7 @@
 
 (define_insn "*call_value_1_rex64"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm"))
 	      (match_operand:DI 2 "" "")))]
   "TARGET_64BIT && !SIBLING_CALL_P (insn)
    && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
@@ -17778,7 +17793,7 @@
 (define_insn_and_split "*call_value_1_rex64_ms_sysv_vzeroupper"
   [(parallel
     [(set (match_operand 0 "" "")
-	  (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+	  (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm"))
 		(match_operand:DI 2 "" "")))
      (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
      (clobber (reg:TI XMM6_REG))
@@ -17804,7 +17819,7 @@
 
 (define_insn "*call_value_1_rex64_ms_sysv"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+	(call (mem:QI (match_operand:DI 1 "call_insn_operand" "rzm"))
 	      (match_operand:DI 2 "" "")))
    (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
    (clobber (reg:TI XMM6_REG))
@@ -17846,7 +17861,7 @@
 
 (define_insn_and_split "*sibcall_value_1_rex64_vzeroupper"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
+	(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "z,U"))
 	      (match_operand:DI 2 "" "")))
    (unspec [(match_operand 3 "const_int_operand" "")]
 	   UNSPEC_CALL_NEEDS_VZEROUPPER)]
@@ -17859,7 +17874,7 @@
 
 (define_insn "*sibcall_value_1_rex64"
   [(set (match_operand 0 "" "")
-	(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "s,U"))
+	(call (mem:QI (match_operand:DI 1 "sibcall_insn_operand" "z,U"))
 	      (match_operand:DI 2 "" "")))]
   "TARGET_64BIT && SIBLING_CALL_P (insn)"
 { return ix86_output_call_insn (insn, operands[1], 1); }
@@ -18270,8 +18285,8 @@
 		     (match_operand:SI 3 "const_int_operand" "i")]
 		    UNSPECV_LWPVAL_INTRINSIC)]
   "TARGET_LWP"
-  "/* Avoid unused variable warning.  */
-   (void) operand0;")
+  ;; Avoid unused variable warning.
+  "(void) operands[0];")
 
 (define_insn "*lwp_lwpval<mode>3_1"
   [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")
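These call-value and sibcall patterns replace the generic "s" (symbolic immediate) constraint with the new "z" constraint this patch defines in constraints.md, which accepts exactly constant_call_address_operand. A short sketch of the two operand classes involved (function names hypothetical):

    extern int f (int);

    int
    direct (void)
    {
      return f (1);     /* constant call address: new "z" alternative */
    }

    int
    indirect (int (*fp) (int))
    {
      return fp (2);    /* call through a pointer: register/memory
                           alternatives, never "z" */
    }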
+ "(void) operands[0];") (define_insn "*lwp_lwpval<mode>3_1" [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r") diff --git a/gcc-4.6/gcc/config/i386/i386.opt b/gcc-4.6/gcc/config/i386/i386.opt index fe5949f..dca06e7 100644 --- a/gcc-4.6/gcc/config/i386/i386.opt +++ b/gcc-4.6/gcc/config/i386/i386.opt @@ -101,6 +101,10 @@ march= Target RejectNegative Joined Var(ix86_arch_string) Generate code for given CPU +mvarch= +Target RejectNegative Joined Var(ix86_mv_arch_string) +Multiversion for the given CPU(s) + masm= Target RejectNegative Joined Var(ix86_asm_string) Use given assembler dialect diff --git a/gcc-4.6/gcc/config/i386/libgcc-glibc.ver b/gcc-4.6/gcc/config/i386/libgcc-glibc.ver index e79d326..78b286c 100644 --- a/gcc-4.6/gcc/config/i386/libgcc-glibc.ver +++ b/gcc-4.6/gcc/config/i386/libgcc-glibc.ver @@ -147,6 +147,12 @@ GCC_4.3.0 { __trunctfxf2 __unordtf2 } + +GCC_4.6.0 { + __cpu_indicator_init + __cpu_model + __cpu_features +} %else GCC_4.4.0 { __addtf3 @@ -183,4 +189,10 @@ GCC_4.4.0 { GCC_4.5.0 { __extendxftf2 } + +GCC_4.6.0 { + __cpu_indicator_init + __cpu_model + __cpu_features +} %endif diff --git a/gcc-4.6/gcc/config/i386/linux.h b/gcc-4.6/gcc/config/i386/linux.h index 019cea9..3a23598 100644 --- a/gcc-4.6/gcc/config/i386/linux.h +++ b/gcc-4.6/gcc/config/i386/linux.h @@ -123,7 +123,7 @@ along with GCC; see the file COPYING3. If not see /* Similar to standard Linux, but adding -ffast-math support. */ #undef ENDFILE_SPEC #define ENDFILE_SPEC \ - "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ %{mpc32:crtprec32.o%s} \ %{mpc64:crtprec64.o%s} \ %{mpc80:crtprec80.o%s} \ diff --git a/gcc-4.6/gcc/config/i386/linux64.h b/gcc-4.6/gcc/config/i386/linux64.h index 429c273..b3fe500 100644 --- a/gcc-4.6/gcc/config/i386/linux64.h +++ b/gcc-4.6/gcc/config/i386/linux64.h @@ -102,7 +102,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* Similar to standard Linux, but adding -ffast-math support. */ #undef ENDFILE_SPEC #define ENDFILE_SPEC \ - "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ %{mpc32:crtprec32.o%s} \ %{mpc64:crtprec64.o%s} \ %{mpc80:crtprec80.o%s} \ diff --git a/gcc-4.6/gcc/config/i386/mingw32.h b/gcc-4.6/gcc/config/i386/mingw32.h index 4f8a63a..27da92b 100644 --- a/gcc-4.6/gcc/config/i386/mingw32.h +++ b/gcc-4.6/gcc/config/i386/mingw32.h @@ -124,7 +124,7 @@ along with GCC; see the file COPYING3. If not see #undef ENDFILE_SPEC #define ENDFILE_SPEC \ - "%{ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ crtend.o%s" /* Override startfile prefix defaults. */ @@ -239,3 +239,9 @@ __enable_execute_stack (void *addr) \ /* We should find a way to not have to update this manually. */ #define LIBGCJ_SONAME "libgcj" /*LIBGCC_EH_EXTN*/ "-12.dll" +/* For 32-bit Windows we need valid frame-pointer for function using + setjmp. */ +#undef SUBTARGET_FRAME_POINTER_REQUIRED +#define SUBTARGET_FRAME_POINTER_REQUIRED \ + (!TARGET_64BIT && cfun->calls_setjmp) + diff --git a/gcc-4.6/gcc/config/i386/predicates.md b/gcc-4.6/gcc/config/i386/predicates.md index 7cce9d4..fc5eb2d 100644 --- a/gcc-4.6/gcc/config/i386/predicates.md +++ b/gcc-4.6/gcc/config/i386/predicates.md @@ -1105,7 +1105,7 @@ ;; Return true if OP is a binary operator that can be promoted to wider mode. 
(define_predicate "promotable_binary_operator" - (ior (match_code "plus,and,ior,xor,ashift") + (ior (match_code "plus,minus,and,ior,xor,ashift") (and (match_code "mult") (match_test "TARGET_TUNE_PROMOTE_HIMODE_IMUL")))) diff --git a/gcc-4.6/gcc/config/i386/sse.md b/gcc-4.6/gcc/config/i386/sse.md index 216c0a1..b5b900a 100644 --- a/gcc-4.6/gcc/config/i386/sse.md +++ b/gcc-4.6/gcc/config/i386/sse.md @@ -4189,7 +4189,7 @@ (vec_select:<avxhalfvecmode> (match_operand:AVX256MODE4P 1 "nonimmediate_operand" "xm,x") (parallel [(const_int 0) (const_int 1)])))] - "TARGET_AVX" + "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" "&& reload_completed" [(const_int 0)] @@ -4223,7 +4223,7 @@ (match_operand:AVX256MODE8P 1 "nonimmediate_operand" "xm,x") (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))] - "TARGET_AVX" + "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" "&& reload_completed" [(const_int 0)] @@ -4260,7 +4260,7 @@ (const_int 2) (const_int 3) (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))] - "TARGET_AVX" + "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" "&& reload_completed" [(const_int 0)] @@ -4303,7 +4303,7 @@ (const_int 10) (const_int 11) (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))] - "TARGET_AVX" + "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" "&& reload_completed" [(const_int 0)] @@ -4357,9 +4357,9 @@ (vec_select:SF (match_operand:V4SF 1 "memory_operand" "o") (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))] - "" + "TARGET_SSE" "#" - "reload_completed" + "&& reload_completed" [(const_int 0)] { int i = INTVAL (operands[2]); @@ -4521,15 +4521,14 @@ [(set (match_operand:V4DF 0 "register_operand" "=x,x") (vec_select:V4DF (vec_concat:V8DF - (match_operand:V4DF 1 "nonimmediate_operand" "xm,x") - (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm")) + (match_operand:V4DF 1 "nonimmediate_operand" " x,m") + (match_operand:V4DF 2 "nonimmediate_operand" "xm,1")) (parallel [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))] - "TARGET_AVX - && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))" + "TARGET_AVX" "@ - vmovddup\t{%1, %0|%0, %1} - vunpcklpd\t{%2, %1, %0|%0, %1, %2}" + vunpcklpd\t{%2, %1, %0|%0, %1, %2} + vmovddup\t{%1, %0|%0, %1}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "V4DF")]) @@ -9901,6 +9900,9 @@ operands[2], operands[3], operands[4], operands[5], operands[6])); + if (!(flags || ecx || xmm0)) + emit_note (NOTE_INSN_DELETED); + DONE; } [(set_attr "type" "sselog") @@ -10028,6 +10030,9 @@ emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL, operands[2], operands[3], operands[4])); + if (!(flags || ecx || xmm0)) + emit_note (NOTE_INSN_DELETED); + DONE; } [(set_attr "type" "sselog") @@ -10459,8 +10464,8 @@ [(set (match_operand:SSEMODE 0 "register_operand" "=x,x") (if_then_else:SSEMODE (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m") - (match_operand:SSEMODE 1 "vector_move_operand" "x,x") - (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))] + (match_operand:SSEMODE 1 "register_operand" "x,x") + (match_operand:SSEMODE 2 "nonimmediate_operand" "xm,x")))] "TARGET_XOP" "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) @@ -10469,8 +10474,8 @@ [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x") (if_then_else:AVX256MODE (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m") - (match_operand:AVX256MODE 1 "vector_move_operand" "x,x") - (match_operand:AVX256MODE 2 
"vector_move_operand" "xm,x")))] + (match_operand:AVX256MODE 1 "register_operand" "x,x") + (match_operand:AVX256MODE 2 "nonimmediate_operand" "xm,x")))] "TARGET_XOP" "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) @@ -12001,8 +12006,7 @@ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") (unspec:AVXMODEF2P [(match_operand:AVXMODEF2P 1 "memory_operand" "m") - (match_operand:<avxpermvecmode> 2 "register_operand" "x") - (match_dup 0)] + (match_operand:<avxpermvecmode> 2 "register_operand" "x")] UNSPEC_MASKLOAD))] "TARGET_AVX" "vmaskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" |