diff options
Diffstat (limited to 'gcc-4.6/gcc/loop-unroll.c')
-rw-r--r-- | gcc-4.6/gcc/loop-unroll.c | 166 |
1 files changed, 159 insertions, 7 deletions
diff --git a/gcc-4.6/gcc/loop-unroll.c b/gcc-4.6/gcc/loop-unroll.c index a1ef58d..2a49d0c 100644 --- a/gcc-4.6/gcc/loop-unroll.c +++ b/gcc-4.6/gcc/loop-unroll.c @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "hashtab.h" #include "recog.h" #include "target.h" +#include "diagnostic.h" /* This pass performs loop unrolling and peeling. We only perform these optimizations on innermost loops (with single exception) because @@ -152,6 +153,109 @@ static void combine_var_copies_in_loop_exit (struct var_to_expand *, basic_block); static rtx get_expansion (struct var_to_expand *); +static void +report_unroll_peel(struct loop *loop, location_t locus) +{ + struct niter_desc *desc; + int niters = 0; + char iter_str[50]; + + desc = get_simple_loop_desc (loop); + + if (desc->const_iter) + niters = desc->niter; + else if (loop->header->count) + niters = expected_loop_iterations (loop); + + sprintf(iter_str,", %s iterations %d", + desc->const_iter?"const":"average", + niters); + inform (locus, "%s%s loop by %d (header execution count %d%s)", + loop->lpt_decision.decision == LPT_PEEL_COMPLETELY ? + "Completely " : "", + loop->lpt_decision.decision == LPT_PEEL_SIMPLE ? + "Peel" : "Unroll", + loop->lpt_decision.times, + (int)loop->header->count, + loop->lpt_decision.decision == LPT_PEEL_COMPLETELY ? + "" : iter_str); +} + +/* Determine whether LOOP contains floating-point computation. 
*/ +static bool +loop_has_FP_comp(struct loop *loop) +{ + rtx set, dest; + basic_block *body, bb; + unsigned i; + rtx insn; + + body = get_loop_body (loop); + for (i = 0; i < loop->num_nodes; i++) + { + bb = body[i]; + + FOR_BB_INSNS (bb, insn) + { + set = single_set (insn); + if (!set) + continue; + + dest = SET_DEST (set); + if (FLOAT_MODE_P (GET_MODE (dest))) + { + free (body); + return true; + } + } + } + free (body); + return false; +} + +/* Bit mask of the transformations to limit; values may be OR'ed together (LIMIT_BOTH is LIMIT_UNROLL | LIMIT_PEEL).  */ +typedef enum { + NO_LIMIT = 0, + LIMIT_UNROLL = 0x1, + LIMIT_PEEL = 0x2, + LIMIT_BOTH = 0x3 +} limit_type; + +extern int cgraph_codesize_estimate; + +/* Determine whether LOOP unrolling/peeling should be constrained based + on code footprint estimates. */ +static limit_type +limit_code_size(struct loop *loop) +{ + unsigned size_threshold; + limit_type result = NO_LIMIT; + int result_int = 0; + + if (!flag_dyn_ipa) + return NO_LIMIT; + + gcc_assert (cgraph_codesize_estimate >= 0); + + /* Ignore FP loops, which are more likely to benefit heavily from + unrolling. */ + if (loop_has_FP_comp(loop)) + return NO_LIMIT; + + size_threshold = PARAM_VALUE (PARAM_UNROLLPEEL_CODESIZE_THRESHOLD); + if (cgraph_codesize_estimate <= (int)size_threshold) + return NO_LIMIT; + + if (flag_ripa_peel_size_limit) + result_int |= LIMIT_PEEL; + + if (flag_ripa_unroll_size_limit) + result_int |= LIMIT_UNROLL; + + result = (limit_type)result_int; + return result; +} + /* Unroll and/or peel (depending on FLAGS) LOOPS. */ void unroll_and_peel_loops (int flags) @@ -160,6 +264,8 @@ unroll_and_peel_loops (int flags) bool check; loop_iterator li; + record_loop_exits(); + /* First perform complete loop peeling (it is almost surely a win, and affects parameters for further decision a lot). */ peel_loops_completely (flags); @@ -234,16 +340,18 @@ peel_loops_completely (int flags) { struct loop *loop; loop_iterator li; + location_t locus; /* Scan the loops, the inner ones first. 
*/ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) { loop->lpt_decision.decision = LPT_NONE; + locus = get_loop_location(loop); if (dump_file) - fprintf (dump_file, - "\n;; *** Considering loop %d for complete peeling ***\n", - loop->num); + fprintf (dump_file, "\n;; *** Considering loop %d for complete peeling at BB %d from %s:%d ***\n", + loop->num, loop->header->index, LOCATION_FILE(locus), + LOCATION_LINE(locus)); loop->ninsns = num_loop_insns (loop); @@ -253,6 +361,11 @@ peel_loops_completely (int flags) if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY) { + if (flag_opt_info >= OPT_INFO_MIN) + { + report_unroll_peel(loop, locus); + } + peel_loop_completely (loop); #ifdef ENABLE_CHECKING verify_dominators (CDI_DOMINATORS); @@ -268,14 +381,19 @@ decide_unrolling_and_peeling (int flags) { struct loop *loop; loop_iterator li; + location_t locus; + limit_type limit; /* Scan the loops, inner ones first. */ FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) { loop->lpt_decision.decision = LPT_NONE; + locus = get_loop_location(loop); if (dump_file) - fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num); + fprintf (dump_file, "\n;; *** Considering loop %d at BB %d from %s:%d ***\n", + loop->num, loop->header->index, LOCATION_FILE(locus), + LOCATION_LINE(locus)); /* Do not peel cold areas. */ if (optimize_loop_for_size_p (loop)) @@ -305,16 +423,45 @@ decide_unrolling_and_peeling (int flags) loop->ninsns = num_loop_insns (loop); loop->av_ninsns = average_num_loop_insns (loop); + /* Determine whether to limit code size growth from unrolling and + peeling. This is currently enabled only under LIPO (dynamic IPA) + where we have a partial call graph. It is not applied to loops + with constant trip counts, as it is easier to determine the + profitability of unrolling and peeling such loops. 
*/ + limit = limit_code_size(loop); + if (limit != NO_LIMIT) + { + if (dump_file) + { + fprintf (dump_file, ";; Due to large code size footprint estimate, limit "); + if (limit == (LIMIT_UNROLL|LIMIT_PEEL)) + fprintf (dump_file, "unrolling and peeling\n"); + else if (limit == LIMIT_UNROLL) + fprintf (dump_file, "unrolling\n"); + else + fprintf (dump_file, "peeling\n"); + } + } + /* Try transformations one by one in decreasing order of priority. */ decide_unroll_constant_iterations (loop, flags); - if (loop->lpt_decision.decision == LPT_NONE) + if (loop->lpt_decision.decision == LPT_NONE + && !(limit & LIMIT_UNROLL)) decide_unroll_runtime_iterations (loop, flags); - if (loop->lpt_decision.decision == LPT_NONE) + if (loop->lpt_decision.decision == LPT_NONE + && !(limit & LIMIT_UNROLL)) decide_unroll_stupid (loop, flags); - if (loop->lpt_decision.decision == LPT_NONE) + if (loop->lpt_decision.decision == LPT_NONE + && !(limit & LIMIT_PEEL)) decide_peel_simple (loop, flags); + + if (flag_opt_info >= OPT_INFO_MIN + && loop->lpt_decision.decision != LPT_NONE) + { + report_unroll_peel(loop, locus); + } } } @@ -364,6 +511,7 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) if (dump_file) fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n"); loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; + loop->lpt_decision.times = 0; } /* Decide whether the LOOP is suitable for complete peeling. 
*/ @@ -459,6 +607,7 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) if (dump_file) fprintf (dump_file, ";; Decided to peel loop completely npeel %u\n", npeel); loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; + loop->lpt_decision.times = desc->niter; } /* Peel all iterations of LOOP, remove exit edges and cancel the loop @@ -569,6 +718,9 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); + if (targetm.loop_unroll_adjust) + nunroll = targetm.loop_unroll_adjust (nunroll, loop); + /* Skip big loops. */ if (nunroll <= 1) { |