aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.6/gcc/loop-unroll.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc-4.6/gcc/loop-unroll.c')
-rw-r--r--gcc-4.6/gcc/loop-unroll.c166
1 files changed, 159 insertions, 7 deletions
diff --git a/gcc-4.6/gcc/loop-unroll.c b/gcc-4.6/gcc/loop-unroll.c
index a1ef58d..2a49d0c 100644
--- a/gcc-4.6/gcc/loop-unroll.c
+++ b/gcc-4.6/gcc/loop-unroll.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
#include "hashtab.h"
#include "recog.h"
#include "target.h"
+#include "diagnostic.h"
/* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because
@@ -152,6 +153,109 @@ static void combine_var_copies_in_loop_exit (struct var_to_expand *,
basic_block);
static rtx get_expansion (struct var_to_expand *);
+/* Emit a user-visible note (via inform ()) describing the unroll/peel
+   decision recorded in LOOP->lpt_decision, attributed to source location
+   LOCUS.  The note includes the decision kind, the unroll/peel factor
+   (lpt_decision.times), the loop header's profile execution count, and —
+   except for complete peels — the trip count, either exact (const) or
+   profile-estimated (average).  Callers gate this on flag_opt_info.  */
+static void
+report_unroll_peel(struct loop *loop, location_t locus)
+{
+ struct niter_desc *desc;
+ int niters = 0;
+ char iter_str[50];
+
+ desc = get_simple_loop_desc (loop);
+
+ /* Prefer the exact trip count when it is known at compile time;
+    otherwise fall back to the profile-based estimate, but only when
+    the header has a nonzero execution count to estimate from.  */
+ if (desc->const_iter)
+ niters = desc->niter;
+ else if (loop->header->count)
+ niters = expected_loop_iterations (loop);
+
+ /* Worst case ", average iterations -2147483648" is well under 50
+    bytes, so the fixed buffer cannot overflow.  */
+ sprintf(iter_str,", %s iterations %d",
+ desc->const_iter?"const":"average",
+ niters);
+ /* NOTE(review): loop->header->count is narrowed to int here; for very
+    hot loops the printed count may wrap — confirm acceptable.  */
+ inform (locus, "%s%s loop by %d (header execution count %d%s)",
+ loop->lpt_decision.decision == LPT_PEEL_COMPLETELY ?
+ "Completely " : "",
+ loop->lpt_decision.decision == LPT_PEEL_SIMPLE ?
+ "Peel" : "Unroll",
+ loop->lpt_decision.times,
+ (int)loop->header->count,
+ loop->lpt_decision.decision == LPT_PEEL_COMPLETELY ?
+ "" : iter_str);
+}
+
+/* Determine whether LOOP contains floating-point computation.
+   Returns true as soon as any insn in the loop body is a single set
+   whose destination has a floating-point machine mode; false
+   otherwise.  Insns that are not single sets are ignored —
+   NOTE(review): presumably multi-set/parallel FP insns are rare enough
+   not to matter here; confirm.  The loop body array from
+   get_loop_body () is freed on every return path.  */
+static bool
+loop_has_FP_comp(struct loop *loop)
+{
+ rtx set, dest;
+ basic_block *body, bb;
+ unsigned i;
+ rtx insn;
+
+ body = get_loop_body (loop);
+ for (i = 0; i < loop->num_nodes; i++)
+ {
+ bb = body[i];
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ set = single_set (insn);
+ if (!set)
+ continue;
+
+ /* A floating-point-mode destination marks the loop as FP.  */
+ dest = SET_DEST (set);
+ if (FLOAT_MODE_P (GET_MODE (dest)))
+ {
+ free (body);
+ return true;
+ }
+ }
+ }
+ free (body);
+ return false;
+}
+
+/* This returns a bit vector */
+/* Bit mask describing which size-growing transforms to suppress for a
+   loop.  LIMIT_BOTH is LIMIT_UNROLL | LIMIT_PEEL; values are combined
+   and tested with bitwise ops by callers.  */
+typedef enum {
+ NO_LIMIT = 0,
+ LIMIT_UNROLL = 0x1,
+ LIMIT_PEEL = 0x2,
+ LIMIT_BOTH = 0x3
+} limit_type;
+
+/* Defined elsewhere; presumably the LIPO call-graph-based estimate of
+   total code size — TODO confirm against its defining module.  */
+extern int cgraph_codesize_estimate;
+
+/* Determine whether LOOP unrolling/peeling should be constrained based
+   on code footprint estimates.
+
+   Returns a limit_type bit mask: NO_LIMIT when no constraint applies
+   (dynamic IPA disabled, the loop contains FP computation, or the
+   code-size estimate is within the PARAM_UNROLLPEEL_CODESIZE_THRESHOLD
+   param); otherwise LIMIT_PEEL and/or LIMIT_UNROLL according to the
+   flag_ripa_peel_size_limit / flag_ripa_unroll_size_limit flags.  */
+static limit_type
+limit_code_size(struct loop *loop)
+{
+ unsigned size_threshold;
+ limit_type result = NO_LIMIT;
+ /* Bits are accumulated in an int because |= on an enum lvalue is not
+    valid C; the final value is cast back to limit_type.  */
+ int result_int = 0;
+
+ /* Only active under LIPO (dynamic IPA), where a partial call graph
+    provides the code-size estimate.  */
+ if (!flag_dyn_ipa)
+ return NO_LIMIT;
+
+ gcc_assert (cgraph_codesize_estimate >= 0);
+
+ /* Ignore FP loops, which are more likely to benefit heavily from
+ unrolling. */
+ if (loop_has_FP_comp(loop))
+ return NO_LIMIT;
+
+ size_threshold = PARAM_VALUE (PARAM_UNROLLPEEL_CODESIZE_THRESHOLD);
+ if (cgraph_codesize_estimate <= (int)size_threshold)
+ return NO_LIMIT;
+
+ if (flag_ripa_peel_size_limit)
+ result_int |= LIMIT_PEEL;
+
+ if (flag_ripa_unroll_size_limit)
+ result_int |= LIMIT_UNROLL;
+
+ result = (limit_type)result_int;
+ return result;
+}
+
/* Unroll and/or peel (depending on FLAGS) LOOPS. */
void
unroll_and_peel_loops (int flags)
@@ -160,6 +264,8 @@ unroll_and_peel_loops (int flags)
bool check;
loop_iterator li;
+ record_loop_exits();
+
/* First perform complete loop peeling (it is almost surely a win,
and affects parameters for further decision a lot). */
peel_loops_completely (flags);
@@ -234,16 +340,18 @@ peel_loops_completely (int flags)
{
struct loop *loop;
loop_iterator li;
+ location_t locus;
/* Scan the loops, the inner ones first. */
FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
{
loop->lpt_decision.decision = LPT_NONE;
+ locus = get_loop_location(loop);
if (dump_file)
- fprintf (dump_file,
- "\n;; *** Considering loop %d for complete peeling ***\n",
- loop->num);
+ fprintf (dump_file, "\n;; *** Considering loop %d for complete peeling at BB %d from %s:%d ***\n",
+ loop->num, loop->header->index, LOCATION_FILE(locus),
+ LOCATION_LINE(locus));
loop->ninsns = num_loop_insns (loop);
@@ -253,6 +361,11 @@ peel_loops_completely (int flags)
if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY)
{
+ if (flag_opt_info >= OPT_INFO_MIN)
+ {
+ report_unroll_peel(loop, locus);
+ }
+
peel_loop_completely (loop);
#ifdef ENABLE_CHECKING
verify_dominators (CDI_DOMINATORS);
@@ -268,14 +381,19 @@ decide_unrolling_and_peeling (int flags)
{
struct loop *loop;
loop_iterator li;
+ location_t locus;
+ limit_type limit;
/* Scan the loops, inner ones first. */
FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
{
loop->lpt_decision.decision = LPT_NONE;
+ locus = get_loop_location(loop);
if (dump_file)
- fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num);
+ fprintf (dump_file, "\n;; *** Considering loop %d at BB %d from %s:%d ***\n",
+ loop->num, loop->header->index, LOCATION_FILE(locus),
+ LOCATION_LINE(locus));
/* Do not peel cold areas. */
if (optimize_loop_for_size_p (loop))
@@ -305,16 +423,45 @@ decide_unrolling_and_peeling (int flags)
loop->ninsns = num_loop_insns (loop);
loop->av_ninsns = average_num_loop_insns (loop);
+ /* Determine whether to limit code size growth from unrolling and
+ peeling. This is currently enabled only under LIPO (dynamic IPA)
+ where we have a partial call graph. It is not applied to loops
+ with constant trip counts, as it is easier to determine the
+ profitability of unrolling and peeling such loops. */
+ limit = limit_code_size(loop);
+ if (limit != NO_LIMIT)
+ {
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; Due to large code size footprint estimate, limit ");
+ if (limit == (LIMIT_UNROLL|LIMIT_PEEL))
+ fprintf (dump_file, "unrolling and peeling\n");
+ else if (limit == LIMIT_UNROLL)
+ fprintf (dump_file, "unrolling\n");
+ else
+ fprintf (dump_file, "peeling\n");
+ }
+ }
+
/* Try transformations one by one in decreasing order of
priority. */
decide_unroll_constant_iterations (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
+ if (loop->lpt_decision.decision == LPT_NONE
+ && !(limit & LIMIT_UNROLL))
decide_unroll_runtime_iterations (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
+ if (loop->lpt_decision.decision == LPT_NONE
+ && !(limit & LIMIT_UNROLL))
decide_unroll_stupid (loop, flags);
- if (loop->lpt_decision.decision == LPT_NONE)
+ if (loop->lpt_decision.decision == LPT_NONE
+ && !(limit & LIMIT_PEEL))
decide_peel_simple (loop, flags);
+
+ if (flag_opt_info >= OPT_INFO_MIN
+ && loop->lpt_decision.decision != LPT_NONE)
+ {
+ report_unroll_peel(loop, locus);
+ }
}
}
@@ -364,6 +511,7 @@ decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED)
if (dump_file)
fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n");
loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
+ loop->lpt_decision.times = 0;
}
/* Decide whether the LOOP is suitable for complete peeling. */
@@ -459,6 +607,7 @@ decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED)
if (dump_file)
fprintf (dump_file, ";; Decided to peel loop completely npeel %u\n", npeel);
loop->lpt_decision.decision = LPT_PEEL_COMPLETELY;
+ loop->lpt_decision.times = desc->niter;
}
/* Peel all iterations of LOOP, remove exit edges and cancel the loop
@@ -569,6 +718,9 @@ decide_unroll_constant_iterations (struct loop *loop, int flags)
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (targetm.loop_unroll_adjust)
+ nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+
/* Skip big loops. */
if (nunroll <= 1)
{