21 Star 26 Fork 93

src-openEuler / gcc

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch 9.82 KB
一键复制 编辑 原始数据 按行查看 历史
赵震宇 提交于 2024-04-01 17:15 . [Sync] Sync patch from openeuler/gcc
From b020447c840c6e22440a9b9063298a06333fd2f1 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao <zhaozhenyu17@huawei.com>
Date: Sat, 23 Mar 2024 22:56:09 +0800
Subject: [PATCH] [Autofdo]Enable discriminator and MCF algorithm on Autofdo
---
gcc/auto-profile.cc | 171 +++++++++++++++++++++++++++++++++++++++++++-
gcc/cfghooks.cc | 7 ++
gcc/opts.cc | 5 +-
gcc/tree-inline.cc | 14 ++++
4 files changed, 193 insertions(+), 4 deletions(-)
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 2b34b80b8..f45f0ec66 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -466,6 +466,17 @@ string_table::get_index (const char *name) const
if (name == NULL)
return -1;
string_index_map::const_iterator iter = map_.find (name);
+ /* Function names may be duplicated. Try to distinguish them by the
+ #file_name#function_name form defined by the autofdo tool chain. */
+ if (iter == map_.end ())
+ {
+ char* file_name = get_original_name (lbasename (dump_base_name));
+ char* file_func_name
+ = concat ("#", file_name, "#", name, NULL);
+ iter = map_.find (file_func_name);
+ free (file_name);
+ free (file_func_name);
+ }
if (iter == map_.end ())
return -1;
@@ -654,7 +665,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
for (unsigned i = 0; i < num_pos_counts; i++)
{
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
+ unsigned offset = gcov_read_unsigned ();
unsigned num_targets = gcov_read_unsigned ();
gcov_type count = gcov_read_counter ();
s->pos_counts[offset].count = count;
@@ -733,6 +744,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
function_instance *s = get_function_instance_by_inline_stack (stack);
if (s == NULL)
return false;
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
+ {
+ return true;
+ }
return s->get_count_info (stack[0].second, info);
}
@@ -1395,6 +1410,66 @@ afdo_propagate (bb_set *annotated_bb)
}
}
+/* Rebalance the sample counts of a two-way branch before afdo_propagate ()
+ runs, so that propagation does not invert the branch probability. E.g.
+ BB_NUM (sample count)
+      BB1 (1000)
+      /        \
+ BB2 (10)   BB3 (0)
+      \        /
+         BB4
+ In afdo_propagate () the count of BB3 is calculated as
+ COUNT (BB3) = COUNT (BB1) - COUNT (BB2) = 1000 - 10 = 990,
+ although by sample count BB3 may be colder than BB2.
+ This function distributes the source BB count to each successor BB
+ in proportion to the successors' sample counts, e.g.
+ BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT)) */
+
+static void
+afdo_preprocess_bb_count ()
+{
+ basic_block bb;
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
+ && bb->count > profile_count::zero ().afdo ())
+ {
+ basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
+ basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
+ /* Use the single_succ_p () predicate here: single_succ_edge () is an
+ accessor that requires exactly one successor, not a test for it. */
+ if (single_succ_p (bb1) && single_succ_p (bb2)
+ && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
+ {
+ gcov_type max_count = 0;
+ gcov_type total_count = 0;
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (!e->dest->count.ipa_p ())
+ continue;
+ max_count = MAX (max_count, e->dest->count.to_gcov_type ());
+ total_count += e->dest->count.to_gcov_type ();
+ }
+ /* Only when bb_count > max_count * 2 can the branch probability
+ be inverted. */
+ if (max_count > 0 && bb->count.to_gcov_type () > max_count * 2)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ gcov_type target_count = bb->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type () / total_count;
+ e->dest->count
+ = profile_count::from_gcov_type
+ (target_count).afdo ();
+ }
+ }
+ }
+ }
+ }
+}
+
/* Propagate counts on control flow graph and calculate branch
probabilities. */
@@ -1420,6 +1495,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
}
afdo_find_equiv_class (annotated_bb);
+ afdo_preprocess_bb_count ();
afdo_propagate (annotated_bb);
FOR_EACH_BB_FN (bb, cfun)
@@ -1523,6 +1599,83 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
return false;
}
+/* Prepare the MCF input (per-BB and per-edge gcov counts) from the CFG. */
+
+static void
+afdo_init_mcf ()
+{
+ basic_block bb;
+ edge e;
+ edge_iterator ei;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
+ }
+
+ /* Step1: MCF requires the BB ids to be continuous,
+ so call compact_blocks () first. */
+ compact_blocks ();
+
+ /* Step2: allocate the BB count storage and the edge count map for MCF. */
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
+
+ /* Step3: initialize the MCF input data from the CFG. */
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ /* Seed the MCF count of BB from its current profile count. */
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
+
+ gcov_type total_count = 0; /* Sum of the successor BB counts. */
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ total_count += e->dest->count.to_gcov_type ();
+ }
+
+ /* If no successor block has samples, split the source BB count by the
+ static branch probability; otherwise by the successors' count ratio. */
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (total_count == 0)
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
+ }
+ else
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type () / total_count;
+ }
+ }
+ }
+}
+
+
+/* Copy the MCF result back into the BB counts and free the MCF resources.
+ Branch probabilities have already been updated in mcf_smooth_cfg (). */
+
+static void
+afdo_process_after_mcf ()
+{
+ basic_block bb;
+ /* Use the MCF result as BB count; a zero result leaves the count as is. */
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (bb_gcov_count (bb))
+ {
+ bb->count
+ = profile_count::from_gcov_type (bb_gcov_count (bb)).afdo ();
+ }
+ }
+
+ /* Release the MCF input/output data allocated in afdo_init_mcf (). */
+ bb_gcov_counts.release ();
+ delete edge_gcov_counts;
+ edge_gcov_counts = NULL;
+}
+
/* Annotate auto profile to the control flow graph. Do not annotate value
profile for stmts in PROMOTED_STMTS. */
@@ -1574,8 +1727,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
afdo_source_profile->mark_annotated (cfun->function_end_locus);
if (max_count > profile_count::zero ())
{
- /* Calculate, propagate count and probability information on CFG. */
- afdo_calculate_branch_prob (&annotated_bb);
+ /* 1 means -fprofile-correction is enabled manually, and the MCF
+ algorithm will be used to calculate count and probability.
+ Otherwise, use the default calculation algorithm. */
+ if (flag_profile_correction == 1)
+ {
+ afdo_init_mcf ();
+ mcf_smooth_cfg ();
+ afdo_process_after_mcf ();
+ }
+ else
+ {
+ /* Calculate, propagate count and probability information on CFG. */
+ afdo_calculate_branch_prob (&annotated_bb);
+ }
}
update_max_bb_count ();
profile_status_for_fn (cfun) = PROFILE_READ;
diff --git a/gcc/cfghooks.cc b/gcc/cfghooks.cc
index c0b7bdcd9..323663010 100644
--- a/gcc/cfghooks.cc
+++ b/gcc/cfghooks.cc
@@ -542,6 +542,9 @@ split_block_1 (basic_block bb, void *i)
return NULL;
new_bb->count = bb->count;
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
new_bb->discriminator = bb->discriminator;
if (dom_info_available_p (CDI_DOMINATORS))
@@ -1113,6 +1116,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
move_block_after (new_bb, after);
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ new_bb->discriminator = bb->discriminator;
FOR_EACH_EDGE (s, ei, bb->succs)
{
/* Since we are creating edges from a new block to successors
diff --git a/gcc/opts.cc b/gcc/opts.cc
index 2bba88140..4b4925331 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -3014,7 +3014,10 @@ common_handle_option (struct gcc_options *opts,
/* FALLTHRU */
case OPT_fauto_profile:
enable_fdo_optimizations (opts, opts_set, value);
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
+ /* 2 is special and means flag_profile_correction is turned on by
+ -fauto-profile. */
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
+ (value ? 2 : 0));
break;
case OPT_fipa_struct_reorg_:
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
index f892cee3f..f50dbbc52 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -2038,6 +2038,10 @@ copy_bb (copy_body_data *id, basic_block bb,
basic_block_info automatically. */
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
copy_basic_block->count = bb->count.apply_scale (num, den);
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ copy_basic_block->discriminator = bb->discriminator;
copy_gsi = gsi_start_bb (copy_basic_block);
@@ -3058,6 +3062,16 @@ copy_cfg_body (copy_body_data * id,
den += e->count ();
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
}
+ /* When autofdo uses PMU as the sampling unit, the count of
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
+ be zero. Using it in adjust_for_ipa_scaling would cause the
+ inlined BB counts to be overestimated. So set den equal to
+ num, which is the source inline BB count, to avoid the
+ overestimation. */
+ if (den == profile_count::zero ().afdo ())
+ {
+ den = num;
+ }
profile_count::adjust_for_ipa_scaling (&num, &den);
--
2.33.0
1
https://gitee.com/src-openeuler/gcc.git
git@gitee.com:src-openeuler/gcc.git
src-openeuler
gcc
gcc
master

搜索帮助

53164aa7 5694891 3bd8fe86 5694891