Skip to content

Commit 260252e

Browse files
committed
Improve static and AFDO profile combination
This patch makes afdo_adjust_guessed_profile more aggressive on finding scales on the boundaries of connected components with no annotation. Originally I looked for edges into or out of the component with known AFDO counts and I also handled edges from basic blocks with known AFDO count and known static probability estimate. A common problem is with components not containing any in edges, but only out edges (i.e. those with ENTRY_BLOCK). In this case I added logic that looks for edges out of the component to BBs with known AFDO count. If all flow to the BB is either from the component or has AFDO count, we can determine the scale precisely. It may happen that there are edges from other components. In this case we know an upper bound and use it, since it is better than nothing. I also noticed that some components have 0 count in all of the profile and then scaling gives up, which is fixed. I also optimized the code a bit by replacing the map holding the current component with an array holding the component ID and broke out the scaling logic into separate functions. The patch fixes the perl regression I introduced in the last change. According to https://lnt.opensuse.org/db_default/v4/SPEC/67674 there were improvements (percentage is runtime change): 538.imagick_r -32.52% 549.fotonik3d_r -22.68% 520.omnetpp_r -12.37% 503.bwaves_r -8.71% 508.namd_r -5.10% 526.blender_r -2.11% and regressions: 554.roms_r 45.95% 527.cam4_r 21.69% 511.povray_r 13.59% 500.perlbench_r 10.19% 507.cactuBSSN_r 9.81% 510.parest_r 9.69% 548.exchange2_r 8.42% 502.gcc_r 5.10% 544.nab_r 3.76% 519.lbm_r 2.34% 541.leela_r 2.16% 525.x264_r 2.14% This is a bit wild, but I hope things will settle down once we chase out obvious problems (such as losing the profile of functions that have not been inlined). gcc/ChangeLog: * auto-profile.cc (afdo_indirect_call): Compute speculative edge probability. (add_scale): Break out from ... (scale_bbs): Break out from ... (afdo_adjust_guessed_profile): ... 
here; use component array instead of current_component hash_map; handle components with only 0 profile; be more aggressive on finding scales along the boundary.
1 parent 30e8f97 commit 260252e

File tree

1 file changed

+159
-85
lines changed

1 file changed

+159
-85
lines changed

gcc/auto-profile.cc

Lines changed: 159 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,10 +1076,10 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
10761076
fprintf (dump_file, "\n");
10771077
}
10781078

1079-
/* FIXME: Count should be initialized. */
10801079
struct cgraph_edge *new_edge
1081-
= indirect_edge->make_speculative (direct_call,
1082-
profile_count::uninitialized ());
1080+
= indirect_edge->make_speculative
1081+
(direct_call,
1082+
gimple_bb (stmt)->count.apply_scale (99, 100));
10831083
cgraph_edge::redirect_call_stmt_to_callee (new_edge);
10841084
gimple_remove_histogram_value (cfun, stmt, hist);
10851085
inline_call (new_edge, true, NULL, NULL, false);
@@ -1549,6 +1549,60 @@ cmp (const void *a, const void *b)
15491549
return 0;
15501550
}
15511551

1552+
/* Add scale ANNOTATED/ORIG to SCALES; nothing is added when ORIG is zero.  */
1553+
1554+
static void
1555+
add_scale (vec <sreal> *scales, profile_count annotated, profile_count orig)
1556+
{
1557+
if (dump_file)
1558+
{
1559+
orig.dump (dump_file);
1560+
fprintf (dump_file, " should be ");
1561+
annotated.dump (dump_file);
1562+
fprintf (dump_file, "\n");
1563+
}
1564+
if (orig.nonzero_p ())
1565+
{
1566+
sreal scale
1567+
= annotated.guessed_local ()
1568+
.to_sreal_scale (orig);
1569+
if (dump_file)
1570+
fprintf (dump_file, " adding scale %.16f\n",
1571+
scale.to_double ());
1572+
scales->safe_push (scale);
1573+
}
1574+
}
1575+
1576+
/* Scale counts of all basic blocks in BBS by SCALE and convert them to
1577+
IPA quality. */
1578+
1579+
static void
1580+
scale_bbs (const vec <basic_block> &bbs, sreal scale)
1581+
{
1582+
if (dump_file)
1583+
fprintf (dump_file, " Scaling by %.16f\n", scale.to_double ());
1584+
for (basic_block b : bbs)
1585+
if (!(b->count == profile_count::zero ())
1586+
&& b->count.initialized_p ())
1587+
{
1588+
profile_count o = b->count;
1589+
b->count = b->count.force_guessed () * scale;
1590+
1591+
/* If we scaled to 0, make it auto-fdo since that is treated
1592+
less aggressively. */
1593+
if (!b->count.nonzero_p () && o.nonzero_p ())
1594+
b->count = profile_count::zero ().afdo ();
1595+
if (dump_file)
1596+
{
1597+
fprintf (dump_file, " bb %i count updated ", b->index);
1598+
o.dump (dump_file);
1599+
fprintf (dump_file, " -> ");
1600+
b->count.dump (dump_file);
1601+
fprintf (dump_file, "\n");
1602+
}
1603+
}
1604+
}
1605+
15521606
/* In case given basic block was fully optimized out, AutoFDO
15531607
will have no data about it. In this case try to preserve static profile.
15541608
Identify connected components (in undirected form of CFG) which has
@@ -1558,26 +1612,33 @@ cmp (const void *a, const void *b)
15581612
void
15591613
afdo_adjust_guessed_profile (bb_set *annotated_bb)
15601614
{
1561-
auto_sbitmap visited (last_basic_block_for_fn (cfun));
15621615
/* Basic blocks of connected component currently processed. */
1563-
auto_vec <basic_block, 20> bbs (n_basic_blocks_for_fn (cfun) + 1);
1616+
auto_vec <basic_block, 20> bbs (n_basic_blocks_for_fn (cfun));
15641617
/* Scale factors found. */
1565-
auto_vec <sreal, 20> scales (n_basic_blocks_for_fn (cfun) + 1);
1566-
auto_vec <basic_block, 20> stack (n_basic_blocks_for_fn (cfun) + 1);
1567-
1568-
bitmap_clear (visited);
1618+
auto_vec <sreal, 20> scales;
1619+
auto_vec <basic_block, 20> stack (n_basic_blocks_for_fn (cfun));
15691620

15701621
basic_block seed_bb;
1571-
FOR_BB_BETWEEN (seed_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun),
1572-
EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
1573-
if (!is_bb_annotated (seed_bb, *annotated_bb)
1574-
&& bitmap_set_bit (visited, seed_bb->index))
1622+
unsigned int component_id = 1;
1623+
1624+
/* Map from basic block to its component.
1625+
0 is used for unvisited BBs,
1626+
1 means that BB is annotated,
1627+
>=2 is an id of the component BB belongs to. */
1628+
auto_vec <unsigned int, 20> component;
1629+
component.safe_grow (last_basic_block_for_fn (cfun));
1630+
FOR_ALL_BB_FN (seed_bb, cfun)
1631+
component[seed_bb->index]
1632+
= is_bb_annotated (seed_bb, *annotated_bb) ? 1 : 0;
1633+
FOR_ALL_BB_FN (seed_bb, cfun)
1634+
if (!component[seed_bb->index])
15751635
{
1576-
hash_set <basic_block> current_component;
1577-
15781636
stack.quick_push (seed_bb);
1637+
component_id++;
15791638
bbs.truncate (0);
15801639
scales.truncate (0);
1640+
component[seed_bb->index] = component_id;
1641+
profile_count max_count = profile_count::zero ();
15811642

15821643
/* Identify connected component starting in BB. */
15831644
if (dump_file)
@@ -1588,19 +1649,33 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb)
15881649
basic_block b = stack.pop ();
15891650

15901651
bbs.quick_push (b);
1591-
current_component.add (b);
1652+
max_count = max_count.max (b->count);
15921653

15931654
for (edge e: b->preds)
1594-
if (!is_bb_annotated (e->src, *annotated_bb)
1595-
&& bitmap_set_bit (visited, e->src->index))
1596-
stack.quick_push (e->src);
1655+
if (!component[e->src->index])
1656+
{
1657+
stack.quick_push (e->src);
1658+
component[e->src->index] = component_id;
1659+
}
15971660
for (edge e: b->succs)
1598-
if (!is_bb_annotated (e->dest, *annotated_bb)
1599-
&& bitmap_set_bit (visited, e->dest->index))
1600-
stack.quick_push (e->dest);
1661+
if (!component[e->dest->index])
1662+
{
1663+
stack.quick_push (e->dest);
1664+
component[e->dest->index] = component_id;
1665+
}
16011666
}
16021667
while (!stack.is_empty ());
16031668

1669+
/* If all blocks in the component have 0 count, we do not need
1670+
to scale; we only need to convert to IPA quality. */
1671+
if (!max_count.nonzero_p ())
1672+
{
1673+
if (dump_file)
1674+
fprintf (dump_file, " All counts are 0; scale = 1\n");
1675+
scale_bbs (bbs, 1);
1676+
continue;
1677+
}
1678+
16041679
/* Now visit the component and try to figure out its desired
16051680
frequency. */
16061681
for (basic_block b : bbs)
@@ -1653,84 +1728,83 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb)
16531728
}
16541729
else
16551730
{
1656-
gcc_checking_assert (current_component.contains (e->src));
16571731
current_component_count += e->count ();
1732+
gcc_checking_assert (component[e->src->index] == component_id);
16581733
}
16591734
if (boundary && current_component_count.initialized_p ())
16601735
{
1661-
profile_count in_count = b->count - current_component_count;
16621736
if (dump_file)
1663-
{
1664-
fprintf (dump_file, " bb %i in count ", b->index);
1665-
in_count.dump (dump_file);
1666-
fprintf (dump_file, " should be ");
1667-
annotated_count.dump (dump_file);
1668-
fprintf (dump_file, "\n");
1669-
}
1670-
if (in_count.nonzero_p ())
1671-
{
1672-
sreal scale
1673-
= annotated_count.guessed_local ()
1674-
.to_sreal_scale (in_count);
1675-
if (dump_file)
1676-
fprintf (dump_file, " adding scale %.16f\n",
1677-
scale.to_double ());
1678-
scales.safe_push (scale);
1679-
}
1737+
fprintf (dump_file, " bb %i in count ", b->index);
1738+
add_scale (&scales,
1739+
annotated_count,
1740+
b->count - current_component_count);
16801741
}
16811742
for (edge e: b->succs)
16821743
if (AFDO_EINFO (e)->is_annotated ())
16831744
{
1684-
profile_count out_count = e->count ();
1685-
profile_count annotated_count = AFDO_EINFO (e)->get_count ();
16861745
if (dump_file)
1687-
{
1688-
fprintf (dump_file, " edge %i->%i count ",
1689-
b->index, e->dest->index);
1690-
out_count.dump (dump_file);
1691-
fprintf (dump_file, " should be ");
1692-
annotated_count.dump (dump_file);
1693-
fprintf (dump_file, "\n");
1694-
}
1695-
if (out_count.nonzero_p ())
1696-
{
1697-
sreal scale
1698-
= annotated_count.guessed_local ()
1699-
.to_sreal_scale (out_count);
1700-
if (dump_file)
1701-
fprintf (dump_file, " adding scale %.16f\n",
1702-
scale.to_double ());
1703-
scales.safe_push (scale);
1704-
}
1746+
fprintf (dump_file, " edge %i->%i count ",
1747+
b->index, e->dest->index);
1748+
add_scale (&scales, AFDO_EINFO (e)->get_count (), e->count ());
1749+
}
1750+
else if (is_bb_annotated (e->dest, *annotated_bb))
1751+
{
1752+
profile_count annotated_count = e->dest->count;
1753+
profile_count out_count = profile_count::zero ();
1754+
bool ok = true;
1755+
for (edge e2: e->dest->preds)
1756+
if (AFDO_EINFO (e2)->is_annotated ())
1757+
annotated_count -= AFDO_EINFO (e2)->get_count ();
1758+
else if (component[e->src->index] == component_id)
1759+
out_count += e->count ();
1760+
else if (e->probability.nonzero_p ())
1761+
{
1762+
ok = false;
1763+
break;
1764+
}
1765+
if (!ok)
1766+
continue;
1767+
if (dump_file)
1768+
fprintf (dump_file,
1769+
" edge %i->%i has annotated sucessor; count ",
1770+
b->index, e->dest->index);
1771+
add_scale (&scales, annotated_count, e->count ());
17051772
}
17061773

17071774
}
1775+
1776+
/* If we failed to find annotated entry or exit edge,
1777+
look for exit edges and scale profile so the dest
1778+
BB gets all the flow it needs. This is imprecise because
1779+
the edge is not annotated and thus BB has more than
1780+
one such predecessor. */
1781+
if (!scales.length ())
1782+
for (basic_block b : bbs)
1783+
if (b->count.nonzero_p ())
1784+
for (edge e: b->succs)
1785+
if (is_bb_annotated (e->dest, *annotated_bb))
1786+
{
1787+
profile_count annotated_count = e->dest->count;
1788+
for (edge e2: e->dest->preds)
1789+
if (AFDO_EINFO (e2)->is_annotated ())
1790+
annotated_count -= AFDO_EINFO (e2)->get_count ();
1791+
if (dump_file)
1792+
fprintf (dump_file,
1793+
" edge %i->%i has annotated sucessor;"
1794+
" upper bound count ",
1795+
b->index, e->dest->index);
1796+
add_scale (&scales, annotated_count, e->count ());
1797+
}
17081798
if (!scales.length ())
1709-
continue;
1799+
{
1800+
if (dump_file)
1801+
fprintf (dump_file,
1802+
" Can not determine count from the boundary; giving up");
1803+
continue;
1804+
}
1805+
gcc_checking_assert (scales.length ());
17101806
scales.qsort (cmp);
1711-
sreal scale = scales[scales.length () / 2];
1712-
if (dump_file)
1713-
fprintf (dump_file, " Scaling by %.16f\n", scale.to_double ());
1714-
for (basic_block b : bbs)
1715-
if (!(b->count == profile_count::zero ())
1716-
&& b->count.initialized_p ())
1717-
{
1718-
profile_count o = b->count;
1719-
b->count = b->count.force_guessed () * scale;
1720-
1721-
/* If we scaled to 0, make it auto-fdo since that is treated
1722-
less agressively. */
1723-
if (!b->count.nonzero_p () && o.nonzero_p ())
1724-
b->count = profile_count::zero ().afdo ();
1725-
if (dump_file)
1726-
{
1727-
fprintf (dump_file, " bb %i count updated ", b->index);
1728-
o.dump (dump_file);
1729-
fprintf (dump_file, " -> ");
1730-
b->count.dump (dump_file);
1731-
fprintf (dump_file, "\n");
1732-
}
1733-
}
1807+
scale_bbs (bbs, scales[scales.length () / 2]);
17341808
}
17351809
}
17361810

0 commit comments

Comments
 (0)