@@ -1076,10 +1076,10 @@ afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
1076
1076
fprintf (dump_file, " \n " );
1077
1077
}
1078
1078
1079
- /* FIXME: Count should be initialized. */
1080
1079
struct cgraph_edge *new_edge
1081
- = indirect_edge->make_speculative (direct_call,
1082
- profile_count::uninitialized ());
1080
+ = indirect_edge->make_speculative
1081
+ (direct_call,
1082
+ gimple_bb (stmt)->count .apply_scale (99 , 100 ));
1083
1083
cgraph_edge::redirect_call_stmt_to_callee (new_edge);
1084
1084
gimple_remove_histogram_value (cfun, stmt, hist);
1085
1085
inline_call (new_edge, true , NULL , NULL , false );
@@ -1549,6 +1549,60 @@ cmp (const void *a, const void *b)
1549
1549
return 0 ;
1550
1550
}
1551
1551
1552
+ /* Push the scale factor ANNOTATED/ORIG onto SCALES; ANNOTATED is converted with guessed_local () first. Nothing is pushed unless ORIG.nonzero_p (). If dump_file is set, ORIG, ANNOTATED and the scale are printed to it. */
1553
+
1554
+ static void
1555
+ add_scale (vec <sreal> *scales, profile_count annotated, profile_count orig)
1556
+ {
1557
+ if (dump_file)
1558
+ {
1559
+ orig.dump (dump_file);
1560
+ fprintf (dump_file, " should be " );
1561
+ annotated.dump (dump_file);
1562
+ fprintf (dump_file, " \n " );
1563
+ }
1564
+ if (orig.nonzero_p ())
1565
+ {
1566
+ sreal scale
1567
+ = annotated.guessed_local ()
1568
+ .to_sreal_scale (orig);
1569
+ if (dump_file)
1570
+ fprintf (dump_file, " adding scale %.16f\n " ,
1571
+ scale.to_double ());
1572
+ scales->safe_push (scale);
1573
+ }
1574
+ }
1575
+
1576
+ /* Scale counts of basic blocks in BBS by SCALE and convert them to
1577
+ IPA quality. Blocks whose count is uninitialized or equals profile_count::zero () are skipped. */
1578
+
1579
+ static void
1580
+ scale_bbs (const vec <basic_block> &bbs, sreal scale)
1581
+ {
1582
+ if (dump_file)
1583
+ fprintf (dump_file, " Scaling by %.16f\n " , scale.to_double ());
1584
+ for (basic_block b : bbs)
1585
+ if (!(b->count == profile_count::zero ())
1586
+ && b->count .initialized_p ())
1587
+ {
1588
+ profile_count o = b->count ;
1589
+ b->count = b->count .force_guessed () * scale;
1590
+
1591
+ /* If a nonzero count was scaled down to 0, use an auto-fdo zero instead since that is treated
1592
+ less aggressively. */
1593
+ if (!b->count .nonzero_p () && o.nonzero_p ())
1594
+ b->count = profile_count::zero ().afdo ();
1595
+ if (dump_file)
1596
+ {
1597
+ fprintf (dump_file, " bb %i count updated " , b->index );
1598
+ o.dump (dump_file);
1599
+ fprintf (dump_file, " -> " );
1600
+ b->count .dump (dump_file);
1601
+ fprintf (dump_file, " \n " );
1602
+ }
1603
+ }
1604
+ }
1605
+
1552
1606
/* In case given basic block was fully optimized out, AutoFDO
1553
1607
will have no data about it. In this case try to preserve static profile.
1554
1608
Identify connected components (in undirected form of CFG) which has
@@ -1558,26 +1612,33 @@ cmp (const void *a, const void *b)
1558
1612
void
1559
1613
afdo_adjust_guessed_profile (bb_set *annotated_bb)
1560
1614
{
1561
- auto_sbitmap visited (last_basic_block_for_fn (cfun));
1562
1615
/* Basic blocks of connected component currently processed. */
1563
- auto_vec <basic_block, 20 > bbs (n_basic_blocks_for_fn (cfun) + 1 );
1616
+ auto_vec <basic_block, 20 > bbs (n_basic_blocks_for_fn (cfun));
1564
1617
/* Scale factors found. */
1565
- auto_vec <sreal, 20 > scales (n_basic_blocks_for_fn (cfun) + 1 );
1566
- auto_vec <basic_block, 20 > stack (n_basic_blocks_for_fn (cfun) + 1 );
1567
-
1568
- bitmap_clear (visited);
1618
+ auto_vec <sreal, 20 > scales;
1619
+ auto_vec <basic_block, 20 > stack (n_basic_blocks_for_fn (cfun));
1569
1620
1570
1621
basic_block seed_bb;
1571
- FOR_BB_BETWEEN (seed_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun),
1572
- EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
1573
- if (!is_bb_annotated (seed_bb, *annotated_bb)
1574
- && bitmap_set_bit (visited, seed_bb->index ))
1622
+ unsigned int component_id = 1 ;
1623
+
1624
+ /* Map from basic block to its component.
1625
+ 0 is used for unvisited BBs,
1626
+ 1 means that BB is annotated,
1627
+ >=2 is an id of the component BB belongs to. */
1628
+ auto_vec <unsigned int , 20 > component;
1629
+ component.safe_grow (last_basic_block_for_fn (cfun));
1630
+ FOR_ALL_BB_FN (seed_bb, cfun)
1631
+ component[seed_bb->index ]
1632
+ = is_bb_annotated (seed_bb, *annotated_bb) ? 1 : 0 ;
1633
+ FOR_ALL_BB_FN (seed_bb, cfun)
1634
+ if (!component[seed_bb->index ])
1575
1635
{
1576
- hash_set <basic_block> current_component;
1577
-
1578
1636
stack.quick_push (seed_bb);
1637
+ component_id++;
1579
1638
bbs.truncate (0 );
1580
1639
scales.truncate (0 );
1640
+ component[seed_bb->index ] = component_id;
1641
+ profile_count max_count = profile_count::zero ();
1581
1642
1582
1643
/* Identify connected component starting in BB. */
1583
1644
if (dump_file)
@@ -1588,19 +1649,33 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb)
1588
1649
basic_block b = stack.pop ();
1589
1650
1590
1651
bbs.quick_push (b);
1591
- current_component. add (b);
1652
+ max_count = max_count. max (b-> count );
1592
1653
1593
1654
for (edge e: b->preds )
1594
- if (!is_bb_annotated (e->src , *annotated_bb)
1595
- && bitmap_set_bit (visited, e->src ->index ))
1596
- stack.quick_push (e->src );
1655
+ if (!component[e->src ->index ])
1656
+ {
1657
+ stack.quick_push (e->src );
1658
+ component[e->src ->index ] = component_id;
1659
+ }
1597
1660
for (edge e: b->succs )
1598
- if (!is_bb_annotated (e->dest , *annotated_bb)
1599
- && bitmap_set_bit (visited, e->dest ->index ))
1600
- stack.quick_push (e->dest );
1661
+ if (!component[e->dest ->index ])
1662
+ {
1663
+ stack.quick_push (e->dest );
1664
+ component[e->dest ->index ] = component_id;
1665
+ }
1601
1666
}
1602
1667
while (!stack.is_empty ());
1603
1668
1669
+ /* If all blocks in the component have 0 count, we do not need
1670
+ to scale; we only need to convert to IPA quality. */
1671
+ if (!max_count.nonzero_p ())
1672
+ {
1673
+ if (dump_file)
1674
+ fprintf (dump_file, " All counts are 0; scale = 1\n " );
1675
+ scale_bbs (bbs, 1 );
1676
+ continue ;
1677
+ }
1678
+
1604
1679
/* Now visit the component and try to figure out its desired
1605
1680
frequency. */
1606
1681
for (basic_block b : bbs)
@@ -1653,84 +1728,83 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb)
1653
1728
}
1654
1729
else
1655
1730
{
1656
- gcc_checking_assert (current_component.contains (e->src ));
1657
1731
current_component_count += e->count ();
1732
+ gcc_checking_assert (component[e->src ->index ] == component_id);
1658
1733
}
1659
1734
if (boundary && current_component_count.initialized_p ())
1660
1735
{
1661
- profile_count in_count = b->count - current_component_count;
1662
1736
if (dump_file)
1663
- {
1664
- fprintf (dump_file, " bb %i in count " , b->index );
1665
- in_count.dump (dump_file);
1666
- fprintf (dump_file, " should be " );
1667
- annotated_count.dump (dump_file);
1668
- fprintf (dump_file, " \n " );
1669
- }
1670
- if (in_count.nonzero_p ())
1671
- {
1672
- sreal scale
1673
- = annotated_count.guessed_local ()
1674
- .to_sreal_scale (in_count);
1675
- if (dump_file)
1676
- fprintf (dump_file, " adding scale %.16f\n " ,
1677
- scale.to_double ());
1678
- scales.safe_push (scale);
1679
- }
1737
+ fprintf (dump_file, " bb %i in count " , b->index );
1738
+ add_scale (&scales,
1739
+ annotated_count,
1740
+ b->count - current_component_count);
1680
1741
}
1681
1742
for (edge e: b->succs )
1682
1743
if (AFDO_EINFO (e)->is_annotated ())
1683
1744
{
1684
- profile_count out_count = e->count ();
1685
- profile_count annotated_count = AFDO_EINFO (e)->get_count ();
1686
1745
if (dump_file)
1687
- {
1688
- fprintf (dump_file, " edge %i->%i count " ,
1689
- b->index , e->dest ->index );
1690
- out_count.dump (dump_file);
1691
- fprintf (dump_file, " should be " );
1692
- annotated_count.dump (dump_file);
1693
- fprintf (dump_file, " \n " );
1694
- }
1695
- if (out_count.nonzero_p ())
1696
- {
1697
- sreal scale
1698
- = annotated_count.guessed_local ()
1699
- .to_sreal_scale (out_count);
1700
- if (dump_file)
1701
- fprintf (dump_file, " adding scale %.16f\n " ,
1702
- scale.to_double ());
1703
- scales.safe_push (scale);
1704
- }
1746
+ fprintf (dump_file, " edge %i->%i count " ,
1747
+ b->index , e->dest ->index );
1748
+ add_scale (&scales, AFDO_EINFO (e)->get_count (), e->count ());
1749
+ }
1750
+ else if (is_bb_annotated (e->dest , *annotated_bb))
1751
+ {
1752
+ profile_count annotated_count = e->dest ->count ;
1753
+ profile_count out_count = profile_count::zero ();
1754
+ bool ok = true ;
1755
+ for (edge e2 : e->dest ->preds )
1756
+ if (AFDO_EINFO (e2 )->is_annotated ())
1757
+ annotated_count -= AFDO_EINFO (e2 )->get_count ();
1758
+ else if (component[e->src ->index ] == component_id)
1759
+ out_count += e->count ();
1760
+ else if (e->probability .nonzero_p ())
1761
+ {
1762
+ ok = false ;
1763
+ break ;
1764
+ }
1765
+ if (!ok)
1766
+ continue ;
1767
+ if (dump_file)
1768
+ fprintf (dump_file,
1769
+ " edge %i->%i has annotated sucessor; count " ,
1770
+ b->index , e->dest ->index );
1771
+ add_scale (&scales, annotated_count, e->count ());
1705
1772
}
1706
1773
1707
1774
}
1775
+
1776
+ /* If we failed to find annotated entry or exit edge,
1777
+ look for exit edges and scale profile so the dest
1778
+ BB gets all flow it needs. This is imprecise because
1779
+ the edge is not annotated and thus BB has more than
1780
+ one such predecessor. */
1781
+ if (!scales.length ())
1782
+ for (basic_block b : bbs)
1783
+ if (b->count .nonzero_p ())
1784
+ for (edge e: b->succs )
1785
+ if (is_bb_annotated (e->dest , *annotated_bb))
1786
+ {
1787
+ profile_count annotated_count = e->dest ->count ;
1788
+ for (edge e2 : e->dest ->preds )
1789
+ if (AFDO_EINFO (e2 )->is_annotated ())
1790
+ annotated_count -= AFDO_EINFO (e2 )->get_count ();
1791
+ if (dump_file)
1792
+ fprintf (dump_file,
1793
+ " edge %i->%i has annotated sucessor;"
1794
+ " upper bound count " ,
1795
+ b->index , e->dest ->index );
1796
+ add_scale (&scales, annotated_count, e->count ());
1797
+ }
1708
1798
if (!scales.length ())
1709
- continue ;
1799
+ {
1800
+ if (dump_file)
1801
+ fprintf (dump_file,
1802
+ " Can not determine count from the boundary; giving up" );
1803
+ continue ;
1804
+ }
1805
+ gcc_checking_assert (scales.length ());
1710
1806
scales.qsort (cmp);
1711
- sreal scale = scales[scales.length () / 2 ];
1712
- if (dump_file)
1713
- fprintf (dump_file, " Scaling by %.16f\n " , scale.to_double ());
1714
- for (basic_block b : bbs)
1715
- if (!(b->count == profile_count::zero ())
1716
- && b->count .initialized_p ())
1717
- {
1718
- profile_count o = b->count ;
1719
- b->count = b->count .force_guessed () * scale;
1720
-
1721
- /* If we scaled to 0, make it auto-fdo since that is treated
1722
- less agressively. */
1723
- if (!b->count .nonzero_p () && o.nonzero_p ())
1724
- b->count = profile_count::zero ().afdo ();
1725
- if (dump_file)
1726
- {
1727
- fprintf (dump_file, " bb %i count updated " , b->index );
1728
- o.dump (dump_file);
1729
- fprintf (dump_file, " -> " );
1730
- b->count .dump (dump_file);
1731
- fprintf (dump_file, " \n " );
1732
- }
1733
- }
1807
+ scale_bbs (bbs, scales[scales.length () / 2 ]);
1734
1808
}
1735
1809
}
1736
1810
0 commit comments