Skip to content

Commit d6bc681

Browse files
geordiKobzol
andcommitted
Store metrics from metrics.json into PGO CI timer
Co-authored-by: Jakub Beránek <[email protected]>
1 parent 0b439b1 commit d6bc681

File tree

1 file changed

+102
-34
lines changed

1 file changed

+102
-34
lines changed

src/ci/stage-build.py

+102-34
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import contextlib
77
import getpass
88
import glob
9+
import json
910
import logging
1011
import os
1112
import pprint
@@ -17,7 +18,8 @@
1718
import urllib.request
1819
from io import StringIO
1920
from pathlib import Path
20-
from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union
21+
from typing import Callable, ContextManager, Dict, Iterable, Iterator, List, Optional, \
22+
Tuple, Union
2123

2224
PGO_HOST = os.environ["PGO_HOST"]
2325

@@ -115,6 +117,9 @@ def supports_bolt(self) -> bool:
115117
def llvm_bolt_profile_merged_file(self) -> Path:
116118
return self.opt_artifacts() / "bolt.profdata"
117119

120+
def metrics_path(self) -> Path:
121+
return self.build_root() / "build" / "metrics.json"
122+
118123

119124
class LinuxPipeline(Pipeline):
120125
def checkout_path(self) -> Path:
@@ -208,32 +213,27 @@ def get_timestamp() -> float:
208213

209214

210215
Duration = float
211-
TimerSection = Union[Duration, "Timer"]
212216

213217

214-
def iterate_sections(section: TimerSection, name: str, level: int = 0) -> Iterator[
218+
def iterate_timers(timer: "Timer", name: str, level: int = 0) -> Iterator[
215219
Tuple[int, str, Duration]]:
216220
"""
217-
Hierarchically iterate the sections of a timer, in a depth-first order.
221+
Hierarchically iterate the children of a timer, in a depth-first order.
218222
"""
219-
if isinstance(section, Duration):
220-
yield (level, name, section)
221-
elif isinstance(section, Timer):
222-
yield (level, name, section.total_duration())
223-
for (child_name, child_section) in section.sections:
224-
yield from iterate_sections(child_section, child_name, level=level + 1)
225-
else:
226-
assert False
223+
yield (level, name, timer.total_duration())
224+
for (child_name, child_timer) in timer.children:
225+
yield from iterate_timers(child_timer, child_name, level=level + 1)
227226

228227

229228
class Timer:
230229
def __init__(self, parent_names: Tuple[str, ...] = ()):
231-
self.sections: List[Tuple[str, TimerSection]] = []
230+
self.children: List[Tuple[str, Timer]] = []
232231
self.section_active = False
233232
self.parent_names = parent_names
233+
self.duration_excluding_children: Duration = 0
234234

235235
@contextlib.contextmanager
236-
def section(self, name: str) -> "Timer":
236+
def section(self, name: str) -> ContextManager["Timer"]:
237237
assert not self.section_active
238238
self.section_active = True
239239

@@ -252,33 +252,26 @@ def section(self, name: str) -> "Timer":
252252
end = get_timestamp()
253253
duration = end - start
254254

255-
if child_timer.has_children():
256-
self.sections.append((name, child_timer))
257-
else:
258-
self.sections.append((name, duration))
255+
child_timer.duration_excluding_children = duration - child_timer.total_duration()
256+
self.add_child(name, child_timer)
259257
if exc is None:
260258
LOGGER.info(f"Section `{full_name}` ended: OK ({duration:.2f}s)")
261259
else:
262260
LOGGER.info(f"Section `{full_name}` ended: FAIL ({duration:.2f}s)")
263261
self.section_active = False
264262

265263
def total_duration(self) -> Duration:
266-
duration = 0
267-
for (_, section) in self.sections:
268-
if isinstance(section, Duration):
269-
duration += section
270-
else:
271-
duration += section.total_duration()
272-
return duration
264+
return self.duration_excluding_children + sum(
265+
c.total_duration() for (_, c) in self.children)
273266

274267
def has_children(self) -> bool:
275-
return len(self.sections) > 0
268+
return len(self.children) > 0
276269

277270
def print_stats(self):
278271
rows = []
279-
for (child_name, child_section) in self.sections:
280-
for (level, name, duration) in iterate_sections(child_section, child_name, level=0):
281-
label = f"{' ' * level}{name}:"
272+
for (child_name, child_timer) in self.children:
273+
for (level, name, duration) in iterate_timers(child_timer, child_name, level=0):
274+
label = f"{' ' * level}{name}:"
282275
rows.append((label, duration))
283276

284277
# Empty row
@@ -306,6 +299,60 @@ def print_stats(self):
306299
print(divider, file=output, end="")
307300
LOGGER.info(f"Timer results\n{output.getvalue()}")
308301

302+
def add_child(self, name: str, timer: "Timer"):
303+
self.children.append((name, timer))
304+
305+
def add_duration(self, name: str, duration: Duration):
306+
timer = Timer(parent_names=self.parent_names + (name,))
307+
timer.duration_excluding_children = duration
308+
self.add_child(name, timer)
309+
310+
311+
class BuildStep:
312+
def __init__(self, type: str, children: List["BuildStep"], duration: float):
313+
self.type = type
314+
self.children = children
315+
self.duration = duration
316+
317+
def find_all_by_type(self, type: str) -> Iterator["BuildStep"]:
318+
if type == self.type:
319+
yield self
320+
for child in self.children:
321+
yield from child.find_all_by_type(type)
322+
323+
def __repr__(self):
324+
return f"BuildStep(type={self.type}, duration={self.duration}, children={len(self.children)})"
325+
326+
327+
def load_last_metrics(path: Path) -> BuildStep:
328+
"""
329+
Loads the metrics of the most recent bootstrap execution from a metrics.json file.
330+
"""
331+
with open(path, "r") as f:
332+
metrics = json.load(f)
333+
invocation = metrics["invocations"][-1]
334+
335+
def parse(entry) -> Optional[BuildStep]:
336+
if "kind" not in entry or entry["kind"] != "rustbuild_step":
337+
return None
338+
type = entry.get("type", "")
339+
duration = entry.get("duration_excluding_children_sec", 0)
340+
children = []
341+
342+
for child in entry.get("children", ()):
343+
step = parse(child)
344+
if step is not None:
345+
children.append(step)
346+
duration += step.duration
347+
return BuildStep(type=type, children=children, duration=duration)
348+
349+
children = [parse(child) for child in invocation.get("children", ())]
350+
return BuildStep(
351+
type="root",
352+
children=children,
353+
duration=invocation.get("duration_including_children_sec", 0)
354+
)
355+
309356

310357
@contextlib.contextmanager
311358
def change_cwd(dir: Path):
@@ -645,7 +692,7 @@ def print_binary_sizes(pipeline: Pipeline):
645692
with StringIO() as output:
646693
for path in paths:
647694
path_str = f"{path.name}:"
648-
print(f"{path_str:<30}{format_bytes(path.stat().st_size):>14}", file=output)
695+
print(f"{path_str:<50}{format_bytes(path.stat().st_size):>14}", file=output)
649696
LOGGER.info(f"Rustc binary size\n{output.getvalue()}")
650697

651698

@@ -659,6 +706,22 @@ def print_free_disk_space(pipeline: Pipeline):
659706
f"Free disk space: {format_bytes(free)} out of total {format_bytes(total)} ({(used / total) * 100:.2f}% used)")
660707

661708

709+
def record_metrics(pipeline: Pipeline, timer: Timer):
710+
metrics = load_last_metrics(pipeline.metrics_path())
711+
if metrics is None:
712+
return
713+
llvm_steps = metrics.find_all_by_type("bootstrap::native::Llvm")
714+
llvm_duration = sum(step.duration for step in llvm_steps)
715+
rustc_steps = metrics.find_all_by_type("bootstrap::compile::Rustc")
716+
rustc_duration = sum(step.duration for step in rustc_steps)
717+
718+
# The LLVM step is part of the Rustc step
719+
rustc_duration -= llvm_duration
720+
721+
timer.add_duration("LLVM", llvm_duration)
722+
timer.add_duration("Rustc", rustc_duration)
723+
724+
662725
def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: List[str]):
663726
# Clear and prepare tmp directory
664727
shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True)
@@ -668,12 +731,13 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
668731

669732
# Stage 1: Build rustc + PGO instrumented LLVM
670733
with timer.section("Stage 1 (LLVM PGO)") as stage1:
671-
with stage1.section("Build rustc and LLVM"):
734+
with stage1.section("Build rustc and LLVM") as rustc_build:
672735
build_rustc(pipeline, args=[
673736
"--llvm-profile-generate"
674737
], env=dict(
675738
LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p")
676739
))
740+
record_metrics(pipeline, rustc_build)
677741

678742
with stage1.section("Gather profiles"):
679743
gather_llvm_profiles(pipeline)
@@ -687,11 +751,12 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
687751

688752
# Stage 2: Build PGO instrumented rustc + LLVM
689753
with timer.section("Stage 2 (rustc PGO)") as stage2:
690-
with stage2.section("Build rustc and LLVM"):
754+
with stage2.section("Build rustc and LLVM") as rustc_build:
691755
build_rustc(pipeline, args=[
692756
"--rust-profile-generate",
693757
pipeline.rustc_profile_dir_root()
694758
])
759+
record_metrics(pipeline, rustc_build)
695760

696761
with stage2.section("Gather profiles"):
697762
gather_rustc_profiles(pipeline)
@@ -706,12 +771,14 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
706771
# Stage 3: Build rustc + BOLT instrumented LLVM
707772
if pipeline.supports_bolt():
708773
with timer.section("Stage 3 (LLVM BOLT)") as stage3:
709-
with stage3.section("Build rustc and LLVM"):
774+
with stage3.section("Build rustc and LLVM") as rustc_build:
710775
build_rustc(pipeline, args=[
711776
"--llvm-profile-use",
712777
pipeline.llvm_profile_merged_file(),
713778
"--llvm-bolt-profile-generate",
714779
])
780+
record_metrics(pipeline, rustc_build)
781+
715782
with stage3.section("Gather profiles"):
716783
gather_llvm_bolt_profiles(pipeline)
717784

@@ -723,8 +790,9 @@ def execute_build_pipeline(timer: Timer, pipeline: Pipeline, final_build_args: L
723790
]
724791

725792
# Stage 4: Build PGO optimized rustc + PGO/BOLT optimized LLVM
726-
with timer.section("Stage 4 (final build)"):
793+
with timer.section("Stage 4 (final build)") as stage4:
727794
cmd(final_build_args)
795+
record_metrics(pipeline, stage4)
728796

729797

730798
if __name__ == "__main__":

0 commit comments

Comments
 (0)