Skip to content

Commit 55f083a

Browse files
authored
Add a script for diffing cachegrind output (grpc#1086)
Motivation: When profiling with cachegrind it's often useful to compare changes across runs. It's reasonably easy to do this for program totals but a little harder to see changes between instructions. Modifications: Add a script for diffing output from cachegrind. Features: - runs input files through swift demangle (unless you tell it not to) - sorts on file1, file2, or the delta - ignores changes smaller than a configurable low watermark. Result: Easier to diff output from cachegrind.
1 parent 31dffb8 commit 55f083a

File tree

1 file changed

+338
-0
lines changed

1 file changed

+338
-0
lines changed

scripts/cg_diff.py

Lines changed: 338 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,338 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright 2020, gRPC Authors All rights reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
import argparse
17+
import enum
18+
import os
19+
import subprocess
20+
import sys
21+
22+
23+
class State(enum.Enum):
    """Phases of the cachegrind output parser's state machine."""

    # Explicit values match what ``enum.auto()`` assigns (1, 2, 3, ...).
    READING_HEADERS = 1
    READING_INSTRUCTION = 2
    READING_COUNTS = 3
    READING_SUMMARY = 4
28+
29+
30+
class InstructionCounts(object):
    """Event counts per instruction key for one cachegrind output file.

    Each instruction key maps to a list holding one count per event, in the
    same order as ``events``.
    """

    def __init__(self, events):
        """Create an empty table for the given ordered list of event names."""
        self._events = events
        self._counts = {}

    @property
    def events(self):
        """The ordered list of event names the counts refer to."""
        return self._events

    @property
    def instructions(self):
        """A set-like view of every instruction key that has counts."""
        return self._counts.keys()

    def _normalized(self, counts):
        """Return a copy of ``counts`` zero-padded to one entry per event.

        Count lines may omit trailing counts; those are treated as zero.
        Without the padding, merging with ``zip`` would silently drop counts
        and lookups by event index could raise ``IndexError``.

        Raises:
            ValueError: if there are more counts than events.
        """
        n_events = len(self._events)
        if len(counts) > n_events:
            raise ValueError(
                "Got {} counts but only {} events".format(len(counts), n_events)
            )
        return list(counts) + [0] * (n_events - len(counts))

    def add(self, instruction, counts):
        """Add a list of counts for the given instruction.

        Counts are summed element-wise with any counts already recorded for
        the instruction. A list shorter than ``events`` is zero-padded.

        Raises:
            ValueError: if ``counts`` has more entries than there are events.
        """
        counts = self._normalized(counts)
        existing = self._counts.get(instruction)
        if existing is None:
            self._counts[instruction] = counts
        else:
            self._counts[instruction] = [a + b for (a, b) in zip(existing, counts)]

    def count(self, instruction, event):
        """The number of occurrences of the event for the given instruction.

        Returns 0 for an instruction with no recorded counts.

        Raises:
            ValueError: if ``event`` is not one of ``events``.
        """
        # Resolve the event first so an unknown event is always reported,
        # even for an instruction that was never seen.
        index = self._events.index(event)
        counts = self._counts.get(instruction)
        if counts is None:
            return 0
        return counts[index]

    def aggregate(self):
        """Aggregates event counts over all instructions.

        Returns a list with one total per event, or an empty list when no
        instruction has been added.
        """
        return [sum(x) for x in zip(*self._counts.values())]

    def aggregate_by_event(self, event):
        """Aggregates event counts over all instructions for a given event.

        Raises:
            ValueError: if ``event`` is not one of ``events``.
        """
        return self.aggregate_by_index(self._events.index(event))

    def aggregate_by_index(self, index):
        """Aggregates event counts over all instructions for the event at the given index."""
        return sum(x[index] for x in self._counts.values())
71+
72+
73+
class Parser(object):
    """Line-oriented state machine that reads a cachegrind output file.

    Lines are fed through ``_next_line`` one at a time. Counts are collected
    in an ``InstructionCounts`` keyed by ``"<file>:<function>"``, where an
    unknown file or function is written as ``???``.
    """

    # Header prefixes that are recognised and skipped.
    HEADERS = ["desc:", "cmd:"]

    def __init__(self):
        # Parsing state.
        self._state = State.READING_HEADERS
        # File for current instruction
        self._file = None
        # Function for current instruction
        self._function = None
        # Instruction counts; created when the "events:" header is read.
        self._counts = None

    @property
    def counts(self):
        """The ``InstructionCounts`` parsed so far (None before the events header)."""
        return self._counts

    @property
    def _key(self):
        """The ``"<file>:<function>"`` key for the current instruction."""
        fl = "???" if self._file is None else self._file
        fn = "???" if self._function is None else self._function
        return fl + ":" + fn

    ### Helpers

    def _is_header(self, line):
        """Whether the line starts with one of the skipped header prefixes."""
        return line.startswith(tuple(Parser.HEADERS))

    def _read_events_header(self, line):
        """Create the counts table from an ``events:`` line.

        Returns True if the line was an events header, False otherwise.
        """
        if not line.startswith("events:"):
            return False
        # split() with no argument tolerates repeated whitespace between names.
        self._counts = InstructionCounts(line[7:].split())
        return True

    def _read_function(self, line):
        """Return the function name from a ``fn=`` line, or None."""
        if not line.startswith("fn="):
            return None
        return line[3:].strip()

    def _read_file(self, line):
        """Return the file name from a ``fl=`` line, or None."""
        if not line.startswith("fl="):
            return None
        return line[3:].strip()

    def _update_location(self, line, reset_instruction):
        """Update the current file/function from a ``fl=`` or ``fn=`` line.

        Returns True if the line was consumed, False otherwise.

        A ``fn=`` line never clears the current file: in cachegrind output a
        ``fl=`` line applies to every following ``fn=`` line until the next
        ``fl=`` line, so clearing it would key all but the first function of
        each file as ``???:<function>``. A ``fl=`` line clears the current
        function when ``reset_instruction`` is true.
        """
        function = self._read_function(line)
        if function is not None:
            self._function = function
            return True

        filename = self._read_file(line)
        if filename is not None:
            self._file = filename
            if reset_instruction:
                self._function = None
            return True

        return False

    def _read_file_or_function(self, line, reset_instruction=False):
        """Consume a ``fl=``/``fn=`` line.

        Returns ``State.READING_INSTRUCTION`` if the line was consumed, or
        None if it is some other kind of line.
        """
        if self._update_location(line, reset_instruction):
            return State.READING_INSTRUCTION
        return None

    def _parse_counts(self, line):
        """Parse a count line into a list of counts, one per event.

        The leading line number is validated as an integer and dropped.
        Trailing counts missing from the line are filled in as zero.

        Raises:
            ValueError: if any field is not an integer.
        """
        # split() with no argument tolerates repeated whitespace and the
        # trailing newline.
        counts = [int(x) for x in line.split()][1:]
        missing = len(self._counts.events) - len(counts)
        if missing > 0:
            counts.extend([0] * missing)
        return counts

    ### Section parsing

    def _read_headers(self, line):
        """Handle a line while in the header section; returns the next state.

        Raises:
            RuntimeWarning: if the line is neither a known header nor a
                ``fl=``/``fn=`` line.
        """
        if self._read_events_header(line) or self._is_header(line):
            # Still reading headers.
            return State.READING_HEADERS

        # Not a header, maybe a file or function.
        next_state = self._read_file_or_function(line)
        if next_state is None:
            raise RuntimeWarning("Unhandled line:", line)

        return next_state

    def _read_instruction(self, line, reset_instruction=False):
        """Handle a line in the body of the file; returns the next state.

        The line is tried as a ``fl=``/``fn=`` line, then as the summary, and
        finally as a count line.
        """
        next_state = self._read_file_or_function(line, reset_instruction)
        if next_state is not None:
            return next_state

        if self._read_summary(line):
            return State.READING_SUMMARY

        return self._read_counts(line)

    def _read_counts(self, line):
        """Record a count line against the current file/function key."""
        self._counts.add(self._key, self._parse_counts(line))
        return State.READING_COUNTS

    def _read_summary(self, line):
        """Check a ``summary:`` line against the totals computed so far.

        Returns True if the line was a summary line, False otherwise.

        Raises:
            AssertionError: if the summary differs from the computed totals.
        """
        if not line.startswith("summary:"):
            return False

        summary = [int(x) for x in line[8:].split()]
        computed_summary = self._counts.aggregate()
        # Raised explicitly rather than with ``assert`` so the consistency
        # check still runs under ``python -O``.
        if summary != computed_summary:
            raise AssertionError(
                "Summary {} does not match computed totals {}".format(
                    summary, computed_summary
                )
            )
        return True

    ### Parse

    def parse(self, file, demangle):
        """Parse the given file.

        Args:
            file: path of the cachegrind output file to read.
            demangle: if true, the file is piped through ``swift demangle``
                before parsing.

        Returns:
            The parsed ``InstructionCounts`` (None if no events header was read).
        """
        with open(file) as fh:
            if demangle:
                demangled = subprocess.check_output(["swift", "demangle"], stdin=fh)
                self._parse_lines(x.decode("utf-8") for x in demangled.splitlines())
            else:
                self._parse_lines(fh)

        return self._counts

    def _parse_lines(self, lines):
        """Feed every line of the iterable through the state machine."""
        for line in lines:
            self._next_line(line)

    def _next_line(self, line):
        """Parses a line of input."""
        if self._state is State.READING_SUMMARY:
            # We're done.
            return

        if not line.strip():
            # Blank lines carry no data; skip them instead of failing on int("").
            return

        if self._state is State.READING_HEADERS:
            self._state = self._read_headers(line)
        elif self._state is State.READING_INSTRUCTION:
            self._state = self._read_instruction(line)
        elif self._state is State.READING_COUNTS:
            self._state = self._read_instruction(line, reset_instruction=True)
        else:
            raise RuntimeError("Unexpected state", self._state)
202+
203+
204+
def parse(filename, demangle):
    """Parse ``filename`` with a fresh ``Parser`` and return its counts.

    ``demangle`` is forwarded unchanged to ``Parser.parse``.
    """
    return Parser().parse(filename, demangle)
207+
208+
209+
def print_summary(args):
    """Print the total count of ``args.event`` for both files and the change.

    Reads ``args.file1``, ``args.file2`` and ``args.event``. Three lines are
    printed: the total for each file next to its base name, then the
    absolute and percentage delta relative to file1.
    """
    # No need to demangle for summary.
    counts1 = parse(args.file1, False)
    aggregate1 = counts1.aggregate_by_event(args.event)
    counts2 = parse(args.file2, False)
    aggregate2 = counts2.aggregate_by_event(args.event)

    delta = aggregate2 - aggregate1
    if aggregate1 != 0:
        pc = 100.0 * delta / aggregate1
    elif delta == 0:
        # Both totals are zero: nothing changed.
        pc = 0.0
    else:
        # A percentage of a zero baseline is undefined; report it as
        # infinite rather than failing with ZeroDivisionError.
        pc = float("inf") if delta > 0 else float("-inf")

    print("{:16,} {}".format(aggregate1, os.path.basename(args.file1)))
    print("{:16,} {}".format(aggregate2, os.path.basename(args.file2)))
    print("{:+16,} ({:+.3f}%)".format(delta, pc))
221+
222+
223+
def print_diff_table(args):
    """Print a per-instruction table comparing ``args.event`` in two files.

    Reads ``args.file1``, ``args.file2``, ``args.demangle``, ``args.event``,
    ``args.only_common``, ``args.sort``, ``args.ascending`` and
    ``args.low_watermark``.

    Each row holds the count in file1, the count in file2, the delta, the
    delta as a percentage of file1's total, and the instruction key. A
    "PROGRAM TOTALS" row is included and sorted with the rest. Rows whose
    absolute percentage is below ``args.low_watermark`` are not printed.
    """
    counts1 = parse(args.file1, args.demangle)
    aggregate1 = counts1.aggregate_by_event(args.event)
    counts2 = parse(args.file2, args.demangle)
    aggregate2 = counts2.aggregate_by_event(args.event)

    file1_total = aggregate1

    def _percent_of_file1(delta):
        # A percentage of a zero baseline is undefined; report no change as
        # 0% and any change as infinite instead of raising ZeroDivisionError.
        if file1_total == 0:
            if delta == 0:
                return 0.0
            return float("inf") if delta > 0 else float("-inf")
        return 100.0 * (delta / file1_total)

    def _row(c1, c2, key):
        delta = c2 - c1
        return (c1, c2, delta, _percent_of_file1(delta), key)

    def _row_for_key(key):
        c1 = counts1.count(key, args.event)
        c2 = counts2.count(key, args.event)
        return _row(c1, c2, key)

    if args.only_common:
        keys = counts1.instructions & counts2.instructions
    else:
        keys = counts1.instructions | counts2.instructions

    rows = [_row_for_key(k) for k in keys]
    rows.append(_row(aggregate1, aggregate2, "PROGRAM TOTALS"))

    print(
        " | ".join(
            [
                "file1".rjust(14),
                "file2".rjust(14),
                "delta".rjust(14),
                "%".rjust(7),
                "name",
            ]
        )
    )

    index = _sort_index(args.sort)
    reverse = not args.ascending
    # ``keys`` is a set, so its iteration order is arbitrary; break ties on
    # the instruction name to keep the output order reproducible.
    sorted_rows = sorted(rows, key=lambda row: (row[index], row[4]), reverse=reverse)
    for (c1, c2, delta, delta_pc, key) in sorted_rows:
        if abs(delta_pc) >= args.low_watermark:
            print(
                " | ".join(
                    [
                        "{:14,}".format(c1),
                        "{:14,}".format(c2),
                        "{:+14,}".format(delta),
                        "{:+7.3f}".format(delta_pc),
                        key,
                    ]
                )
            )
282+
283+
284+
def _sort_index(key):
285+
return ("file1", "file2", "delta").index(key)
286+
287+
288+
def _build_argument_parser():
    """Build the command-line parser for cg_diff.py."""
    cli = argparse.ArgumentParser("cg_diff.py")

    cli.add_argument(
        "--sort",
        choices=("file1", "file2", "delta"),
        default="file1",
        help="The column to sort on.",
    )
    cli.add_argument(
        "--ascending",
        action="store_true",
        help="Sorts in ascending order.",
    )
    cli.add_argument(
        "--only-common",
        action="store_true",
        help="Only print instructions present in both files.",
    )
    # Demangling is on by default; this flag stores False into ``demangle``.
    cli.add_argument(
        "--no-demangle",
        action="store_false",
        dest="demangle",
        help="Disables demangling of input files.",
    )
    cli.add_argument("--event", default="Ir", help="The event to compare.")
    cli.add_argument(
        "--low-watermark",
        type=float,
        default=0.01,
        help="A low watermark, percentage changes in counts "
        "relative to the total instruction count of "
        "file1 below this value will not be printed.",
    )
    cli.add_argument(
        "--summary",
        action="store_true",
        help="Prints a summary of the diff.",
    )
    cli.add_argument("file1")
    cli.add_argument("file2")

    return cli


def main():
    """Parse the command line and print either the summary or the diff table."""
    options = _build_argument_parser().parse_args()
    report = print_summary if options.summary else print_diff_table
    report(options)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)