Add a script for diffing cachegrind output (grpc#1086)

glbrntt · web-flow · commit 55f083ada102 · 2020-12-18T15:17:16.000Z
Motivation:

When profiling with cachegrind it's often useful to compare changes
across runs. It's reasonably easy to do this for program totals but a
little harder to see changes between instructions.

Modifications:

Add a script for diffing output from cachegrind. Features:
- runs input files through swift demangle (unless you tell it not to)
- sort on file1, file2, or the delta
- ignores changes smaller than a configurable low watermark

Result:

Easier to diff output from cachegrind
diff --git a/scripts/cg_diff.py b/scripts/cg_diff.py
@@ -0,0 +1,338 @@
+#!/usr/bin/env python3
+
+# Copyright 2020, gRPC Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import enum
+import os
+import subprocess
+import sys
+
+
+class State(enum.Enum):
+    READING_HEADERS = enum.auto()
+    READING_INSTRUCTION = enum.auto()
+    READING_COUNTS = enum.auto()
+    READING_SUMMARY = enum.auto()
+
+
+class InstructionCounts(object):
+    def __init__(self, events):
+        self._events = events
+        self._counts = {}
+
+    @property
+    def events(self):
+        return self._events
+
+    @property
+    def instructions(self):
+        return self._counts.keys()
+
+    def add(self, instruction, counts):
+        """Add a list of counts or the given instruction."""
+        if instruction in self._counts:
+            existing = self._counts[instruction]
+            self._counts[instruction] = [a + b for (a, b) in zip(existing, counts)]
+        else:
+            self._counts[instruction] = counts
+
+    def count(self, instruction, event):
+        """The number of occurrences of the event for the given instruction."""
+        counts = self._counts.get(instruction)
+        index = self._events.index(event)
+        if counts:
+            return counts[index]
+        else:
+            return 0
+
+    def aggregate(self):
+        """Aggregates event counts over all instructions."""
+        return [sum(x) for x in zip(*self._counts.values())]
+
+    def aggregate_by_event(self, event):
+        """Aggregates event counts over all instructions for a given event."""
+        return self.aggregate_by_index(self._events.index(event))
+
+    def aggregate_by_index(self, index):
+        """Aggregates event counts over all instructions for the event at the given index."""
+        return sum(x[index] for x in self._counts.values())
+
+
+class Parser(object):
+    HEADERS = ["desc:", "cmd:"]
+
+    def __init__(self):
+        # Parsing state.
+        self._state = State.READING_HEADERS
+        # File for current instruction
+        self._file = None
+        # Function for current instruction
+        self._function = None
+        # Instruction counts
+        self._counts = None
+
+    @property
+    def counts(self):
+        return self._counts
+
+    @property
+    def _key(self):
+        fl = "???" if self._file is None else self._file
+        fn = "???" if self._function is None else self._function
+        return fl + ":" + fn
+
+    ### Helpers
+
+    def _is_header(self, line):
+        return any(line.startswith(p) for p in Parser.HEADERS)
+
+    def _read_events_header(self, line):
+        if line.startswith("events:"):
+            self._counts = InstructionCounts(line[7:].strip().split(" "))
+            return True
+        else:
+            return False
+
+    def _read_function(self, line):
+        if not line.startswith("fn="):
+            return None
+        return line[3:].strip()
+
+    def _read_file(self, line):
+        if not line.startswith("fl="):
+            return None
+        return line[3:].strip()
+
+    def _read_file_or_function(self, line, reset_instruction=False):
+        function = self._read_function(line)
+        if function is not None:
+            self._function = function
+            self._file = None if reset_instruction else self._file
+            return State.READING_INSTRUCTION
+
+        file = self._read_file(line)
+        if file is not None:
+            self._file = file
+            self._function = None if reset_instruction else self._function
+            return State.READING_INSTRUCTION
+
+        return None
+
+    ### Section parsing
+
+    def _read_headers(self, line):
+        if self._read_events_header(line) or self._is_header(line):
+            # Still reading headers.
+            return State.READING_HEADERS
+
+        # Not a header, maybe a file or function.
+        next_state = self._read_file_or_function(line)
+        if next_state is None:
+            raise RuntimeWarning("Unhandled line:", line)
+
+        return next_state
+
+    def _read_instruction(self, line, reset_instruction=False):
+        next_state = self._read_file_or_function(line, reset_instruction)
+        if next_state is not None:
+            return next_state
+
+        if self._read_summary(line):
+            return State.READING_SUMMARY
+
+        return self._read_counts(line)
+
+    def _read_counts(self, line):
+        # Drop the line number
+        counts = [int(x) for x in line.split(" ")][1:]
+        self._counts.add(self._key, counts)
+        return State.READING_COUNTS
+
+    def _read_summary(self, line):
+        if line.startswith("summary:"):
+            summary = [int(x) for x in line[8:].strip().split(" ")]
+            computed_summary = self._counts.aggregate()
+            assert summary == computed_summary
+            return True
+        else:
+            return False
+
+    ### Parse
+
+    def parse(self, file, demangle):
+        """Parse the given file."""
+        with open(file) as fh:
+            if demangle:
+                demangled = subprocess.check_output(["swift", "demangle"], stdin=fh)
+                self._parse_lines(x.decode("utf-8") for x in demangled.splitlines())
+            else:
+                self._parse_lines(fh)
+
+        return self._counts
+
+    def _parse_lines(self, lines):
+        for line in lines:
+            self._next_line(line)
+
+    def _next_line(self, line):
+        """Parses a line of input."""
+        if self._state is State.READING_HEADERS:
+            self._state = self._read_headers(line)
+        elif self._state is State.READING_INSTRUCTION:
+            self._state = self._read_instruction(line)
+        elif self._state is State.READING_COUNTS:
+            self._state = self._read_instruction(line, reset_instruction=True)
+        elif self._state is State.READING_SUMMARY:
+            # We're done.
+            return
+        else:
+            raise RuntimeError("Unexpected state", self._state)
+
+
+def parse(filename, demangle):
+    parser = Parser()
+    return parser.parse(filename, demangle)
+
+
+def print_summary(args):
+    # No need to demangle for summary.
+    counts1 = parse(args.file1, False)
+    aggregate1 = counts1.aggregate_by_event(args.event)
+    counts2 = parse(args.file2, False)
+    aggregate2 = counts2.aggregate_by_event(args.event)
+
+    delta = aggregate2 - aggregate1
+    pc = 100.0 * delta / aggregate1
+    print("{:16,} {}".format(aggregate1, os.path.basename(args.file1)))
+    print("{:16,} {}".format(aggregate2, os.path.basename(args.file2)))
+    print("{:+16,} ({:+.3f}%)".format(delta, pc))
+
+
+def print_diff_table(args):
+    counts1 = parse(args.file1, args.demangle)
+    aggregate1 = counts1.aggregate_by_event(args.event)
+    counts2 = parse(args.file2, args.demangle)
+    aggregate2 = counts2.aggregate_by_event(args.event)
+
+    file1_total = aggregate1
+    diffs = []
+
+    def _count(key, counts):
+        block = counts.get(key)
+        return 0 if block is None else block.counts[0]
+
+    def _row(c1, c2, key):
+        delta = c2 - c1
+        delta_pc = 100.0 * (delta / file1_total)
+        return (c1, c2, delta, delta_pc, key)
+
+    def _row_for_key(key):
+        c1 = counts1.count(key, args.event)
+        c2 = counts2.count(key, args.event)
+        return _row(c1, c2, key)
+
+    if args.only_common:
+        keys = counts1.instructions & counts2.instructions
+    else:
+        keys = counts1.instructions | counts2.instructions
+
+    rows = [_row_for_key(k) for k in keys]
+    rows.append(_row(aggregate1, aggregate2, "PROGRAM TOTALS"))
+
+    print(
+        " | ".join(
+            [
+                "file1".rjust(14),
+                "file2".rjust(14),
+                "delta".rjust(14),
+                "%".rjust(7),
+                "name",
+            ]
+        )
+    )
+
+    index = _sort_index(args.sort)
+    reverse = not args.ascending
+    sorted_rows = sorted(rows, key=lambda x: x[index], reverse=reverse)
+    for (c1, c2, delta, delta_pc, key) in sorted_rows:
+        if abs(delta_pc) >= args.low_watermark:
+            print(
+                " | ".join(
+                    [
+                        "{:14,}".format(c1),
+                        "{:14,}".format(c2),
+                        "{:+14,}".format(delta),
+                        "{:+7.3f}".format(delta_pc),
+                        key,
+                    ]
+                )
+            )
+
+
+def _sort_index(key):
+    return ("file1", "file2", "delta").index(key)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser("cg_diff.py")
+
+    parser.add_argument(
+        "--sort",
+        choices=("file1", "file2", "delta"),
+        default="file1",
+        help="The column to sort on.",
+    )
+
+    parser.add_argument(
+        "--ascending", action="store_true", help="Sorts in ascending order."
+    )
+
+    parser.add_argument(
+        "--only-common",
+        action="store_true",
+        help="Only print instructions present in both files.",
+    )
+
+    parser.add_argument(
+        "--no-demangle",
+        action="store_false",
+        dest="demangle",
+        help="Disables demangling of input files.",
+    )
+
+    parser.add_argument("--event", default="Ir", help="The event to compare.")
+
+    parser.add_argument(
+        "--low-watermark",
+        type=float,
+        default=0.01,
+        help="A low watermark, percentage changes in counts "
+        "relative to the total instruction count of "
+        "file1 below this value will not be printed.",
+    )
+
+    parser.add_argument(
+        "--summary", action="store_true", help="Prints a summary of the diff."
+    )
+
+    parser.add_argument("file1")
+    parser.add_argument("file2")
+
+    args = parser.parse_args()
+
+    if args.summary:
+        print_summary(args)
+    else:
+        print_diff_table(args)