Skip to content

Commit

Permalink
Added pympler memory profiling to implementation report.
Browse files Browse the repository at this point in the history
  • Loading branch information
sg495 committed Jul 28, 2022
1 parent d91b73b commit 4cb2705
Show file tree
Hide file tree
Showing 2 changed files with 168 additions and 77 deletions.
184 changes: 132 additions & 52 deletions report.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,105 +6,184 @@
# Guard: this file is meant to be run as a script. When it is loaded under any
# other module name (i.e. imported), it raises immediately with a usage hint.
# NOTE(review): the usage string lists only -h and -d, while the argument parser
# in this file also registers -r — confirm whether the hint should mention it.
if __name__ != "__main__":
raise RuntimeError("usage: report.py [-h] [-d]")

# == Memory profiling ==
# == Script imports ==

import argparse
import gc
import sys
from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Union

# `rich` is not a dependency for the `multiformats` library
from rich.console import Console
from rich.panel import Panel
from rich.table import Table

# `setuptools_scm` is a development dependency for the `multiformats` library
from setuptools_scm import get_version # type: ignore

# `psutil` is not a dependency for the `multiformats` library
import psutil # type: ignore

mem_usage = {}
# `pympler` is not a dependency for the `multiformats` library
from pympler import tracker # type: ignore

def bytesize_str(nbytes: int) -> str:
    """
    Pretty string representation of bytesizes.

    The magnitude is repeatedly floor-divided by 1024 until it drops below 1024
    or the largest known suffix is reached, so fractional parts are truncated
    (e.g. 1536 bytes is rendered as ``"1KiB"``). Negative sizes keep a leading
    ``-`` sign.

    :param nbytes: size in bytes; may be negative (e.g. a memory delta)
    :return: the scaled integer magnitude followed by its binary unit suffix
    """
    suffixes = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"]
    sign = "-" if nbytes < 0 else ""
    # Scale the magnitude rather than the signed value: floor division on a
    # negative number would round away from zero.
    magnitude = abs(nbytes)
    suffix_idx = 0
    # Bound the loop by the suffix table itself, so that every listed unit is
    # reachable and the index can never run past the end of the table.
    while magnitude >= 1024 and suffix_idx < len(suffixes) - 1:
        magnitude //= 1024
        suffix_idx += 1
    return f"{sign}{magnitude}{suffixes[suffix_idx]}"

def print_diff(diff: list[tuple[str, int, int]], console: Console) -> None:
    """
    Renders a tracker diff as a table on the given console.

    Every entry of ``diff`` is read as ``(type, count, size)``; rows are shown
    in order of decreasing size, with the size formatted by ``bytesize_str``.
    """
    largest_first = sorted(diff, key=lambda row: row[2], reverse=True)
    table = Table()
    # Only the "Type" column carries a custom style.
    for header, options in (("Type", {"style": "bold green"}), ("Count", {}), ("Size", {})):
        table.add_column(header, **options)
    for type_name, count, size in largest_first:
        table.add_row(type_name, str(count), bytesize_str(size))
    console.print(table)

# == Intro panel with version ==

# Version string obtained from setuptools_scm's get_version, called with the
# current directory as root and the "post-release" version scheme.
version = get_version(root='.', version_scheme="post-release")

# The console is created with record=True; the recorded output is what gets
# written out by console.save_text at the end of the script.
console = Console(record=True, width=110)
console.print(Panel(f"Multiformats implementation report [bold blue]v{version}[white]"))

# == Memory profiling ==

# Per-component results, keyed by component name (e.g. "typing-extensions"):
# - pympler_count: sum of the count column of the pympler tracker diff
# - pympler_mem_usage: sum of the size column of the pympler tracker diff, converted to MiB
# - psutil_mem_usage: change in the process USS reported by psutil, in MiB
pympler_count = {}
pympler_mem_usage = {}
psutil_mem_usage = {}

# Collect garbage before measuring, then take the current process USS
# (converted from bytes to MiB) as the baseline for all later deltas.
gc.collect()
baseline = psutil.Process().memory_full_info().uss / (1024 * 1024)
prev = baseline
# NOTE(review): pympler_prev starts out as a MiB value here but is later
# incremented by pypler_diff, which is a byte count; its value is never read in
# the visible code — confirm whether it is needed at all.
pympler_prev = baseline
psutil_prev = baseline

import typing_extensions

# Measure the cost of importing typing_extensions.
# A fresh pympler tracker is created and the result of its first diff() is
# discarded; presumably this primes the tracker so that the next diff() only
# covers the import below — confirm against the pympler documentation.
tr = tracker.SummaryTracker()
tr.diff()
import typing_extensions
gc.collect()
# psutil measurement: growth of the process USS (MiB) since the previous measurement.
diff = psutil.Process().memory_full_info().uss / (1024 * 1024)-prev
mem_usage["typing-extensions"] = diff
prev += diff

# pympler measurement: entry[1] is summed as the object count, entry[2] as the
# size in bytes (converted to MiB before being stored).
# NOTE(review): `pypler_diff` looks like a typo for `pympler_diff`.
tracker_diff = tr.diff()
pympler_count["typing-extensions"] = sum(entry[1] for entry in tracker_diff)
pypler_diff = sum(entry[2] for entry in tracker_diff)
pympler_mem_usage["typing-extensions"] = pypler_diff / (1024 * 1024)
pympler_prev += pypler_diff
# psutil measurement: growth of the process USS (MiB) relative to psutil_prev,
# which is then advanced so that the next component only sees its own delta.
psutil_diff = psutil.Process().memory_full_info().uss / (1024 * 1024)-psutil_prev
psutil_mem_usage["typing-extensions"] = psutil_diff
psutil_prev += psutil_diff

# Measure the cost of importing typing_validation.
# A fresh pympler tracker is created and its first diff() result is discarded
# before the import (presumably to prime the tracker — confirm against pympler docs).
tr = tracker.SummaryTracker()
tr.diff()
import typing_validation

gc.collect()
# psutil measurement: growth of the process USS (MiB) since the previous measurement.
diff = psutil.Process().memory_full_info().uss / (1024 * 1024)-prev
mem_usage["typing-validation"] = diff
prev += diff

# pympler measurement: entry[1] is summed as the object count, entry[2] as the
# size in bytes (converted to MiB before being stored).
tracker_diff = tr.diff()
pympler_count["typing-validation"] = sum(entry[1] for entry in tracker_diff)
pypler_diff = sum(entry[2] for entry in tracker_diff)
pympler_mem_usage["typing-validation"] = pypler_diff / (1024 * 1024)
pympler_prev += pypler_diff
# psutil measurement: growth of the process USS (MiB) relative to psutil_prev,
# which is then advanced by the same amount.
psutil_diff = psutil.Process().memory_full_info().uss / (1024 * 1024)-psutil_prev
psutil_mem_usage["typing-validation"] = psutil_diff
psutil_prev += psutil_diff

# Measure the cost of importing bases.
# A fresh pympler tracker is created and its first diff() result is discarded
# before the import (presumably to prime the tracker — confirm against pympler docs).
tr = tracker.SummaryTracker()
tr.diff()
import bases

gc.collect()
# psutil measurement: growth of the process USS (MiB) since the previous measurement.
diff = psutil.Process().memory_full_info().uss / (1024 * 1024)-prev
mem_usage["bases"] = diff
prev += diff

# pympler measurement: entry[1] is summed as the object count, entry[2] as the
# size in bytes (converted to MiB before being stored).
tracker_diff = tr.diff()
pympler_count["bases"] = sum(entry[1] for entry in tracker_diff)
pypler_diff = sum(entry[2] for entry in tracker_diff)
pympler_mem_usage["bases"] = pypler_diff / (1024 * 1024)
pympler_prev += pypler_diff
# psutil measurement: growth of the process USS (MiB) relative to psutil_prev,
# which is then advanced by the same amount.
psutil_diff = psutil.Process().memory_full_info().uss / (1024 * 1024)-psutil_prev
psutil_mem_usage["bases"] = psutil_diff
psutil_prev += psutil_diff

# Measure the cost of importing multiformats itself.
# A fresh pympler tracker is created and its first diff() result is discarded
# before the import (presumably to prime the tracker — confirm against pympler docs).
tr = tracker.SummaryTracker()
tr.diff()
import multiformats
# The star import brings the library's public names into the script namespace.
from multiformats import *

gc.collect()
# psutil measurement: growth of the process USS (MiB) since the previous measurement.
diff = psutil.Process().memory_full_info().uss / (1024 * 1024)-prev
mem_usage["multiformats"] = diff

# Total over all recorded components, and each component's share of that total.
mem_usage_total = sum(mem_usage.values())
mem_usage_pct = {k: v/mem_usage_total for k, v in mem_usage.items()}


# == Script imports ==

import argparse
from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Union
from typing_extensions import Literal

# `rich` is not a dependency for the `multiformats` library
from rich.console import Console
from rich.panel import Panel
from rich.table import Table

# `setuptools_scm` is a development dependency for the `multiformats` library
from setuptools_scm import get_version # type: ignore
# pympler measurement for multiformats: entry[1] is summed as the object count,
# entry[2] as the size in bytes (converted to MiB before being stored).
tracker_diff = tr.diff()
pympler_count["multiformats"] = sum(entry[1] for entry in tracker_diff)
pypler_diff = sum(entry[2] for entry in tracker_diff)
pympler_mem_usage["multiformats"] = pypler_diff / (1024 * 1024)
pympler_prev += pypler_diff
# psutil measurement: growth of the process USS (MiB) relative to psutil_prev.
psutil_diff = psutil.Process().memory_full_info().uss / (1024 * 1024)-psutil_prev
psutil_mem_usage["multiformats"] = psutil_diff
psutil_prev += psutil_diff

# Totals over all recorded components, and each component's share of its total.
# NOTE(review): each share is computed by dividing by the total, so a total of
# exactly zero would raise ZeroDivisionError here.
pympler_mem_usage_total = sum(pympler_mem_usage.values())
pympler_mem_usage_pct = {k: v/pympler_mem_usage_total for k, v in pympler_mem_usage.items()}
psutil_mem_usage_total = sum(psutil_mem_usage.values())
psutil_mem_usage_pct = {k: v/psutil_mem_usage_total for k, v in psutil_mem_usage.items()}

# == Extract commandline args ==

# Command-line interface: two boolean flags, both off by default.
description = "Implementation report for multiformats."
parser = argparse.ArgumentParser(description=description)
parser.add_argument("-d", help='print codes as decimal rather than hex', action="store_true")
parser.add_argument("-r", help='saves report to file', action="store_true")
args = parser.parse_args()
# Codes are rendered in hex unless -d was given.
hex_codes = not args.d
save_report = args.r
# Formatter used to turn an integer code into its string form.
code2str: Callable[[int], str] = hex if hex_codes else str # type: ignore

# == Intro panel with version ==

# Version string obtained from setuptools_scm's get_version.
version = get_version(root='.', version_scheme="post-release")
# == Memory usage table ==

# Recording console (record=True) used for all report output.
console = Console(record=True, width=110)
console.print(Panel(f"Multiformats implementation report [bold blue]v{version}[white]"))
console.rule("Memory Usage (pympler)")

# One row per component: object count, memory and share of the pympler total.
table = Table()
table.add_column("Component", style="white")
table.add_column("Obj. count", style="white", justify="right")
table.add_column("Memory", style="bold blue", justify="right")
table.add_column("Memory %", style="bold blue", justify="right")
for k, v in pympler_mem_usage.items():
# The percentage cell is left empty for components without a recorded share.
pct = f"{pympler_mem_usage_pct[k]:.0%}" if k in pympler_mem_usage_pct else ""
# v is in MiB: values of at least 1000 KiB are shown in MiB with one decimal,
# smaller values are converted to whole KiB.
if v >= 1000/1024:
table.add_row(k, str(pympler_count[k]), f"{v:.1f}MiB", pct)
else:
table.add_row(k, str(pympler_count[k]), f"{1024*v:.0f}KiB", pct)
# The baseline printed here is the psutil USS reading taken before the profiled imports.
console.print(f"> memory baseline: [bold blue]{baseline:.1f}MiB[white]")
console.print(f"> multiformats memory total: [bold blue]{pympler_mem_usage_total:.1f}MiB[white]")
console.print(table)

# == Memory usage table ==

# Section header(s) for the psutil-based memory table.
console.rule("Memory Usage")
console.rule("Memory Usage (psutil)")

# One row per component: memory and share of the total.
table = Table()
table.add_column("Component", style="white")
table.add_column("Memory", style="bold blue", justify="right")
table.add_column("Memory %", style="bold blue", justify="right")
for k, v in mem_usage.items():
pct = f"{mem_usage_pct[k]:.0%}" if k in mem_usage_pct else ""
for k, v in psutil_mem_usage.items():
# The percentage cell is left empty for components without a recorded share.
pct = f"{psutil_mem_usage_pct[k]:.0%}" if k in psutil_mem_usage_pct else ""
# v is in MiB: values of at least 1000 KiB are shown in MiB with one decimal,
# smaller values are converted to whole KiB.
if v >= 1000/1024:
table.add_row(k, f"{v:.1f}MiB", pct)
else:
table.add_row(k, f"{1024*v:.0f}KiB", pct)
# Summary lines: the USS baseline taken before the profiled imports, and the total over all components.
console.print(f"> python+psutil memory baseline: [bold blue]{baseline:.1f}MiB[white]")
console.print(f"> multiformats memory total: [bold blue]{mem_usage_total:.1f}MiB[white]")
console.print(f"> memory baseline: [bold blue]{baseline:.1f}MiB[white]")
console.print(f"> multiformats memory total: [bold blue]{psutil_mem_usage_total:.1f}MiB[white]")
console.print(table)


# == Group multihash multicodecs together ==
# TODO: consider introducing grouped multicodecs that do this directly, to reduce the memory footprint (currently the footprint is negligible)

_multihash_indices: Dict[str, int] = {}
_grouped_multicodecs: List[Tuple[str, str, Optional[List[int]], List[int], List[bool], Literal["draft", "permanent"]]] = []
_grouped_multicodecs: List[Tuple[str, str, Optional[List[int]], List[int], List[bool], typing_extensions.Literal["draft", "permanent"]]] = []
for codec in multicodec.table(tag="multihash"):
is_implemented = multihash.is_implemented(codec.name)
tokens = codec.name.split("-")
Expand Down Expand Up @@ -259,4 +338,5 @@ def set_str(l: Collection[int], *, use_hex: bool = False, minlen: int = 4, maxle

# == Exporting report ==

# Writes the text recorded by the console to "report.txt"; the guarded call
# only runs when save_report (the -r flag) is set.
console.save_text("report.txt")
if save_report:
console.save_text("report.txt")
Loading

0 comments on commit 4cb2705

Please sign in to comment.