Analysis: added matplotlib figure generation with several desirable performance metrics. #612

Open
wants to merge 31 commits into base: development

31 commits
adc02c0
added figure file
nikwl May 21, 2021
49f2a17
integrated figfile contents into analyzer.py
nikwl May 21, 2021
4914ee2
passed logfile where I should have passed logdir
nikwl May 21, 2021
c0a75b5
default logdir and logfile should now be none
nikwl May 21, 2021
3eda2a1
the figfile condition to run new code was flipped
nikwl May 21, 2021
ba577f8
removed period for consistency
nikwl May 21, 2021
56d5363
forgot to add code that converts directory to list of files
nikwl May 21, 2021
bf7f02c
pyplot imported incorrectly
nikwl May 21, 2021
8d06393
fig file now passable for either log file or log dir
nikwl May 21, 2021
8458013
assertation prevents generating figure with too few datapoints
nikwl May 21, 2021
876724f
directory handling was passed figfile instead of logfilenames
nikwl May 21, 2021
866dbd3
fixed bug with cumulative plot
nikwl May 21, 2021
f6c958f
Revert "fix: avoid more missing process errors"
nikwl Jun 22, 2021
b106fea
baby's first merge
nikwl Jun 22, 2021
375a90e
updating fork
nikwl Jun 22, 2021
e909d1b
migrated graph
nikwl Jun 22, 2021
b89bc40
Merge branch 'ericaltendorf:development' into development
nikwl Aug 7, 2021
95e1c63
several fixes, added some cli arguments, should work now
nikwl Aug 7, 2021
be3871c
Merge branch 'development' into development
altendky Aug 28, 2021
f87c2ab
Merge branch 'development' into development
altendky Aug 28, 2021
46260ad
Fixed several discontinuities that I think were caused by the previou…
nikwl Aug 29, 2021
a6c65ed
logdir is no longer required, instead it pull from the logdir defined…
nikwl Aug 29, 2021
134d4b7
Added type annotations to functions
nikwl Aug 29, 2021
b478a14
Merge branch 'development' into nikwl/development
altendky Aug 29, 2021
a8039dd
black
altendky Aug 29, 2021
b55fb57
tidy
altendky Aug 29, 2021
2b356f5
Merge branch 'development' into nikwl_development
altendky Aug 29, 2021
37d6dc6
Merge pull request #1 from altendky/nikwl_development
nikwl Aug 29, 2021
1011b0f
Updated graph.py parser to new style. Reformatted graph.py with black.
nikwl Aug 29, 2021
3bc6d90
Update setup.cfg
altendky Aug 30, 2021
d15ec4c
[mypy-matplotlib] ignore_missing_imports = true
altendky Aug 30, 2021
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -18,6 +18,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#898](https://github.com/ericaltendorf/plotman/pull/898))
- Output same entries to plotman.log from 'plotman interactive' and ' plotman plot/archive' "daemons".
([#878](https://github.com/ericaltendorf/plotman/pull/878))
- `plotman graph` command that renders a matplotlib summary figure for completed plots.
  Creates a graph image showing concurrent plots over time, average plot rate, average plot time, and total plots over time.
([#612](https://github.com/ericaltendorf/plotman/pull/612))

## [0.5.1] - 2021-07-15
### Fixed
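Example invocation of the new subcommand (the output filename and option values here are illustrative, not taken from the PR):

plotman graph plot_performance.png --latest_k 20 --window 5

This would render the summary figure for, at most, the 20 most recently started plots, using a 5-sample window for the moving averages.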
3 changes: 3 additions & 0 deletions mypy.ini
@@ -9,6 +9,9 @@ ignore_missing_imports = true
[mypy-click]
ignore_missing_imports = true

[mypy-matplotlib]
ignore_missing_imports = true

[mypy-pendulum]
# TODO: https://github.com/sdispater/pendulum/pull/551
implicit_reexport = true
4 changes: 4 additions & 0 deletions setup.cfg
@@ -73,6 +73,10 @@ checks =
mypy == 0.902
types-pkg_resources ~= 0.1.2
%(test)s
%(graph)s
graph =
matplotlib ~= 3.4
numpy ~= 1.20

[options.data_files]
config = src/plotman/resources/plotman.yaml
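With the `graph` extra declared above, the matplotlib and numpy dependencies can presumably be pulled in with something like `pip install plotman[graph]` (or `pip install -e .[graph]` from a source checkout); the exact install command is an assumption, not part of this diff.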
203 changes: 203 additions & 0 deletions src/plotman/graph.py
@@ -0,0 +1,203 @@
import os

import numpy as np
import matplotlib
import matplotlib.pyplot as plt

import plotman.plotters


def create_ax_dumbbell(
ax: matplotlib.pyplot.axis, data: np.array, max_stacked: int = 50
) -> None:
"""
Create a dumbbell plot of concurrent plot instances over time.
Parameters:
ax: a matplotlib axis.
data: numpy array with [start times, end times].
"""

def newline(p1: float, p2: float) -> matplotlib.lines.Line2D:
l = matplotlib.lines.Line2D([p1[0], p2[0]], [p1[1], p2[1]], color="r")
ax.add_line(l)
return l

# Prevent the stack from growing too tall
num_rows = data.shape[0]
stacker = []
for _ in range(int(np.ceil(num_rows / float(max_stacked)))):
stacker.extend(list(range(max_stacked)))
stacker = np.array(stacker)
if num_rows % float(max_stacked) != 0:
stacker = stacker[: -(max_stacked - int(num_rows % float(max_stacked)))]

for (p1, p2), i in zip(data[:, :2], stacker):
newline([p1, i], [p2, i])
ax.scatter(data[:, 0], stacker, color="b")
ax.scatter(data[:, 1], stacker, color="b")

ax.set_ylabel("Plots")
ax.set_xlim(np.min(data[:, 0]) - 2, np.max(data[:, 1]) + 2)


def create_ax_plotrate(
ax: matplotlib.pyplot.axis, data: np.array, end: bool = True, window: int = 3
) -> None:
"""
Create a plot showing the rate of plotting over time. Can be computed
with respect to the plot start (this is rate of plot creation) or
with respect to the plot end (this is rate of plot completion).
Parameters:
ax: a matplotlib axis.
data: numpy array with [start times, end times].
end: if True, compute the plot completion rate; if False, the plot creation rate.
window: Window to compute rate over.
"""

def estimate_rate(data: np.array, window: int) -> np.array:
rate_list = []
window_list = []
# This takes care of when we don't have a full window
for i in range(window):
rate_list.append(data[i] - data[0])
window_list.append(i)
# This takes care of when we do
for i in range(len(data) - window):
rate_list.append(data[i + window] - data[i])
window_list.append(window)
rate_list, window_list = np.array(rate_list), np.array(window_list)
rate_list[rate_list == 0] = np.nan # This prevents div by zero error
return np.where(
np.logical_not(np.isnan(rate_list)), (window_list - 1) / rate_list, 0
)

# Estimate the rate of ending or the rate of starting
if end:
rate = estimate_rate(data[:, 1], window)
ax.plot(data[:, 1], rate)
else:
rate = estimate_rate(data[:, 0], window)
ax.plot(data[:, 0], rate)

ax.set_ylabel("Avg Plot Rate (plots/hour)")
ax.set_xlim(np.min(data[:, 0]) - 2, np.max(data[:, 1]) + 2)


def create_ax_plottime(
ax: matplotlib.pyplot.axis, data: np.array, window: int = 3
) -> None:
"""
Create a plot showing the average time to create a single plot. This is
computed using a moving average. Note that the plot may not be
very accurate for the beginning and ending windows.
Parameters:
ax: a matplotlib axis.
data: numpy array with [start times, end times].
window: Window to compute rate over.
"""

# Compute moving avg
kernel = np.ones(window) / window
data_tiled = np.vstack(
(
np.expand_dims(data[:, 1] - data[:, 0], axis=1),
np.tile(data[-1, 1] - data[-1, 0], (window - 1, 1)),
)
)
rolling_avg = np.convolve(data_tiled.squeeze(), kernel, mode="valid")

ax.plot(data[:, 1], rolling_avg)

ax.set_ylabel("Avg Plot Time (hours)")
ax.set_xlim(np.min(data[:, 0]) - 2, np.max(data[:, 1]) + 2)


def create_ax_plotcumulative(ax: matplotlib.pyplot.axis, data: np.array) -> None:
"""
Create a plot showing the cumulative number of plots over time.
Parameters:
ax: a matplotlib axis.
data: numpy array with [start times, end times].
"""
ax.plot(data[:, 1], range(data.shape[0]))

ax.set_ylabel("Total plots (plots)")
ax.set_xlim(np.min(data[:, 0]) - 2, np.max(data[:, 1]) + 2)


def graph(logdir: str, figfile: str, latest_k: int, window: int) -> None:
assert window >= 2, "Cannot compute moving average over a window of less than 2"
assert os.path.isdir(logdir)

# Build a list of the logfiles
logdir = os.path.abspath(logdir)
logfilenames = [
os.path.join(logdir, l)
for l in os.listdir(logdir)
if os.path.splitext(l)[-1] == ".log"
]

assert len(logfilenames) > 0, "Directory contains no log files: {}".format(logdir)

# For each log file, extract the start, end, and duration
time_catter = []
for logfilename in logfilenames:
with open(logfilename) as file:
try:
plotter_type = plotman.plotters.get_plotter_from_log(lines=file)
except plotman.errors.UnableToIdentifyPlotterFromLogError:
continue

parser = plotter_type()

with open(logfilename, "rb") as binary_file:
read_bytes = binary_file.read()

parser.update(chunk=read_bytes)
info = parser.common_info()

# Extract timing information
if info.total_time_raw != 0:
time_catter.append(
[
info.started_at.timestamp(),
info.started_at.timestamp() + info.total_time_raw,
info.total_time_raw,
]
)

assert len(time_catter) > 0, "No valid log files found"

# This array will hold start and end data (in hours)
data_started_ended = np.array(time_catter) / (60 * 60)

# Shift the data so that it starts at zero
data_started_ended -= np.min(data_started_ended[:, 0])

# Sort the rows by start time
data_started_ended = data_started_ended[np.argsort(data_started_ended[:, 0])]

# Remove older entries
if latest_k is not None:
data_started_ended = data_started_ended[-latest_k:, :]

# Create figure
num_plots = 4
f, _ = plt.subplots(2, 1, figsize=(8, 10))
ax = plt.subplot(num_plots, 1, 1)
ax.set_title("Plot performance summary")

create_ax_dumbbell(ax, data_started_ended)

if data_started_ended.shape[0] > window:
ax = plt.subplot(num_plots, 1, 2)
create_ax_plotrate(ax, data_started_ended, end=True, window=window)

ax = plt.subplot(num_plots, 1, 3)
create_ax_plottime(ax, data_started_ended, window=window)

ax = plt.subplot(num_plots, 1, 4)
create_ax_plotcumulative(ax, data_started_ended)

ax.set_xlabel("Time (hours)")
f.savefig(figfile)
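
For context, the following is a minimal standalone sketch — not part of the diff — of the two core computations graph.py performs on the [start, end] matrix: the windowed completion rate (what estimate_rate approximates) and the padded moving-average plot time (what create_ax_plottime computes). The timing data is synthetic and purely illustrative.

import numpy as np

# Synthetic [start, end] times in hours, one row per completed plot,
# already sorted by start time as graph.graph() sorts them.
data = np.array(
    [
        [0.0, 6.0],
        [1.0, 7.5],
        [2.0, 8.5],
        [3.5, 10.0],
        [5.0, 11.0],
    ]
)
window = 3

# Windowed completion rate, mirroring estimate_rate(): roughly
# (plots in window - 1) / hours elapsed between the first and last of them.
# Only full windows are shown here; graph.py also handles the ramp-up at the start.
ends = data[:, 1]
elapsed = ends[window:] - ends[:-window]
rate = np.where(elapsed > 0, (window - 1) / elapsed, 0.0)
print("plots/hour over each full window:", rate)

# Moving-average plot time, mirroring create_ax_plottime(): pad the per-plot
# durations with copies of the last duration so np.convolve(..., mode="valid")
# returns one value per plot.
durations = data[:, 1] - data[:, 0]
padded = np.concatenate([durations, np.repeat(durations[-1], window - 1)])
kernel = np.ones(window) / window
rolling_avg = np.convolve(padded, kernel, mode="valid")
print("average plot time (hours):", rolling_avg)

The padding trick is what keeps the rolling average the same length as the list of end times, so it can be plotted against data[:, 1] directly.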
33 changes: 33 additions & 0 deletions src/plotman/plotman.py
100755 → 100644
@@ -24,6 +24,7 @@
plot_util,
reporting,
csv_exporter,
graph,
)
from plotman import resources as plotman_resources
from plotman.job import Job
@@ -157,6 +158,29 @@ def parse_args(self) -> typing.Any:
"logfile", type=str, nargs="+", help="logfile(s) to analyze"
)

p_graph = sp.add_parser("graph", help="create graph with plotting statistics")
p_graph.add_argument(
"figfile", type=str, help="graph file produced as output (.png, .jpg, etc.)"
)
p_graph.add_argument(
"--logdir",
type=str,
default=None,
help="directory containing multiple logfiles to graph",
)
p_graph.add_argument(
"--latest_k",
type=int,
default=None,
help="if passed, will only graph statistics for the latest k plots",
)
p_graph.add_argument(
"--window",
type=int,
default=3,
help="window size to compute moving average over",
)

args = parser.parse_args()
return args

@@ -296,6 +320,15 @@ def main() -> None:
args.logfile, args.clipterminals, args.bytmp, args.bybitfield
)

#
# Graphing of completed jobs
#
elif args.cmd == "graph":
# If no logdir was passed, use the dir specified in cfg (this will almost always be the case)
if args.logdir is None:
args.logdir = cfg.logging.plots
graph.graph(args.logdir, args.figfile, args.latest_k, args.window)

#
# Exports log metadata to CSV
#
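
For completeness, a hypothetical programmatic call into the new module (the paths and filename below are placeholders; normally the CLI supplies these values, with logdir falling back to cfg.logging.plots as shown above):

from plotman import graph

graph.graph(
    logdir="/home/user/.config/chia/plotman/logs",  # placeholder log directory
    figfile="plot_performance.png",                 # output image path
    latest_k=None,                                  # or an int to keep only the newest k plots
    window=3,                                       # moving-average window size
)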
Empty file modified util/listlogs
100755 → 100644
Empty file.