forked from ericaltendorf/plotman
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5bff3dd
commit 1f95717
Showing
4 changed files
with
303 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import importlib.resources | ||
|
||
from plotman._tests import resources | ||
from plotman.log_parser import PlotLogParser | ||
|
||
def test_should_correctly_parse(): | ||
with importlib.resources.path(resources, "2021-04-04T19_00_47.681088-0400.log") as p: | ||
logfile_path = p | ||
|
||
parser = PlotLogParser() | ||
info = parser.parse(logfile_path) | ||
|
||
assert info.plot_id == "3eb8a37981de1cc76187a36ed947ab4307943cf92967a7e166841186c7899e24" | ||
|
||
assert info.plot_size == 32 | ||
assert info.started_at == "Sun Apr 4 19:00:50 2021" | ||
assert info.buffer == 4000 | ||
assert info.buckets == 128 | ||
assert info.threads == 4 | ||
|
||
assert info.tmp_dir1 == "/farm/yards/901" | ||
assert info.tmp_dir2 == "/farm/yards/901" | ||
|
||
assert info.phase1_duration_raw == 17571.981 | ||
assert info.phase1_duration == 17572 | ||
assert info.phase1_duration_minutes == 293 | ||
assert info.phase1_duration_hours == 4.88 | ||
|
||
assert info.phase2_duration_raw == 6911.621 | ||
assert info.phase2_duration == 6912 | ||
assert info.phase2_duration_minutes == 115 | ||
assert info.phase2_duration_hours == 1.92 | ||
|
||
assert info.phase3_duration_raw == 14537.188 | ||
assert info.phase3_duration == 14537 | ||
assert info.phase3_duration_minutes == 242 | ||
assert info.phase3_duration_hours == 4.04 | ||
|
||
assert info.phase4_duration_raw == 924.288 | ||
assert info.phase4_duration == 924 | ||
assert info.phase4_duration_minutes == 15 | ||
assert info.phase4_duration_hours == 0.26 | ||
|
||
assert info.total_time_raw == 39945.080 | ||
assert info.total_time == 39945 | ||
assert info.total_time_minutes == 666 | ||
assert info.total_time_hours == 11.10 | ||
|
||
assert info.copy_time_raw == 501.696 | ||
assert info.copy_time == 502 | ||
assert info.copy_time_minutes == 8 | ||
assert info.copy_time_hours == 0.14 | ||
|
||
assert info.filename == "/farm/wagons/801/plot-k32-2021-04-04-19-00-3eb8a37981de1cc76187a36ed947ab4307943cf92967a7e166841186c7899e24.plot" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
import re | ||
from plotman.plotinfo import PlotInfo | ||
|
||
class PlotLogParser: | ||
"""Parser for a finished plotting job""" | ||
|
||
def parse(self, filename: str) -> PlotInfo: | ||
"""Parses a single log and returns its info""" | ||
entry = PlotInfo() | ||
|
||
matchers = [ | ||
self.ignore_line, | ||
self.plot_id, | ||
self.plot_start_date, | ||
self.plot_size, | ||
self.buffer_size, | ||
self.buckets, | ||
self.threads, | ||
self.plot_dirs, | ||
self.phase1_duration, | ||
self.phase2_duration, | ||
self.phase3_duration, | ||
self.phase4_duration, | ||
self.total_time, | ||
self.copy_time, | ||
self.filename | ||
] | ||
|
||
with open(filename, 'r') as f: | ||
for line in f: | ||
for matcher in matchers: | ||
if (matcher(line, entry)): | ||
break | ||
|
||
return entry | ||
|
||
# ID: 3eb8a37981de1cc76187a36ed947ab4307943cf92967a7e166841186c7899e24 | ||
def plot_id(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r'^ID: (.+)$', line) | ||
if m: | ||
entry.plot_id = m.group(1) | ||
return m != None | ||
|
||
# Renamed final file from "/farm/wagons/801/abc.plot.2.tmp" to "/farm/wagons/801/abc.plot" | ||
def filename(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r'^Renamed final file from ".+" to "(.+)"', line) | ||
if m: | ||
entry.filename = m.group(1) | ||
return m != None | ||
|
||
# Time for phase 1 = 17571.981 seconds. CPU (178.600%) Sun Apr 4 23:53:42 2021 | ||
def phase1_duration(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Time for phase 1 = (\d+\.\d+) seconds", line) | ||
if m: | ||
entry.phase1_duration_raw = float(m.group(1)) | ||
return m != None | ||
|
||
# Time for phase 2 = 6911.621 seconds. CPU (71.780%) Mon Apr 5 01:48:54 2021 | ||
def phase2_duration(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Time for phase 2 = (\d+\.\d+) seconds", line) | ||
if m: | ||
entry.phase2_duration_raw = float(m.group(1)) | ||
return m != None | ||
|
||
# Time for phase 3 = 14537.188 seconds. CPU (82.730%) Mon Apr 5 05:51:11 2021 | ||
def phase3_duration(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Time for phase 3 = (\d+\.\d+) seconds", line) | ||
if m: | ||
entry.phase3_duration_raw = float(m.group(1)) | ||
return m != None | ||
|
||
# Time for phase 4 = 924.288 seconds. CPU (86.810%) Mon Apr 5 06:06:35 2021 | ||
def phase4_duration(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Time for phase 4 = (\d+\.\d+) seconds", line) | ||
if m: | ||
entry.phase4_duration_raw = float(m.group(1)) | ||
return m != None | ||
|
||
# Total time = 39945.080 seconds. CPU (123.100%) Mon Apr 5 06:06:35 2021 | ||
def total_time(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Total time = (\d+\.\d+) seconds", line) | ||
if m: | ||
entry.total_time_raw = float(m.group(1)) | ||
return m != None | ||
|
||
# Copy time = 501.696 seconds. CPU (23.860%) Sun May 9 22:52:41 2021 | ||
def copy_time(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Copy time = (\d+\.\d+) seconds", line) | ||
if m: | ||
entry.copy_time_raw = float(m.group(1)) | ||
return m != None | ||
|
||
# Starting plotting progress into temporary dirs: /farm/yards/901 and /farm/yards/901 | ||
def plot_dirs(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Starting plotting progress into temporary dirs: (.+) and (.+)$", line) | ||
if m: | ||
entry.tmp_dir1 = m.group(1) | ||
entry.tmp_dir2 = m.group(2) | ||
return m != None | ||
|
||
# Using 4 threads of stripe size 65536 | ||
def threads(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Using (\d+) threads of stripe size (\d+)", line) | ||
if m: | ||
entry.threads = int(m.group(1)) | ||
return m != None | ||
|
||
# "^Using (\\d+) buckets" | ||
def buckets(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Using (\d+) buckets", line) | ||
if m: | ||
entry.buckets = int(m.group(1)) | ||
return m != None | ||
|
||
# Buffer size is: 4000MiB | ||
def buffer_size(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r"^Buffer size is: (\d+)MiB", line) | ||
if m: | ||
entry.buffer = int(m.group(1)) | ||
return m != None | ||
|
||
# Plot size is: 32 | ||
def plot_size(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r'^Plot size is: (\d+)', line) | ||
if m: | ||
entry.plot_size = int(m.group(1)) | ||
return m != None | ||
|
||
# Starting phase 1/4: Forward Propagation into tmp files... Sun May 9 17:36:12 2021 | ||
def plot_start_date(self, line: str, entry: PlotInfo) -> bool: | ||
m = re.search(r'^Starting phase 1/4: Forward Propagation into tmp files\.\.\. (.+)', line) | ||
if m: | ||
entry.started_at = m.group(1) | ||
return m != None | ||
|
||
|
||
# Ignore lines starting with Bucket | ||
# Bucket 0 uniform sort. Ram: 3.250GiB, u_sort min: 0.563GiB, qs min: 0.281GiB. | ||
def ignore_line(self, line: str, _: PlotInfo) -> bool: | ||
return re.search(r'^\tBucket', line) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
import attr | ||
|
||
@attr.s(repr=True, init=False) | ||
class PlotInfo: | ||
"""Represents the results of a finished plot job""" | ||
started_at: str = "" | ||
plot_id: str = "" | ||
buckets: int = 0 | ||
threads: int = 0 | ||
buffer: int = 0 | ||
plot_size: int = 0 | ||
tmp_dir1: str = "" | ||
tmp_dir2: str = "" | ||
phase1_duration_raw: float = 0 | ||
phase2_duration_raw: float = 0 | ||
phase3_duration_raw: float = 0 | ||
phase4_duration_raw: float = 0 | ||
total_time_raw: float = 0 | ||
copy_time_raw: float = 0 | ||
filename: str = "" | ||
|
||
def is_empty(self) -> bool: | ||
"Data is considered empty if total_time is zero" | ||
return self.total_time == 0 | ||
|
||
# Phase 1 duration | ||
@property | ||
def phase1_duration(self) -> int: | ||
return round(self.phase1_duration_raw) | ||
|
||
@property | ||
def phase1_duration_minutes(self) -> int: | ||
return self.duration_to_minutes(self.phase1_duration_raw) | ||
|
||
@property | ||
def phase1_duration_hours(self) -> float: | ||
return self.duration_to_hours(self.phase1_duration_raw) | ||
|
||
# Phase 2 duration | ||
@property | ||
def phase2_duration(self) -> int: | ||
return round(self.phase2_duration_raw) | ||
|
||
@property | ||
def phase2_duration_minutes(self) -> int: | ||
return self.duration_to_minutes(self.phase2_duration_raw) | ||
|
||
@property | ||
def phase2_duration_hours(self) -> float: | ||
return self.duration_to_hours(self.phase2_duration_raw) | ||
|
||
# Phase 3 duration | ||
@property | ||
def phase3_duration(self) -> int: | ||
return round(self.phase3_duration_raw) | ||
|
||
@property | ||
def phase3_duration_minutes(self) -> int: | ||
return self.duration_to_minutes(self.phase3_duration_raw) | ||
|
||
@property | ||
def phase3_duration_hours(self) -> float: | ||
return self.duration_to_hours(self.phase3_duration_raw) | ||
|
||
# Phase 4 duration | ||
@property | ||
def phase4_duration(self) -> int: | ||
return round(self.phase4_duration_raw) | ||
|
||
@property | ||
def phase4_duration_minutes(self) -> int: | ||
return self.duration_to_minutes(self.phase4_duration_raw) | ||
|
||
@property | ||
def phase4_duration_hours(self) -> float: | ||
return self.duration_to_hours(self.phase4_duration_raw) | ||
|
||
# Total time | ||
@property | ||
def total_time(self) -> int: | ||
return round(self.total_time_raw) | ||
|
||
@property | ||
def total_time_minutes(self) -> int: | ||
return self.duration_to_minutes(self.total_time_raw) | ||
|
||
@property | ||
def total_time_hours(self) -> float: | ||
return self.duration_to_hours(self.total_time_raw) | ||
|
||
# Copy time | ||
@property | ||
def copy_time(self) -> int: | ||
return round(self.copy_time_raw) | ||
|
||
@property | ||
def copy_time_minutes(self) -> int: | ||
return self.duration_to_minutes(self.copy_time_raw) | ||
|
||
@property | ||
def copy_time_hours(self) -> float: | ||
return self.duration_to_hours(self.copy_time_raw) | ||
|
||
def duration_to_minutes(self, duration): | ||
return round(duration / 60) | ||
|
||
def duration_to_hours(self, duration): | ||
return round(duration / 60 / 60, 2) |