Skip to content

Commit d794bc6

Browse files
committed
Added example for concatenating + optionally DBC decoding MF4 files into pre-defined sub periods
1 parent 0d61284 commit d794bc6

File tree

5 files changed

+164
-1
lines changed

5 files changed

+164
-1
lines changed

examples/data-processing/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def restructure_data(df_phys, res, full_col_names=False, pgn_names=False):
9595

9696
df_phys_join = pd.merge_ordered(
9797
df_phys_join,
98-
data["Physical Value"].rename(col_name).resample(res).pad().dropna(),
98+
data["Physical Value"].rename(col_name).resample(res).ffill().dropna(),
9999
on="TimeStamp",
100100
fill_method="none",
101101
).set_index("TimeStamp")
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Concatenate MF4 files by period (+ optional DBC decoding)
2+
3+
This script lets you process MF4 log files across multiple CANedge devices. The script does the following:
4+
5+
1. List all log files for a list of devices within a specific 'total period'
6+
2. Specify a sub period length (e.g. 24 hours)
7+
3. Identify log files pertaining to each sub period, concatenate them and save the result
8+
4. Optionally, each output file can be DBC decoded before it is saved
9+
5. Saved files are by default named based on the first and last timestamp, e.g. `221213-0612-to-221213-1506.mf4`
10+
11+
The data can be fetched from an absolute input path on local disk (e.g. the `LOG/` folder on an SD card) or on S3. The latter requires that you map your S3 input bucket using [TntDrive](https://canlogger.csselectronics.com/canedge-getting-started/transfer-data/server-tools/other-s3-tools/). The output files can be stored on your local disk or e.g. on another S3 bucket (also mapped via TntDrive).
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""
About: List MF4 log files by period using the CANedge Python API
and concatenate them into 'combined' MF4 files using the asammdf Python API.
Optionally use TntDrive to map S3 server as local drive to work with S3 directly:
https://canlogger.csselectronics.com/canedge-getting-started/transfer-data/server-tools/other-s3-tools/
"""
import canedge_browser
from asammdf import MDF
from datetime import datetime, timezone, timedelta
from pathlib import Path
from concatenate_utils import extract_mdf_start_stop_time, hour_rounder

# specify input path for MF4 files (e.g. "D:/LOG" for SD, "Z:" for mapped S3 bucket, ...)
input_root = Path("Z:")

# specify output path (e.g. other mapped S3 bucket, local disk, ...)
output_root = Path("C:/concatenate-mf4-by-period")

# specify which period you wish to process and the max period length of each concatenated log file
period_start = datetime(year=2022, month=12, day=12, hour=2, tzinfo=timezone.utc)
period_stop = datetime(year=2022, month=12, day=16, hour=2, tzinfo=timezone.utc)
file_length_hours = 24

# specify devices to process (from the input_root folder)
devices = ["2F6913DB", "00000000"]

# optionally DBC decode the data (all .DBC files in dbc_path are assigned to CAN bus 0)
dbc_path = input_root / "dbc_files"
dbc_files = {"CAN": [(dbc, 0) for dbc in list(dbc_path.glob("*" + ".DBC"))]}
enable_dbc_decoding = False

# ----------------------------------------
fs = canedge_browser.LocalFileSystem(base_path=input_root)

for device in devices:
    cnt_output_files = 0
    cnt_sub_period = 0
    sub_period_start = period_start
    sub_period_stop = period_start
    files_to_skip = []

    log_files_total = canedge_browser.get_log_files(fs, device, start_date=period_start, stop_date=period_stop)
    log_files_total = [Path(input_root, log_file) for log_file in log_files_total]

    # fix: guard against devices with no log files in the period (the original
    # indexed log_files_total[0] unconditionally and crashed with IndexError)
    if len(log_files_total) == 0:
        print(f"\n-----------\nProcessing device {device} | no log files in period - skipping")
        continue

    print(f"\n-----------\nProcessing device {device} | sub period length: {file_length_hours} hours | start: {period_start} | stop: {period_stop} | {len(log_files_total)} log file(s): ", log_files_total)

    # check whether to update sub_period_start to equal 2nd log file start for efficiency
    mdf = MDF(log_files_total[0])
    mdf_start, mdf_stop = extract_mdf_start_stop_time(mdf)

    if mdf_stop < sub_period_start:
        print("First log file is before period start (skip): ", log_files_total[0])
        files_to_skip.append(log_files_total[0])
        if len(log_files_total) == 1:
            continue
        # at least two files remain; fast-forward the sub period to the 2nd file
        mdf = MDF(log_files_total[1])
        mdf_start, mdf_stop = extract_mdf_start_stop_time(mdf)
        sub_period_start = hour_rounder(mdf_start)
        print(f"Period start updated to {sub_period_start}")

    # process each sub period for the device
    while sub_period_stop <= period_stop:
        cnt_sub_period += 1
        sub_period_stop = sub_period_start + timedelta(hours=file_length_hours)

        # list log files for the sub period, excluding files already fully consumed
        log_files = canedge_browser.get_log_files(fs, device, start_date=sub_period_start, stop_date=sub_period_stop)
        log_files = [Path(input_root, log_file) for log_file in log_files]
        log_files = [log_file for log_file in log_files if log_file not in files_to_skip]
        if len(log_files) > 0:
            print(f"\n- Sub period #{cnt_sub_period} \t\t\t| start: {sub_period_start} | stop: {sub_period_stop} | {len(log_files)} log file(s): ", log_files)

        if len(log_files) == 0:
            sub_period_start = sub_period_stop
            continue

        # concatenate all sub period files and identify the delta sec to start/stop
        mdf = MDF.concatenate(log_files)
        mdf_start, mdf_stop = extract_mdf_start_stop_time(mdf)
        mdf_header_start = mdf.header.start_time
        start_delta = (sub_period_start - mdf_header_start).total_seconds()
        stop_delta = (sub_period_stop - mdf_header_start).total_seconds()
        print(f"- Concatenated MF4 created (pre cut)\t| start: {mdf_start} | stop: {mdf_stop}")

        # cut the log file to only include intended period
        mdf = mdf.cut(start=start_delta, stop=stop_delta, whence=0, include_ends=False, time_from_zero=False)
        mdf_start, mdf_stop = extract_mdf_start_stop_time(mdf)

        # convert the start/stop time to string format for file-saving
        # fix: removed stray f-prefix on the plain strftime format strings
        mdf_start_str = mdf_start.strftime("%y%m%d-%H%M")
        mdf_stop_str = mdf_stop.strftime("%y%m%d-%H%M")
        output_file_name = f"{device}/{mdf_start_str}-to-{mdf_stop_str}.MF4"
        output_path = output_root / output_file_name

        # DBC decode the data before saving
        if enable_dbc_decoding:
            mdf = mdf.extract_bus_logging(dbc_files)

        # fix: ensure the per-device output folder exists before saving
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # save the cut MF4 to local disk
        mdf.save(output_path, overwrite=True)
        print(f"- Concatenated MF4 saved (cut)\t\t| start: {mdf_start} | stop: {mdf_stop} | {output_path}")

        cnt_output_files += 1
        sub_period_start = sub_period_stop

        # check if the last log file is fully within sub period (i.e. skip it during next cycle)
        if mdf_stop < sub_period_stop:
            files_to_skip.append(log_files[-1])

        if log_files[-1] == log_files_total[-1]:
            print(f"- Completed processing device {device}")
            break
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
def extract_mdf_start_stop_time(mdf):
    """Return the (start, stop) datetimes of an MDF log file.

    The MDF header stores only the session start time; the first/last
    timestamps of the CAN_DataFrame.BusChannel signal give the offsets
    in seconds of the first and last recorded CAN frame.
    """
    from datetime import timedelta

    session_start = mdf.header.start_time
    # fix: the original called the expensive mdf.select() twice for the
    # same channel - select once and reuse the timestamp array
    timestamps = mdf.select(["CAN_DataFrame.BusChannel"])[0].timestamps
    mdf_start = session_start + timedelta(seconds=timestamps[0])
    mdf_stop = session_start + timedelta(seconds=timestamps[-1])

    return mdf_start, mdf_stop
12+
13+
def hour_rounder(t):
    """Round datetime *t* to the nearest full hour.

    Minutes 0-29 round down; minutes 30-59 round up.
    """
    from datetime import timedelta

    # truncate to the top of the current hour, then bump by one hour
    # when the minute component is 30 or more (t.minute // 30 is 0 or 1)
    on_the_hour = t.replace(second=0, microsecond=0, minute=0, hour=t.hour)
    return on_the_hour + timedelta(hours=t.minute // 30)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
asammdf==7.2.0
2+
attrs==22.1.0
3+
canedge-browser==0.0.8
4+
canmatrix==0.9.5
5+
click==8.1.3
6+
colorama==0.4.6
7+
fsspec==2022.11.0
8+
future==0.18.2
9+
isal==1.1.0
10+
lxml==4.9.2
11+
lz4==4.0.2
12+
mdf-iter==0.0.7
13+
numexpr==2.8.4
14+
numpy==1.23.5
15+
pandas==1.5.2
16+
python-dateutil==2.8.2
17+
pytz==2022.7
18+
six==1.16.0
19+
typing-extensions==4.4.0

0 commit comments

Comments
 (0)