Skip to content

Commit d794bc6

Browse files
committed
Added example for concatenating + optionally DBC decoding MF4 files into pre-defined sub periods
1 parent 0d61284 commit d794bc6

File tree

5 files changed

+164
-1
lines changed

5 files changed

+164
-1
lines changed

examples/data-processing/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def restructure_data(df_phys, res, full_col_names=False, pgn_names=False):
9595

9696
df_phys_join = pd.merge_ordered(
9797
df_phys_join,
98-
data["Physical Value"].rename(col_name).resample(res).pad().dropna(),
98+
data["Physical Value"].rename(col_name).resample(res).ffill().dropna(),
9999
on="TimeStamp",
100100
fill_method="none",
101101
).set_index("TimeStamp")
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Concatenate MF4 files by period (+ optional DBC decoding)
2+
3+
This script lets you process MF4 log files across multiple CANedge devices. The script does the following:
4+
5+
1. List all log files for a list of devices within a specific 'total period'
6+
2. Specify a sub period length (e.g. 24 hours)
7+
3. Identify log files pertaining to each sub period, concatenate them and save the result
8+
4. Optionally, each output file can be DBC decoded before it is saved
9+
5. Saved files are by default named based on the first and last timestamp, e.g. `221213-0612-to-221213-1506.mf4`
10+
11+
The data can be fetched from an absolute input path on local disk (e.g. the `LOG/` folder on an SD card) or on S3. The latter requires that you map your S3 input bucket using [TntDrive](https://canlogger.csselectronics.com/canedge-getting-started/transfer-data/server-tools/other-s3-tools/). The output files can be stored on your local disk or e.g. on another S3 bucket (also mapped via TntDrive).
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""
About: List MF4 log files by period using the CANedge Python API
and concatenate them into 'combined' MF4 files using the asammdf Python API.
Optionally use TntDrive to map S3 server as local drive to work with S3 directly:
https://canlogger.csselectronics.com/canedge-getting-started/transfer-data/server-tools/other-s3-tools/
"""
import canedge_browser
from asammdf import MDF
from datetime import datetime, timezone, timedelta
from pathlib import Path
from concatenate_utils import extract_mdf_start_stop_time, hour_rounder

# specify input path for MF4 files (e.g. "D:/LOG" for SD, "Z:" for mapped S3 bucket, ...)
input_root = Path("Z:")

# specify output path (e.g. other mapped S3 bucket, local disk, ...)
output_root = Path("C:/concatenate-mf4-by-period")

# specify which period you wish to process and the max period length of each concatenated log file
period_start = datetime(year=2022, month=12, day=12, hour=2, tzinfo=timezone.utc)
period_stop = datetime(year=2022, month=12, day=16, hour=2, tzinfo=timezone.utc)
file_length_hours = 24

# specify devices to process (from the input_root folder)
devices = ["2F6913DB", "00000000"]

# optionally DBC decode the data (all .DBC files in dbc_path are assigned to CAN bus 0)
dbc_path = input_root / "dbc_files"
dbc_files = {"CAN": [(dbc, 0) for dbc in list(dbc_path.glob("*" + ".DBC"))]}
enable_dbc_decoding = False

# ----------------------------------------
fs = canedge_browser.LocalFileSystem(base_path=input_root)

for device in devices:
    cnt_output_files = 0
    cnt_sub_period = 0
    sub_period_start = period_start
    sub_period_stop = period_start
    files_to_skip = []

    log_files_total = canedge_browser.get_log_files(fs, device, start_date=period_start, stop_date=period_stop)
    log_files_total = [Path(input_root, log_file) for log_file in log_files_total]

    # fix: guard against devices with no log files in the period (the original
    # indexed log_files_total[0] unconditionally and crashed with IndexError)
    if len(log_files_total) == 0:
        print(f"\n-----------\nProcessing device {device} | no log files in period - skipping")
        continue

    print(f"\n-----------\nProcessing device {device} | sub period length: {file_length_hours} hours | start: {period_start} | stop: {period_stop} | {len(log_files_total)} log file(s): ", log_files_total)

    # check whether to update sub_period_start to equal 2nd log file start for efficiency
    mdf = MDF(log_files_total[0])
    mdf_start, mdf_stop = extract_mdf_start_stop_time(mdf)

    if mdf_stop < sub_period_start:
        print("First log file is before period start (skip): ", log_files_total[0])
        files_to_skip.append(log_files_total[0])
        if len(log_files_total) == 1:
            continue
        # at least two files remain; fast-forward the sub period to the 2nd file
        mdf = MDF(log_files_total[1])
        mdf_start, mdf_stop = extract_mdf_start_stop_time(mdf)
        sub_period_start = hour_rounder(mdf_start)
        print(f"Period start updated to {sub_period_start}")

    # process each sub period for the device
    while sub_period_stop <= period_stop:
        cnt_sub_period += 1
        sub_period_stop = sub_period_start + timedelta(hours=file_length_hours)

        # list log files for the sub period, excluding files already fully consumed
        log_files = canedge_browser.get_log_files(fs, device, start_date=sub_period_start, stop_date=sub_period_stop)
        log_files = [Path(input_root, log_file) for log_file in log_files]
        log_files = [log_file for log_file in log_files if log_file not in files_to_skip]
        if len(log_files) > 0:
            print(f"\n- Sub period #{cnt_sub_period} \t\t\t| start: {sub_period_start} | stop: {sub_period_stop} | {len(log_files)} log file(s): ", log_files)

        if len(log_files) == 0:
            sub_period_start = sub_period_stop
            continue

        # concatenate all sub period files and identify the delta sec to start/stop
        mdf = MDF.concatenate(log_files)
        mdf_start, mdf_stop = extract_mdf_start_stop_time(mdf)
        mdf_header_start = mdf.header.start_time
        start_delta = (sub_period_start - mdf_header_start).total_seconds()
        stop_delta = (sub_period_stop - mdf_header_start).total_seconds()
        print(f"- Concatenated MF4 created (pre cut)\t| start: {mdf_start} | stop: {mdf_stop}")

        # cut the log file to only include intended period
        mdf = mdf.cut(start=start_delta, stop=stop_delta, whence=0, include_ends=False, time_from_zero=False)
        mdf_start, mdf_stop = extract_mdf_start_stop_time(mdf)

        # convert the start/stop time to string format for file-saving
        # fix: removed stray f-prefix on the plain strftime format strings
        mdf_start_str = mdf_start.strftime("%y%m%d-%H%M")
        mdf_stop_str = mdf_stop.strftime("%y%m%d-%H%M")
        output_file_name = f"{device}/{mdf_start_str}-to-{mdf_stop_str}.MF4"
        output_path = output_root / output_file_name

        # DBC decode the data before saving
        if enable_dbc_decoding:
            mdf = mdf.extract_bus_logging(dbc_files)

        # fix: ensure the per-device output folder exists before saving
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # save the cut MF4 to local disk
        mdf.save(output_path, overwrite=True)
        print(f"- Concatenated MF4 saved (cut)\t\t| start: {mdf_start} | stop: {mdf_stop} | {output_path}")

        cnt_output_files += 1
        sub_period_start = sub_period_stop

        # check if the last log file is fully within sub period (i.e. skip it during next cycle)
        if mdf_stop < sub_period_stop:
            files_to_skip.append(log_files[-1])

        if log_files[-1] == log_files_total[-1]:
            print(f"- Completed processing device {device}")
            break
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
def extract_mdf_start_stop_time(mdf):
    """Return the (start, stop) datetimes of an MDF log file.

    The MDF header stores only the session start time; the first/last
    timestamps of the CAN_DataFrame.BusChannel signal give the offsets
    in seconds of the first and last recorded CAN frame.
    """
    from datetime import timedelta

    session_start = mdf.header.start_time
    # fix: the original called the expensive mdf.select() twice for the
    # same channel - select once and reuse the timestamp array
    timestamps = mdf.select(["CAN_DataFrame.BusChannel"])[0].timestamps
    mdf_start = session_start + timedelta(seconds=timestamps[0])
    mdf_stop = session_start + timedelta(seconds=timestamps[-1])

    return mdf_start, mdf_stop
12+
13+
def hour_rounder(t):
    """Round datetime *t* to the nearest full hour.

    Minutes 0-29 round down; minutes 30-59 round up.
    """
    from datetime import timedelta

    # truncate to the top of the current hour, then bump by one hour
    # when the minute component is 30 or more (t.minute // 30 is 0 or 1)
    on_the_hour = t.replace(second=0, microsecond=0, minute=0, hour=t.hour)
    return on_the_hour + timedelta(hours=t.minute // 30)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
asammdf==7.2.0
2+
attrs==22.1.0
3+
canedge-browser==0.0.8
4+
canmatrix==0.9.5
5+
click==8.1.3
6+
colorama==0.4.6
7+
fsspec==2022.11.0
8+
future==0.18.2
9+
isal==1.1.0
10+
lxml==4.9.2
11+
lz4==4.0.2
12+
mdf-iter==0.0.7
13+
numexpr==2.8.4
14+
numpy==1.23.5
15+
pandas==1.5.2
16+
python-dateutil==2.8.2
17+
pytz==2022.7
18+
six==1.16.0
19+
typing-extensions==4.4.0

0 commit comments

Comments
 (0)