Add active data selector

Jaeyoung-Lim · Jaeyoung-Lim · commit 61602f2442c1 · 2022-05-17T12:29:08.000+02:00
F


Unify data selection options
diff --git a/Makefile b/Makefile
@@ -5,7 +5,7 @@ registry?=ethzasl/data-driven-dynamics
 model?=quadrotor_model
 log?=${root_dir}/resources/${model}.ulg
 config?=${root_dir}/Tools/parametric_model/configs/${model}.yaml
-data_selection?=False
+data_selection?=none
 plot?=True
 
 submodulesupdate:
diff --git a/README.md b/README.md
@@ -84,7 +84,7 @@ source setup.bash
 Generate the parametric model using a log file (ulog or csv):
 
 ```
-make estimate-model [model=<modeltype>] [config=<config_file_path>] [data_selection=<True/False>] [plot=<True/False>] log=<log_file_path>
+make estimate-model [model=<modeltype>] [config=<config_file_path>] [data_selection=<none|interactive|auto>] [plot=<True/False>] log=<log_file_path>
 ```
 
 ### Pipeline Arguments
@@ -106,7 +106,10 @@ The Log file contains all data needed for the system identification of the speci
 
 #### Data Selection
 
-The data_selection argument is optional (per default False) and can be used to visually select subportions of the data, using the [Visual Dataframe Selector](https://github.com/manumerous/visual_dataframe_selector), before running the model estimation. It is also possible to save the selected subportion of data to a csv file in order to use this exact dataset multiple times.
+The data_selection argument is optional (default: none) and controls which subportions of the data are used for the model estimation. The following options are available:
+- none (default): Data selection is disabled and the whole log is used
+- interactive: Data is selected interactively using the [Visual Dataframe Selector](https://github.com/manumerous/visual_dataframe_selector), before running the model estimation. It is also possible to save the selected subportion of data to a csv file in order to use this exact dataset multiple times.
+- auto: Data is selected automatically (Beta)
 
 ### Results
 
diff --git a/Tools/parametric_model/active_dataframe_selector/data_selector.py b/Tools/parametric_model/active_dataframe_selector/data_selector.py
@@ -0,0 +1,33 @@
+class ActiveDataSelector():
+    """Reduce a dataframe to the rows ranked first by its Fisher information columns."""
+
+    def __init__(self, data_df):
+        # Dataframe to select from; select_dataframes() reads the
+        # "fisher_information_force" and "fisher_information_rot" columns.
+        self.data_df = data_df
+
+    def select_dataframes(self, ratio = 10):
+        """Keep the first `ratio` percent of rows per Fisher information column.
+
+        The rows are ranked once by "fisher_information_force" and once by
+        "fisher_information_rot" (ascending, the pandas default). The first
+        `ratio` percent of each ranking are merged, de-duplicated and sorted
+        by index label.
+
+        NOTE(review): ascending order keeps the rows with the smallest values;
+        confirm this matches the definition of the Fisher information columns.
+
+        ratio: percentage (0-100) of the samples kept per column.
+        Returns the reduced dataframe with a fresh 0..n-1 index; it is also
+        stored in self.data_df. Raises ValueError if ratio is out of range.
+        """
+        if not 0 <= ratio <= 100:
+            raise ValueError("ratio must be a percentage between 0 and 100")
+        n_keep = int(self.data_df.shape[0] * ratio // 100)
+        idx = self.data_df.sort_values(by=["fisher_information_force"]).index[0:n_keep]
+        idx = idx.append(self.data_df.sort_values(by=["fisher_information_rot"]).index[0:n_keep])
+        idx = idx.unique()
+        idx = idx.sort_values()
+        # reset_index returns a new dataframe, so the result has to be assigned.
+        self.data_df = self.data_df.loc[idx].reset_index(drop=True)
+        return self.data_df
diff --git a/Tools/parametric_model/generate_parametric_model.py b/Tools/parametric_model/generate_parametric_model.py
@@ -56,12 +56,10 @@ def str2bool(v):
         raise argparse.ArgumentTypeError('Boolean value expected.')
 
 
-def start_model_estimation(config, log_path, data_selection=False, plot=False):
+def start_model_estimation(config, log_path, data_selection="none", plot=False):
     print("Visual Data selection enabled: ", data_selection)
 
     # Flag for enabling automatic data selection.
-    # TODO: Unify data selection type with auto and manual
-    auto_data_selection=False
 
     data_handler = DataHandler(config)
     data_handler.loadLogs(log_path)
@@ -97,22 +95,20 @@ def start_model_estimation(config, log_path, data_selection=False, plot=False):
     model.compute_fisher_information()
 
     # Interactive data selection
-    if data_selection:
+    if data_selection=="interactive":
         from visual_dataframe_selector.data_selector import select_visual_data
         model.data_df = select_visual_data(model.data_df,visual_dataframe_selector_config_dict)
         model.n_samples = model.data_df.shape[0]
     # Automatic data selection (WIP)
-    elif auto_data_selection:
+    elif data_selection=="auto":
+        from active_dataframe_selector.data_selector import ActiveDataSelector
         # The goal is to identify automatically the most relevant parts of a log.
         # Currently the draft is designed to choose the most informative 10% of the logs with regards to
         # force and moment parameters. This threshold is currently not validated at all and the percentage
         # can vary drastically from log to log. 
-        idx = model.data_df.sort_values(by=["fisher_information_force"]).index[0:model.data_df.shape[0]*10//100]
-        idx = idx.append(model.data_df.sort_values(by=["fisher_information_rot"]).index[0:model.data_df.shape[0]*10//100])
-        idx = idx.unique()
-        idx = idx.sort_values()
-        model.data_df = model.data_df.loc[idx]
-        model.data_df.reset_index(drop=True)
+
+        data_selector = ActiveDataSelector(model.data_df)
+        model.data_df = data_selector.select_dataframes(10)
         model.n_samples = model.data_df.shape[0]
 
     model.estimate_model()
@@ -129,8 +125,8 @@ def start_model_estimation(config, log_path, data_selection=False, plot=False):
         description='Estimate dynamics model from flight log.')
     parser.add_argument('log_path', metavar='log_path', type=str,
                         help='The path of the log to process relative to the project directory.')
-    parser.add_argument('--data_selection', metavar='data_selection', type=str2bool, default=False,
-                        help='the path of the log to process relative to the project directory.')
+    parser.add_argument('--data_selection', metavar='data_selection', type=str, default="none",
+                        help='Data selection scheme none | interactive | auto (Beta)')
     parser.add_argument('--config', metavar='config', type=str, default='configs/quadrotor_model.yaml',
                         help='Configuration file path for pipeline configurations')
     parser.add_argument('--plot', metavar='plot', type=str2bool, default='True',