
Commit bd77271

1 parent effed79 commit bd77271

33 files changed: +378 additions, -367 deletions

master/_downloads/25408d8d92a873e370d4750ba414b84a/benchmark_lazy_eager_loading.ipynb

Lines changed: 3 additions & 3 deletions
@@ -62,7 +62,7 @@
},
"outputs": [],
"source": [
65-
"def load_example_data(preload, window_len_s, n_subjects=10):\n \"\"\"Create windowed dataset from subjects of the TUH Abnormal dataset.\n\n Parameters\n ----------\n preload: bool\n If True, use eager loading, otherwise use lazy loading.\n n_subjects: int\n Number of subjects to load.\n\n Returns\n -------\n windows_ds: BaseConcatDataset\n Windowed data.\n\n .. warning::\n The recordings from the TUH Abnormal corpus do not all share the same\n sampling rate. The following assumes that the files have already been\n resampled to a common sampling rate.\n \"\"\"\n subject_ids = list(range(n_subjects))\n ds = TUHAbnormal(\n TUH_PATH, subject_ids=subject_ids, target_name='pathological',\n preload=preload)\n\n fs = ds.datasets[0].raw.info['sfreq']\n window_len_samples = int(fs * window_len_s)\n window_stride_samples = int(fs * 4)\n # window_stride_samples = int(fs * window_len_s)\n windows_ds = create_fixed_length_windows(\n ds, start_offset_samples=0, stop_offset_samples=None,\n window_size_samples=window_len_samples,\n window_stride_samples=window_stride_samples, drop_last_window=True,\n preload=preload, drop_bad_windows=True)\n\n # Drop bad epochs\n # XXX: This could be parallelized.\n # XXX: Also, this could be implemented in the Dataset object itself.\n for ds in windows_ds.datasets:\n ds.windows.drop_bad()\n assert ds.windows.preload == preload\n\n return windows_ds\n\n\ndef create_example_model(n_channels, n_classes, window_len_samples,\n kind='shallow', cuda=False):\n \"\"\"Create model, loss and optimizer.\n\n Parameters\n ----------\n n_channels : int\n Number of channels in the input\n n_times : int\n Window length in the input\n n_classes : int\n Number of classes in the output\n kind : str\n 'shallow' or 'deep'\n cuda : bool\n If True, move the model to a CUDA device.\n\n Returns\n -------\n model : torch.nn.Module\n Model to train.\n loss :\n Loss function\n optimizer :\n Optimizer\n \"\"\"\n if kind == 'shallow':\n model = ShallowFBCSPNet(\n n_channels, n_classes, input_window_samples=window_len_samples,\n n_filters_time=40, filter_time_length=25, n_filters_spat=40,\n pool_time_length=75, pool_time_stride=15, final_conv_length='auto',\n split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,\n drop_prob=0.5)\n elif kind == 'deep':\n model = Deep4Net(\n n_channels, n_classes, input_window_samples=window_len_samples,\n final_conv_length='auto', n_filters_time=25, n_filters_spat=25,\n filter_time_length=10, pool_time_length=3, pool_time_stride=3,\n n_filters_2=50, filter_length_2=10, n_filters_3=100,\n filter_length_3=10, n_filters_4=200, filter_length_4=10,\n first_pool_mode=\"max\", later_pool_mode=\"max\", drop_prob=0.5,\n double_time_convs=False, split_first_layer=True, batch_norm=True,\n batch_norm_alpha=0.1, stride_before_pool=False)\n else:\n raise ValueError\n\n if cuda:\n model.cuda()\n\n optimizer = optim.Adam(model.parameters())\n loss = nn.NLLLoss()\n\n return model, loss, optimizer\n\n\ndef run_training(model, dataloader, loss, optimizer, n_epochs=1, cuda=False):\n \"\"\"Run training loop.\n\n Parameters\n ----------\n model : torch.nn.Module\n Model to train.\n dataloader : torch.utils.data.Dataloader\n Data loader which will serve examples to the model during training.\n loss :\n Loss function.\n optimizer :\n Optimizer.\n n_epochs : int\n Number of epochs to train the model for.\n cuda : bool\n If True, move X and y to CUDA device.\n\n Returns\n -------\n model : torch.nn.Module\n Trained model.\n \"\"\"\n for i in range(n_epochs):\n loss_vals = list()\n for 
X, y, _ in dataloader:\n model.train()\n model.zero_grad()\n\n y = y.long()\n if cuda:\n X, y = X.cuda(), y.cuda()\n\n loss_val = loss(model(X), y)\n loss_vals.append(loss_val.item())\n\n loss_val.backward()\n optimizer.step()\n\n print(f'Epoch {i + 1} - mean training loss: {np.mean(loss_vals)}')\n\n return model"
65+
"def load_example_data(preload, window_len_s, n_recordings=10):\n \"\"\"Create windowed dataset from subjects of the TUH Abnormal dataset.\n\n Parameters\n ----------\n preload: bool\n If True, use eager loading, otherwise use lazy loading.\n window_len_s: int\n Window length in seconds.\n n_recordings: list of int\n Number of recordings to load.\n\n Returns\n -------\n windows_ds: BaseConcatDataset\n Windowed data.\n\n .. warning::\n The recordings from the TUH Abnormal corpus do not all share the same\n sampling rate. The following assumes that the files have already been\n resampled to a common sampling rate.\n \"\"\"\n\n recording_ids = list(range(n_recordings))\n\n ds = TUHAbnormal(\n TUH_PATH, recording_ids=recording_ids,\n target_name='pathological',\n preload=preload)\n\n fs = ds.datasets[0].raw.info['sfreq']\n window_len_samples = int(fs * window_len_s)\n window_stride_samples = int(fs * 4)\n # window_stride_samples = int(fs * window_len_s)\n windows_ds = create_fixed_length_windows(\n ds, start_offset_samples=0, stop_offset_samples=None,\n window_size_samples=window_len_samples,\n window_stride_samples=window_stride_samples, drop_last_window=True,\n preload=preload, drop_bad_windows=True)\n\n # Drop bad epochs\n # XXX: This could be parallelized.\n # XXX: Also, this could be implemented in the Dataset object itself.\n for ds in windows_ds.datasets:\n ds.windows.drop_bad()\n assert ds.windows.preload == preload\n\n return windows_ds\n\n\ndef create_example_model(n_channels, n_classes, window_len_samples,\n kind='shallow', cuda=False):\n \"\"\"Create model, loss and optimizer.\n\n Parameters\n ----------\n n_channels : int\n Number of channels in the input\n n_times : int\n Window length in the input\n n_classes : int\n Number of classes in the output\n kind : str\n 'shallow' or 'deep'\n cuda : bool\n If True, move the model to a CUDA device.\n\n Returns\n -------\n model : torch.nn.Module\n Model to train.\n loss :\n Loss function\n optimizer :\n Optimizer\n \"\"\"\n if kind == 'shallow':\n model = ShallowFBCSPNet(\n n_channels, n_classes, input_window_samples=window_len_samples,\n n_filters_time=40, filter_time_length=25, n_filters_spat=40,\n pool_time_length=75, pool_time_stride=15, final_conv_length='auto',\n split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,\n drop_prob=0.5)\n elif kind == 'deep':\n model = Deep4Net(\n n_channels, n_classes, input_window_samples=window_len_samples,\n final_conv_length='auto', n_filters_time=25, n_filters_spat=25,\n filter_time_length=10, pool_time_length=3, pool_time_stride=3,\n n_filters_2=50, filter_length_2=10, n_filters_3=100,\n filter_length_3=10, n_filters_4=200, filter_length_4=10,\n first_pool_mode=\"max\", later_pool_mode=\"max\", drop_prob=0.5,\n double_time_convs=False, split_first_layer=True, batch_norm=True,\n batch_norm_alpha=0.1, stride_before_pool=False)\n else:\n raise ValueError\n\n if cuda:\n model.cuda()\n\n optimizer = optim.Adam(model.parameters())\n loss = nn.NLLLoss()\n\n return model, loss, optimizer\n\n\ndef run_training(model, dataloader, loss, optimizer, n_epochs=1, cuda=False):\n \"\"\"Run training loop.\n\n Parameters\n ----------\n model : torch.nn.Module\n Model to train.\n dataloader : torch.utils.data.Dataloader\n Data loader which will serve examples to the model during training.\n loss :\n Loss function.\n optimizer :\n Optimizer.\n n_epochs : int\n Number of epochs to train the model for.\n cuda : bool\n If True, move X and y to CUDA device.\n\n Returns\n -------\n model : torch.nn.Module\n 
Trained model.\n \"\"\"\n for i in range(n_epochs):\n loss_vals = list()\n for X, y, _ in dataloader:\n model.train()\n model.zero_grad()\n\n y = y.long()\n if cuda:\n X, y = X.cuda(), y.cuda()\n\n loss_val = loss(model(X), y)\n loss_vals.append(loss_val.item())\n\n loss_val.backward()\n optimizer.step()\n\n print(f'Epoch {i + 1} - mean training loss: {np.mean(loss_vals)}')\n\n return model"
]
},
{
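For context, the preload flag is what switches this example between eager and lazy loading. A minimal sketch of how the two modes could be compared interactively, assuming a local copy of the TUH Abnormal corpus at TUH_PATH and the definitions from the cell above (not part of this commit):

# Hypothetical quick check: the helper defined above, called in both modes.
eager_ds = load_example_data(preload=True, window_len_s=4, n_recordings=2)
lazy_ds = load_example_data(preload=False, window_len_s=4, n_recordings=2)

# Eager loading keeps the windowed data in memory; lazy loading does not.
print(eager_ds.datasets[0].windows.preload)  # True
print(lazy_ds.datasets[0].windows.preload)   # False

# Either way, indexing yields the (X, y, ind) triplets consumed by run_training.
X, y, ind = lazy_ds[0]
print(X.shape, y)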
@@ -80,7 +80,7 @@
},
"outputs": [],
"source": [
83-
"PRELOAD = [True, False] # True -> eager loading; False -> lazy loading\nN_SUBJECTS = [10] # Number of recordings to load from the TUH Abnormal corpus\nWINDOW_LEN_S = [2, 4, 15] # Window length, in seconds\nN_EPOCHS = [2] # Number of epochs to train the model for\nBATCH_SIZE = [64, 256] # Training minibatch size\nMODEL = ['shallow', 'deep']\n\nNUM_WORKERS = [8, 0] # number of processes used by pytorch's Dataloader\nPIN_MEMORY = [False] # whether to use pinned memory\nCUDA = [True, False] if torch.cuda.is_available() else [False] # whether to use a CUDA device\n\nN_REPETITIONS = 3 # Number of times to repeat the experiment (to get better time estimates)"
83+
"PRELOAD = [True, False] # True -> eager loading; False -> lazy loading\nN_RECORDINGS = [10] # Number of recordings to load from the TUH Abnormal corpus\nWINDOW_LEN_S = [2, 4, 15] # Window length, in seconds\nN_EPOCHS = [2] # Number of epochs to train the model for\nBATCH_SIZE = [64, 256] # Training minibatch size\nMODEL = ['shallow', 'deep']\n\nNUM_WORKERS = [8, 0] # number of processes used by pytorch's Dataloader\nPIN_MEMORY = [False] # whether to use pinned memory\nCUDA = [True, False] if torch.cuda.is_available() else [False] # whether to use a CUDA device\n\nN_REPETITIONS = 3 # Number of times to repeat the experiment (to get better time estimates)"
]
},
{
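The renamed constant only changes the key name; the grid is still expanded with itertools.product in the benchmark loop below. As a rough sketch (not part of the diff) of how many runs that grid implies:

# Sketch: count the benchmarked configurations; every combination of the lists
# above is repeated N_REPETITIONS times.
from itertools import product

grids = [PRELOAD, N_RECORDINGS, WINDOW_LEN_S, N_EPOCHS, BATCH_SIZE, MODEL,
         NUM_WORKERS, PIN_MEMORY, CUDA]
n_runs = N_REPETITIONS * len(list(product(*grids)))
print(n_runs)  # 3 * (2*1*3*1*2*2*2*1*1) = 144 runs when no CUDA device is available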
@@ -116,7 +116,7 @@
},
"outputs": [],
"source": [
119-
"all_results = list()\nfor (i, preload, n_subjects, win_len_s, n_epochs, batch_size, model_kind,\n num_workers, pin_memory, cuda) in product(\n range(N_REPETITIONS), PRELOAD, N_SUBJECTS, WINDOW_LEN_S, N_EPOCHS,\n BATCH_SIZE, MODEL, NUM_WORKERS, PIN_MEMORY, CUDA):\n\n results = {\n 'repetition': i,\n 'preload': preload,\n 'n_subjects': n_subjects,\n 'win_len_s': win_len_s,\n 'n_epochs': n_epochs,\n 'batch_size': batch_size,\n 'model_kind': model_kind,\n 'num_workers': num_workers,\n 'pin_memory': pin_memory,\n 'cuda': cuda\n }\n print(f'\\nRepetition {i + 1}/{N_REPETITIONS}:\\n{results}')\n\n # Load the dataset\n data_loading_start = time.time()\n dataset = load_example_data(preload, win_len_s, n_subjects=n_subjects)\n data_loading_end = time.time()\n\n # Create the data loader\n training_setup_start = time.time()\n dataloader = DataLoader(\n dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory,\n num_workers=num_workers, worker_init_fn=None)\n\n # Instantiate model and optimizer\n n_channels = len(dataset.datasets[0].windows.ch_names)\n n_times = len(dataset.datasets[0].windows.times)\n n_classes = 2\n model, loss, optimizer = create_example_model(\n n_channels, n_classes, n_times, kind=model_kind, cuda=cuda)\n training_setup_end = time.time()\n\n # Start training loop\n model_training_start = time.time()\n trained_model = run_training(\n model, dataloader, loss, optimizer, n_epochs=n_epochs, cuda=cuda)\n model_training_end = time.time()\n\n del dataset, model, loss, optimizer, trained_model\n\n # Record timing results\n results['data_preparation'] = data_loading_end - data_loading_start\n results['training_setup'] = training_setup_end - training_setup_start\n results['model_training'] = model_training_end - model_training_start\n all_results.append(results)"
119+
"all_results = list()\nfor (i, preload, n_recordings, win_len_s, n_epochs, batch_size, model_kind,\n num_workers, pin_memory, cuda) in product(\n range(N_REPETITIONS), PRELOAD, N_RECORDINGS, WINDOW_LEN_S, N_EPOCHS,\n BATCH_SIZE, MODEL, NUM_WORKERS, PIN_MEMORY, CUDA):\n\n results = {\n 'repetition': i,\n 'preload': preload,\n 'n_recordings': n_recordings,\n 'win_len_s': win_len_s,\n 'n_epochs': n_epochs,\n 'batch_size': batch_size,\n 'model_kind': model_kind,\n 'num_workers': num_workers,\n 'pin_memory': pin_memory,\n 'cuda': cuda\n }\n print(f'\\nRepetition {i + 1}/{N_REPETITIONS}:\\n{results}')\n\n # Load the dataset\n data_loading_start = time.time()\n dataset = load_example_data(preload, win_len_s, n_recordings=n_recordings)\n data_loading_end = time.time()\n\n # Create the data loader\n training_setup_start = time.time()\n dataloader = DataLoader(\n dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory,\n num_workers=num_workers, worker_init_fn=None)\n\n # Instantiate model and optimizer\n n_channels = len(dataset.datasets[0].windows.ch_names)\n n_times = len(dataset.datasets[0].windows.times)\n n_classes = 2\n model, loss, optimizer = create_example_model(\n n_channels, n_classes, n_times, kind=model_kind, cuda=cuda)\n training_setup_end = time.time()\n\n # Start training loop\n model_training_start = time.time()\n trained_model = run_training(\n model, dataloader, loss, optimizer, n_epochs=n_epochs, cuda=cuda)\n model_training_end = time.time()\n\n del dataset, model, loss, optimizer, trained_model\n\n # Record timing results\n results['data_preparation'] = data_loading_end - data_loading_start\n results['training_setup'] = training_setup_end - training_setup_start\n results['model_training'] = model_training_end - model_training_start\n all_results.append(results)"
]
},
{
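The loop fills all_results with one dict per run, now keyed by 'n_recordings'. One plausible way to summarise those timings afterwards, sketched with pandas (an assumption, not something this diff adds):

# Sketch: compare eager vs. lazy loading once all_results has been collected.
import pandas as pd

df = pd.DataFrame(all_results)
summary = df.groupby(['preload', 'model_kind', 'num_workers'])[
    ['data_preparation', 'training_setup', 'model_training']].mean()
print(summary)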

master/_downloads/2a53ed6bee60a29af3f760043d35ed09/benchmark_lazy_eager_loading.py

Lines changed: 15 additions & 10 deletions
@@ -68,15 +68,17 @@
 # Each one of these steps will be timed, so we can report the total time taken
 # to prepare the data and train the model.

-def load_example_data(preload, window_len_s, n_subjects=10):
+def load_example_data(preload, window_len_s, n_recordings=10):
     """Create windowed dataset from subjects of the TUH Abnormal dataset.

     Parameters
     ----------
     preload: bool
         If True, use eager loading, otherwise use lazy loading.
-    n_subjects: int
-        Number of subjects to load.
+    window_len_s: int
+        Window length in seconds.
+    n_recordings: list of int
+        Number of recordings to load.

     Returns
     -------
@@ -88,9 +90,12 @@ def load_example_data(preload, window_len_s, n_subjects=10):
         sampling rate. The following assumes that the files have already been
         resampled to a common sampling rate.
     """
-    subject_ids = list(range(n_subjects))
+
+    recording_ids = list(range(n_recordings))
+
     ds = TUHAbnormal(
-        TUH_PATH, subject_ids=subject_ids, target_name='pathological',
+        TUH_PATH, recording_ids=recording_ids,
+        target_name='pathological',
         preload=preload)

     fs = ds.datasets[0].raw.info['sfreq']
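The helper still simply takes the first n_recordings recordings; recording_ids is the list of indices handed to TUHAbnormal. If specific recordings were wanted instead, an explicit index list should work, e.g. (hypothetical variation, not in this commit):

# Hypothetical: pick particular recordings rather than the first n_recordings.
ds = TUHAbnormal(
    TUH_PATH, recording_ids=[0, 3, 7],  # indices of the recordings to load
    target_name='pathological',
    preload=False)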
@@ -216,7 +221,7 @@ def run_training(model, dataloader, loss, optimizer, n_epochs=1, cuda=False):
 # Next, we define the different hyperparameters that we want to compare:

 PRELOAD = [True, False]  # True -> eager loading; False -> lazy loading
-N_SUBJECTS = [10]  # Number of recordings to load from the TUH Abnormal corpus
+N_RECORDINGS = [10]  # Number of recordings to load from the TUH Abnormal corpus
 WINDOW_LEN_S = [2, 4, 15]  # Window length, in seconds
 N_EPOCHS = [2]  # Number of epochs to train the model for
 BATCH_SIZE = [64, 256]  # Training minibatch size
@@ -239,15 +244,15 @@ def run_training(model, dataloader, loss, optimizer, n_epochs=1, cuda=False):
 # we set above to evaluate their execution time:

 all_results = list()
-for (i, preload, n_subjects, win_len_s, n_epochs, batch_size, model_kind,
+for (i, preload, n_recordings, win_len_s, n_epochs, batch_size, model_kind,
         num_workers, pin_memory, cuda) in product(
-            range(N_REPETITIONS), PRELOAD, N_SUBJECTS, WINDOW_LEN_S, N_EPOCHS,
+            range(N_REPETITIONS), PRELOAD, N_RECORDINGS, WINDOW_LEN_S, N_EPOCHS,
             BATCH_SIZE, MODEL, NUM_WORKERS, PIN_MEMORY, CUDA):

     results = {
         'repetition': i,
         'preload': preload,
-        'n_subjects': n_subjects,
+        'n_recordings': n_recordings,
         'win_len_s': win_len_s,
         'n_epochs': n_epochs,
         'batch_size': batch_size,
@@ -260,7 +265,7 @@ def run_training(model, dataloader, loss, optimizer, n_epochs=1, cuda=False):

     # Load the dataset
     data_loading_start = time.time()
-    dataset = load_example_data(preload, win_len_s, n_subjects=n_subjects)
+    dataset = load_example_data(preload, win_len_s, n_recordings=n_recordings)
     data_loading_end = time.time()

     # Create the data loader
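Immediately after this point the script builds the DataLoader whose num_workers and pin_memory settings come from the grid above. A minimal sketch of the two loader configurations being contrasted, reusing the dataset created in the loop (illustrative values, not part of the diff):

# Sketch: single-process vs. multi-process loading of the same dataset.
from torch.utils.data import DataLoader

# num_workers=0: windows are fetched in the main process.
loader_serial = DataLoader(dataset, batch_size=64, shuffle=False,
                           num_workers=0, pin_memory=False)

# num_workers=8: with preload=False, each worker reads windows from disk on
# demand, which can hide I/O latency behind the training computation.
loader_parallel = DataLoader(dataset, batch_size=64, shuffle=False,
                             num_workers=8, pin_memory=False)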