|
62 | 62 | },
|
63 | 63 | "outputs": [],
|
64 | 64 | "source": [
|
65 |
| - "def load_example_data(preload, window_len_s, n_subjects=10):\n \"\"\"Create windowed dataset from subjects of the TUH Abnormal dataset.\n\n Parameters\n ----------\n preload: bool\n If True, use eager loading, otherwise use lazy loading.\n n_subjects: int\n Number of subjects to load.\n\n Returns\n -------\n windows_ds: BaseConcatDataset\n Windowed data.\n\n .. warning::\n The recordings from the TUH Abnormal corpus do not all share the same\n sampling rate. The following assumes that the files have already been\n resampled to a common sampling rate.\n \"\"\"\n subject_ids = list(range(n_subjects))\n ds = TUHAbnormal(\n TUH_PATH, subject_ids=subject_ids, target_name='pathological',\n preload=preload)\n\n fs = ds.datasets[0].raw.info['sfreq']\n window_len_samples = int(fs * window_len_s)\n window_stride_samples = int(fs * 4)\n # window_stride_samples = int(fs * window_len_s)\n windows_ds = create_fixed_length_windows(\n ds, start_offset_samples=0, stop_offset_samples=None,\n window_size_samples=window_len_samples,\n window_stride_samples=window_stride_samples, drop_last_window=True,\n preload=preload, drop_bad_windows=True)\n\n # Drop bad epochs\n # XXX: This could be parallelized.\n # XXX: Also, this could be implemented in the Dataset object itself.\n for ds in windows_ds.datasets:\n ds.windows.drop_bad()\n assert ds.windows.preload == preload\n\n return windows_ds\n\n\ndef create_example_model(n_channels, n_classes, window_len_samples,\n kind='shallow', cuda=False):\n \"\"\"Create model, loss and optimizer.\n\n Parameters\n ----------\n n_channels : int\n Number of channels in the input\n n_times : int\n Window length in the input\n n_classes : int\n Number of classes in the output\n kind : str\n 'shallow' or 'deep'\n cuda : bool\n If True, move the model to a CUDA device.\n\n Returns\n -------\n model : torch.nn.Module\n Model to train.\n loss :\n Loss function\n optimizer :\n Optimizer\n \"\"\"\n if kind == 'shallow':\n model = ShallowFBCSPNet(\n n_channels, n_classes, input_window_samples=window_len_samples,\n n_filters_time=40, filter_time_length=25, n_filters_spat=40,\n pool_time_length=75, pool_time_stride=15, final_conv_length='auto',\n split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,\n drop_prob=0.5)\n elif kind == 'deep':\n model = Deep4Net(\n n_channels, n_classes, input_window_samples=window_len_samples,\n final_conv_length='auto', n_filters_time=25, n_filters_spat=25,\n filter_time_length=10, pool_time_length=3, pool_time_stride=3,\n n_filters_2=50, filter_length_2=10, n_filters_3=100,\n filter_length_3=10, n_filters_4=200, filter_length_4=10,\n first_pool_mode=\"max\", later_pool_mode=\"max\", drop_prob=0.5,\n double_time_convs=False, split_first_layer=True, batch_norm=True,\n batch_norm_alpha=0.1, stride_before_pool=False)\n else:\n raise ValueError\n\n if cuda:\n model.cuda()\n\n optimizer = optim.Adam(model.parameters())\n loss = nn.NLLLoss()\n\n return model, loss, optimizer\n\n\ndef run_training(model, dataloader, loss, optimizer, n_epochs=1, cuda=False):\n \"\"\"Run training loop.\n\n Parameters\n ----------\n model : torch.nn.Module\n Model to train.\n dataloader : torch.utils.data.Dataloader\n Data loader which will serve examples to the model during training.\n loss :\n Loss function.\n optimizer :\n Optimizer.\n n_epochs : int\n Number of epochs to train the model for.\n cuda : bool\n If True, move X and y to CUDA device.\n\n Returns\n -------\n model : torch.nn.Module\n Trained model.\n \"\"\"\n for i in range(n_epochs):\n loss_vals = list()\n 
for X, y, _ in dataloader:\n model.train()\n model.zero_grad()\n\n y = y.long()\n if cuda:\n X, y = X.cuda(), y.cuda()\n\n loss_val = loss(model(X), y)\n loss_vals.append(loss_val.item())\n\n loss_val.backward()\n optimizer.step()\n\n print(f'Epoch {i + 1} - mean training loss: {np.mean(loss_vals)}')\n\n return model" |
| 65 | + "def load_example_data(preload, window_len_s, n_recordings=10):\n \"\"\"Create windowed dataset from subjects of the TUH Abnormal dataset.\n\n Parameters\n ----------\n preload: bool\n If True, use eager loading, otherwise use lazy loading.\n window_len_s: int\n Window length in seconds.\n n_recordings: list of int\n Number of recordings to load.\n\n Returns\n -------\n windows_ds: BaseConcatDataset\n Windowed data.\n\n .. warning::\n The recordings from the TUH Abnormal corpus do not all share the same\n sampling rate. The following assumes that the files have already been\n resampled to a common sampling rate.\n \"\"\"\n\n recording_ids = list(range(n_recordings))\n\n ds = TUHAbnormal(\n TUH_PATH, recording_ids=recording_ids,\n target_name='pathological',\n preload=preload)\n\n fs = ds.datasets[0].raw.info['sfreq']\n window_len_samples = int(fs * window_len_s)\n window_stride_samples = int(fs * 4)\n # window_stride_samples = int(fs * window_len_s)\n windows_ds = create_fixed_length_windows(\n ds, start_offset_samples=0, stop_offset_samples=None,\n window_size_samples=window_len_samples,\n window_stride_samples=window_stride_samples, drop_last_window=True,\n preload=preload, drop_bad_windows=True)\n\n # Drop bad epochs\n # XXX: This could be parallelized.\n # XXX: Also, this could be implemented in the Dataset object itself.\n for ds in windows_ds.datasets:\n ds.windows.drop_bad()\n assert ds.windows.preload == preload\n\n return windows_ds\n\n\ndef create_example_model(n_channels, n_classes, window_len_samples,\n kind='shallow', cuda=False):\n \"\"\"Create model, loss and optimizer.\n\n Parameters\n ----------\n n_channels : int\n Number of channels in the input\n n_times : int\n Window length in the input\n n_classes : int\n Number of classes in the output\n kind : str\n 'shallow' or 'deep'\n cuda : bool\n If True, move the model to a CUDA device.\n\n Returns\n -------\n model : torch.nn.Module\n Model to train.\n loss :\n Loss function\n optimizer :\n Optimizer\n \"\"\"\n if kind == 'shallow':\n model = ShallowFBCSPNet(\n n_channels, n_classes, input_window_samples=window_len_samples,\n n_filters_time=40, filter_time_length=25, n_filters_spat=40,\n pool_time_length=75, pool_time_stride=15, final_conv_length='auto',\n split_first_layer=True, batch_norm=True, batch_norm_alpha=0.1,\n drop_prob=0.5)\n elif kind == 'deep':\n model = Deep4Net(\n n_channels, n_classes, input_window_samples=window_len_samples,\n final_conv_length='auto', n_filters_time=25, n_filters_spat=25,\n filter_time_length=10, pool_time_length=3, pool_time_stride=3,\n n_filters_2=50, filter_length_2=10, n_filters_3=100,\n filter_length_3=10, n_filters_4=200, filter_length_4=10,\n first_pool_mode=\"max\", later_pool_mode=\"max\", drop_prob=0.5,\n double_time_convs=False, split_first_layer=True, batch_norm=True,\n batch_norm_alpha=0.1, stride_before_pool=False)\n else:\n raise ValueError\n\n if cuda:\n model.cuda()\n\n optimizer = optim.Adam(model.parameters())\n loss = nn.NLLLoss()\n\n return model, loss, optimizer\n\n\ndef run_training(model, dataloader, loss, optimizer, n_epochs=1, cuda=False):\n \"\"\"Run training loop.\n\n Parameters\n ----------\n model : torch.nn.Module\n Model to train.\n dataloader : torch.utils.data.Dataloader\n Data loader which will serve examples to the model during training.\n loss :\n Loss function.\n optimizer :\n Optimizer.\n n_epochs : int\n Number of epochs to train the model for.\n cuda : bool\n If True, move X and y to CUDA device.\n\n Returns\n -------\n model : 
torch.nn.Module\n Trained model.\n \"\"\"\n for i in range(n_epochs):\n loss_vals = list()\n for X, y, _ in dataloader:\n model.train()\n model.zero_grad()\n\n y = y.long()\n if cuda:\n X, y = X.cuda(), y.cuda()\n\n loss_val = loss(model(X), y)\n loss_vals.append(loss_val.item())\n\n loss_val.backward()\n optimizer.step()\n\n print(f'Epoch {i + 1} - mean training loss: {np.mean(loss_vals)}')\n\n return model" |
66 | 66 | ]
|
67 | 67 | },
|
68 | 68 | {
|
|
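Note on the hunk above: braindecode's windowed datasets yield `(X, y, ind)` triplets rather than plain `(X, y)` pairs, which is why `run_training` unpacks three items per batch. For readers without access to the TUH Abnormal corpus, the training loop can be smoke-tested against a stand-in; the sketch below uses a hypothetical `SyntheticWindows` class (not part of braindecode) that merely mimics that contract.

```python
# Minimal sketch: a hypothetical stand-in dataset (not part of braindecode)
# that yields (X, y, ind) triplets like the windowed TUH dataset above, so
# run_training's `for X, y, _ in dataloader` loop can be exercised without
# the actual corpus.
import torch
from torch.utils.data import DataLoader, Dataset


class SyntheticWindows(Dataset):
    def __init__(self, n_windows=128, n_channels=21, n_times=400):
        self.X = torch.randn(n_windows, n_channels, n_times)
        self.y = torch.randint(0, 2, (n_windows,))

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        # Third element mirrors braindecode's (i_window_in_trial, i_start, i_stop)
        return self.X[i], self.y[i], (0, 0, self.X.shape[-1])


loader = DataLoader(SyntheticWindows(), batch_size=64)
X, y, _ = next(iter(loader))
print(X.shape, y.shape)  # torch.Size([64, 21, 400]) torch.Size([64])
```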
80 | 80 | },
|
81 | 81 | "outputs": [],
|
82 | 82 | "source": [
|
83 |
| - "PRELOAD = [True, False] # True -> eager loading; False -> lazy loading\nN_SUBJECTS = [10] # Number of recordings to load from the TUH Abnormal corpus\nWINDOW_LEN_S = [2, 4, 15] # Window length, in seconds\nN_EPOCHS = [2] # Number of epochs to train the model for\nBATCH_SIZE = [64, 256] # Training minibatch size\nMODEL = ['shallow', 'deep']\n\nNUM_WORKERS = [8, 0] # number of processes used by pytorch's Dataloader\nPIN_MEMORY = [False] # whether to use pinned memory\nCUDA = [True, False] if torch.cuda.is_available() else [False] # whether to use a CUDA device\n\nN_REPETITIONS = 3 # Number of times to repeat the experiment (to get better time estimates)" |
| 83 | + "PRELOAD = [True, False] # True -> eager loading; False -> lazy loading\nN_RECORDINGS = [10] # Number of recordings to load from the TUH Abnormal corpus\nWINDOW_LEN_S = [2, 4, 15] # Window length, in seconds\nN_EPOCHS = [2] # Number of epochs to train the model for\nBATCH_SIZE = [64, 256] # Training minibatch size\nMODEL = ['shallow', 'deep']\n\nNUM_WORKERS = [8, 0] # number of processes used by pytorch's Dataloader\nPIN_MEMORY = [False] # whether to use pinned memory\nCUDA = [True, False] if torch.cuda.is_available() else [False] # whether to use a CUDA device\n\nN_REPETITIONS = 3 # Number of times to repeat the experiment (to get better time estimates)" |
84 | 84 | ]
|
85 | 85 | },
|
86 | 86 | {
|
|
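The grid defined above multiplies out quickly, and every configuration is timed `N_REPETITIONS` times, so it is worth counting configurations before launching the run. A quick sketch, with values copied from the cell above and a CPU-only machine assumed (so `CUDA = [False]`):

```python
# Hedged sketch: count the configurations the benchmark loop will iterate over.
# Values mirror the parameter cell above; CUDA is assumed unavailable here.
from itertools import product

grid = list(product(
    range(3),             # N_REPETITIONS
    [True, False],        # PRELOAD
    [10],                 # N_RECORDINGS
    [2, 4, 15],           # WINDOW_LEN_S
    [2],                  # N_EPOCHS
    [64, 256],            # BATCH_SIZE
    ['shallow', 'deep'],  # MODEL
    [8, 0],               # NUM_WORKERS
    [False],              # PIN_MEMORY
    [False],              # CUDA
))
print(len(grid))  # 3 * 2 * 1 * 3 * 1 * 2 * 2 * 2 * 1 * 1 = 144
```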
116 | 116 | },
|
117 | 117 | "outputs": [],
|
118 | 118 | "source": [
|
119 |
| - "all_results = list()\nfor (i, preload, n_subjects, win_len_s, n_epochs, batch_size, model_kind,\n num_workers, pin_memory, cuda) in product(\n range(N_REPETITIONS), PRELOAD, N_SUBJECTS, WINDOW_LEN_S, N_EPOCHS,\n BATCH_SIZE, MODEL, NUM_WORKERS, PIN_MEMORY, CUDA):\n\n results = {\n 'repetition': i,\n 'preload': preload,\n 'n_subjects': n_subjects,\n 'win_len_s': win_len_s,\n 'n_epochs': n_epochs,\n 'batch_size': batch_size,\n 'model_kind': model_kind,\n 'num_workers': num_workers,\n 'pin_memory': pin_memory,\n 'cuda': cuda\n }\n print(f'\\nRepetition {i + 1}/{N_REPETITIONS}:\\n{results}')\n\n # Load the dataset\n data_loading_start = time.time()\n dataset = load_example_data(preload, win_len_s, n_subjects=n_subjects)\n data_loading_end = time.time()\n\n # Create the data loader\n training_setup_start = time.time()\n dataloader = DataLoader(\n dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory,\n num_workers=num_workers, worker_init_fn=None)\n\n # Instantiate model and optimizer\n n_channels = len(dataset.datasets[0].windows.ch_names)\n n_times = len(dataset.datasets[0].windows.times)\n n_classes = 2\n model, loss, optimizer = create_example_model(\n n_channels, n_classes, n_times, kind=model_kind, cuda=cuda)\n training_setup_end = time.time()\n\n # Start training loop\n model_training_start = time.time()\n trained_model = run_training(\n model, dataloader, loss, optimizer, n_epochs=n_epochs, cuda=cuda)\n model_training_end = time.time()\n\n del dataset, model, loss, optimizer, trained_model\n\n # Record timing results\n results['data_preparation'] = data_loading_end - data_loading_start\n results['training_setup'] = training_setup_end - training_setup_start\n results['model_training'] = model_training_end - model_training_start\n all_results.append(results)" |
| 119 | + "all_results = list()\nfor (i, preload, n_recordings, win_len_s, n_epochs, batch_size, model_kind,\n num_workers, pin_memory, cuda) in product(\n range(N_REPETITIONS), PRELOAD, N_RECORDINGS, WINDOW_LEN_S, N_EPOCHS,\n BATCH_SIZE, MODEL, NUM_WORKERS, PIN_MEMORY, CUDA):\n\n results = {\n 'repetition': i,\n 'preload': preload,\n 'n_recordings': n_recordings,\n 'win_len_s': win_len_s,\n 'n_epochs': n_epochs,\n 'batch_size': batch_size,\n 'model_kind': model_kind,\n 'num_workers': num_workers,\n 'pin_memory': pin_memory,\n 'cuda': cuda\n }\n print(f'\\nRepetition {i + 1}/{N_REPETITIONS}:\\n{results}')\n\n # Load the dataset\n data_loading_start = time.time()\n dataset = load_example_data(preload, win_len_s, n_recordings=n_recordings)\n data_loading_end = time.time()\n\n # Create the data loader\n training_setup_start = time.time()\n dataloader = DataLoader(\n dataset, batch_size=batch_size, shuffle=False, pin_memory=pin_memory,\n num_workers=num_workers, worker_init_fn=None)\n\n # Instantiate model and optimizer\n n_channels = len(dataset.datasets[0].windows.ch_names)\n n_times = len(dataset.datasets[0].windows.times)\n n_classes = 2\n model, loss, optimizer = create_example_model(\n n_channels, n_classes, n_times, kind=model_kind, cuda=cuda)\n training_setup_end = time.time()\n\n # Start training loop\n model_training_start = time.time()\n trained_model = run_training(\n model, dataloader, loss, optimizer, n_epochs=n_epochs, cuda=cuda)\n model_training_end = time.time()\n\n del dataset, model, loss, optimizer, trained_model\n\n # Record timing results\n results['data_preparation'] = data_loading_end - data_loading_start\n results['training_setup'] = training_setup_end - training_setup_start\n results['model_training'] = model_training_end - model_training_start\n all_results.append(results)" |
120 | 120 | ]
|
121 | 121 | },
|
122 | 122 | {
|
|
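Once the loop above completes, `all_results` is a flat list of dicts, one per configuration and repetition, so it converts directly into a table for comparing eager against lazy loading. A minimal follow-up sketch, assuming pandas is available:

```python
# Hedged sketch: aggregate the timings gathered above. Assumes pandas is
# installed; column names match the keys of the `results` dicts.
import pandas as pd

df = pd.DataFrame(all_results)
summary = (
    df.groupby(['preload', 'model_kind', 'win_len_s', 'batch_size', 'num_workers'])
      [['data_preparation', 'training_setup', 'model_training']]
      .mean()
)
print(summary)
```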