-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
115 lines (91 loc) · 4.55 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
import torch
from torch.utils import data
from sklearn.model_selection import KFold
from scripts import dataset, model, plot
LEVEL_VALUES = [
# Data
['PeptideComparison_1', 'PeptideComparison_2', 'PeptideComparison_3', 'PeptideComparison_4', 'PeptideComparison_5', 'TCellNumber_1', 'Activation_1', 'PeptideComparison_6', 'PeptideComparison_7', 'PeptideComparison_8', 'PeptideComparison_9', 'TCellNumber_2', 'TCellNumber_3', 'Activation_2', 'TCellNumber_4', 'Activation_3'],
# T cell counts
['200k', '100k', '80k', '32k', '30k', '16k', '10k', '8k', '3k', '2k'],
# Peptides
['N4', 'Q4', 'T4', 'V4', 'G4', 'E1'], # ['A2', 'Y3', 'A8', 'Q7']
# Concentrations
['1uM', '300nM', '100nM', '30nM', '10nM', '3nM', '1nM', '100pM', '10pM'],
# Time points
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71],
# Cytokines
['IFNg', 'IL-10', 'IL-17A', 'IL-2', 'IL-4', 'IL-6', 'TNFa'],
# Features
['concentration', 'derivative', 'integral']
]
params = {
'max_epochs': 150,
'df': 'all',
'save': True,
'learning_rate': [0.005, 0.001, 0.0005, 0.0001],
}
TIME_SERIES_LENGTH = 64
def MAPE_loss(input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
"""
Calculates the mean absolute percentage difference between the input and the target.
"""
return torch.mean(torch.abs((target - input) / target))
def train_model(df: dataset.CytokineDataset, kfold: KFold, pred: str = 'series',
criterion: str = None, ckpt_dir: str = 'model'):
print(f'Training on dataset {params["df"]} with {len(df)} points ({torch.seed()})\n')
losses = {}
for i, (train_index, test_index) in enumerate(kfold.split(df)):
print(f'---------------- Fold {i + 1} ----------------')
for lr in params['learning_rate']:
print(f'\tLearning rate : {lr}')
train_loader = data.DataLoader(data.Subset(df, train_index), batch_size=4, shuffle=True)
test_loader = data.DataLoader(data.Subset(df, test_index), batch_size=4, shuffle=True)
nn = model.CytokineModel(input=len(df.antigens), h1=16, h2=4, pred=pred)
if criterion == 'MAPE':
criterion = MAPE_loss
else:
criterion = torch.nn.MSELoss('mean')
optimizer = torch.optim.Adam(nn.parameters(), lr)
losses[lr] = model.train(nn, train_loader, test_loader, criterion, optimizer,
ckpt_dir=ckpt_dir, **params)
plot.plot_loss(losses, (params['df'], i + 1))
if __name__ == '__main__':
import matplotlib.pyplot as plt
# Fix the seed for repeatability in training and testing.
np.random.seed(0)
torch.manual_seed(5667615556836105505)
# Visualize subsets of the dataset after all pre-processing has been performed.
for f in [f'PeptideComparison_{i}' for i in range(7, 10)]:
df = dataset.CytokineDataset(files=[f])
plot.plot_dataset(df, 'IL-17A')
datasets = [f'PeptideComparison_{i}' for i in range(1, 10)]
datasets.extend(['TCellNumber_1', 'TCellNumber_3', 'Activation_1', 'Activation_3',
'HighMI_1-1', 'HighMI_1-2', 'HighMI_1-3', 'HighMI_1-4'])
df = dataset.CytokineDataset(files=datasets, antigens=['V4'])
plot.plot_dataset(df, 'IL-17A')
# Get the k-folds of the dataset.
train_per = 0.7
val_per = 0.15
n_splits = 4
kf = KFold(n_splits=n_splits, shuffle=True, random_state=torch.seed() >> 32)
# Train the models on all available data.
train_model(df, kf, pred='series')
train_model(df, kf, pred='point')
nn = torch.load('model/series/nn-0.0005-all/eph-40.pth')
nn.eval()
with torch.no_grad():
for n in [10, 42, 66, 101, 123, 148, 200]:
x, y, r = df[n]
pred = nn(x)
pred = torch.reshape(pred, (5, TIME_SERIES_LENGTH)).detach().numpy()
r = r.detach().numpy()
for i, c in enumerate(['blue', 'green', 'red', 'orange', 'purple']):
plt.plot(pred.T[:, i], color=c)
plt.plot(r.T[:, i], alpha=0.5, color=c)
plt.title(f'Target and Prediction ({n})')
plt.show()
nn = torch.load('model/point/nn-0.0005-all/eph-80.pth')
preds = model.evaluate(nn, data.DataLoader(df, batch_size=1, shuffle=True))
plot.plot_pred_concentration(preds, df)
print('hello world!')