Skip to content

Commit bb1ccc2

Browse files
authored
✨ Improve week 02 seminar and return old hometask (#25)
1 parent eb7c257 commit bb1ccc2

19 files changed

+3211
-0
lines changed
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Week 2: Experiment tracking and testing
2+
3+
[//]: # (* Lecture: [slides](./lecture.pdf))
4+
* Lecture: TBD
5+
* Seminar: see the [example_project](./example_project) directory
6+
* Homework: see [homework/README.md](homework/README.md)
7+
8+
## Further reading
9+
* Tools for experiment tracking: [Aim](https://github.com/aimhubio/aim), [Comet](https://www.comet.ml/site/), [Neptune](https://neptune.ai/), [Sacred](https://github.com/IDSIA/sacred), [Weights and Biases](https://wandb.ai/), [ClearML](https://clear.ml/)
10+
* [DVC](https://dvc.org/) and [Pachyderm](https://www.pachyderm.com/) for artifact versioning
11+
* [Hydra documentation](https://hydra.cc/docs/intro/)
12+
* [Unittest](https://docs.python.org/3/library/unittest.html) built-in module
13+
* [Doctest](https://docs.python.org/3/library/doctest.html) built-in module (useful for testing the code examples embedded in docstrings!)
14+
* [Pytest](https://github.com/pytest-dev/pytest/) repository
15+
* Pytest plugins: [pytest-xdist](https://pypi.org/project/pytest-xdist/) for parallel execution, [pytest-cov](https://pytest-cov.readthedocs.io/en/latest/readme.html) for coverage reports.
16+
* [Hypothesis quick start guide](https://hypothesis.readthedocs.io/en/latest/quickstart.html) and [integration with pytest](https://hypothesis.readthedocs.io/en/latest/details.html#the-hypothesis-pytest-plugin)
17+
* [Full Stack Deep Learning "Troubleshooting & Testing" lecture](https://fullstackdeeplearning.com/course/2022/lecture-3-troubleshooting-and-testing/#4-resources)
18+
* [Made With ML MLOps Course, "Testing Machine Learning Systems: Code, Data and Models"](https://madewithml.com/courses/mlops/testing/)
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import json
2+
from argparse import ArgumentParser
3+
4+
import torch
5+
import torchvision.transforms as transforms
6+
from torchvision.datasets import CIFAR10
7+
from torchvision.models import resnet18
8+
9+
from hparams import config
10+
11+
12+
def main(args):
    """Evaluate the saved ResNet-18 checkpoint on the CIFAR-10 test split.

    Loads the weights from ``model.pt``, runs the model over the test set and
    writes ``{"accuracy": <float>}`` to ``final_metrics.json``.

    Args:
        args: Parsed command-line namespace; currently unused.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
        # Same preprocessing as train.py: the model was trained on 224x224
        # inputs, so evaluating on raw 32x32 images would skew the metric.
        transforms.Resize((224, 224)),
    ])

    test_dataset = CIFAR10(
        root='CIFAR10/test',
        train=False,
        transform=transform,
        download=False,
    )

    test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset,
        batch_size=config["batch_size"],
    )

    # Fall back to CPU so the metrics stage also runs on machines without a GPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = resnet18(pretrained=False, num_classes=10)
    # map_location lets a checkpoint saved from a CUDA model load on CPU-only hosts.
    model.load_state_dict(torch.load("model.pt", map_location=device))
    model.to(device)
    # Evaluation mode: BatchNorm must use its running statistics, not the
    # statistics of each test batch (which would also depend on batch_size).
    model.eval()

    correct = 0

    with torch.inference_mode():
        for test_images, test_labels in test_loader:
            test_images = test_images.to(device)
            test_labels = test_labels.to(device)

            outputs = model(test_images)
            preds = torch.argmax(outputs, 1)
            # Accumulate an exact integer count instead of a float32 tensor.
            correct += (preds == test_labels).sum().item()

    accuracy = correct / len(test_dataset)

    with open("final_metrics.json", "w+") as f:
        json.dump({"accuracy": accuracy}, f)
        print("\n", file=f)
49+
50+
51+
if __name__ == "__main__":
    # No options are declared; parsing still gives --help and rejects stray arguments.
    cli_args = ArgumentParser().parse_args()
    main(cli_args)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# DVC pipeline: download CIFAR-10 -> train the model -> compute final metrics.
stages:
  prepare_data:
    cmd: python prepare_data.py
    deps:
      - prepare_data.py
    outs:
      - CIFAR10
  train:
    cmd: python train.py
    deps:
      - CIFAR10
      - hparams.py
      - train.py
    outs:
      - model.pt
  compute_metrics:
    cmd: python compute_metrics.py
    deps:
      - CIFAR10
      - compute_metrics.py
      # compute_metrics.py imports `config` from hparams.py, so a change there
      # must invalidate this stage as well.
      - hparams.py
      - model.pt
    metrics:
      - final_metrics.json:
          cache: false
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Hyperparameters shared by the training and evaluation scripts.
config = {
    "batch_size": 64,
    "learning_rate": 1e-5,
    "weight_decay": 0.01,
    "epochs": 2,
    "zero_init_residual": False,
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from torchvision.datasets import CIFAR10
2+
3+
if __name__ == "__main__":
    # Download CIFAR-10 into each of the two dataset roots.
    for dataset_root in ("CIFAR10/train", "CIFAR10/test"):
        CIFAR10(dataset_root, download=True)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
[project]
name = "homework"
version = "0.1.0"
description = "Sample Text"
# Under [project] (PEP 621) `authors` must be an array of tables with
# `name` / `email` keys; Poetry-style "Name <email>" strings are rejected.
# NOTE(review): the address below looks redacted in this copy - restore the real one.
authors = [{ name = "YZ", email = "[email protected]" }]
requires-python = ">=3.10"
readme = "README.md"

dependencies = [
    "torch==2.1.2",
    "torchvision==0.16.2",
    "wandb>=0.13.10",
    "tqdm==4.66.1",
    "numpy==1.26.4",
    "dvc==2.44.0",
    "hydra-core==1.3.1",
    "omegaconf==2.3.0",
]

[tool.uv]
# Test-only tooling, kept out of the runtime dependency list.
dev-dependencies = [
    "pytest==7.4.4",
    "pytest-cov==4.1.0",
]
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
import torch
2+
import pytest
3+
4+
from train import compute_accuracy
5+
6+
def test_arange_elems():
    """The last element of a ten-element float ``arange`` starting at 0 is 9."""
    values = torch.arange(0, 10, dtype=torch.float)
    expected_last = torch.tensor([9]).float()
    last = values[-1]
    assert torch.allclose(last, expected_last)
9+
10+
def test_div_zero():
    """Dividing an integer tensor by a zero tensor yields a non-finite value."""
    denominator = torch.zeros(1, dtype=torch.long)
    numerator = torch.ones(1, dtype=torch.long)

    quotient = numerator / denominator
    assert not torch.isfinite(quotient)
15+
16+
17+
def test_div_zero_python():
    """Plain Python division by zero raises ``ZeroDivisionError``."""
    numerator, denominator = 1, 0
    with pytest.raises(ZeroDivisionError):
        numerator / denominator
20+
21+
def test_accuracy():
    """Check ``compute_accuracy`` on a perfect match and on a half-correct batch."""
    random_preds = torch.randint(0, 2, size=(100,))
    identical_targets = random_preds.clone()

    perfect = compute_accuracy(random_preds, identical_targets)
    assert perfect == 1.0

    half_right_preds = torch.tensor([1, 2, 3, 0, 0, 0])
    distinct_targets = torch.tensor([1, 2, 3, 4, 5, 6])

    half = compute_accuracy(half_right_preds, distinct_targets)
    assert half == 0.5  # This is bad - why?
31+
32+
@pytest.mark.parametrize(
    "preds,targets,result",
    [
        (torch.tensor([1, 2, 3]), torch.tensor([1, 2, 3]), 1.0),
        (torch.tensor([1, 2, 3]), torch.tensor([0, 0, 0]), 0.0),
        (torch.tensor([1, 2, 3]), torch.tensor([1, 2, 0]), 2 / 3),
    ],
)
def test_accuracy_parametrized(preds, targets, result):
    """``compute_accuracy`` matches the expected value within an absolute tolerance of 1e-5."""
    actual = compute_accuracy(preds, targets)
    expected = torch.tensor([result])
    assert torch.allclose(actual, expected, rtol=0, atol=1e-5)
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import torch
2+
import torch.nn as nn
3+
import torchvision.transforms as transforms
4+
import wandb
5+
from torchvision.datasets import CIFAR10
6+
from torchvision.models import resnet18
7+
from tqdm import tqdm, trange
8+
9+
from hparams import config
10+
11+
def compute_accuracy(preds, targets):
    """Return the fraction of predictions that equal their targets.

    Args:
        preds: Tensor of predicted labels.
        targets: Tensor of ground-truth labels, compared element-wise with ``preds``.

    Returns:
        A 0-dim float tensor in ``[0, 1]``. Empty inputs give NaN, since the
        mean over zero elements is undefined.
    """
    # mean, not sum: summing yields the number of correct predictions,
    # which is a count rather than an accuracy.
    return (targets == preds).float().mean()
14+
15+
16+
def _evaluate(model, test_loader, device):
    """Return ``compute_accuracy`` over all batches of ``test_loader``.

    The model is put into evaluation mode for the pass and restored to its
    previous mode afterwards, so BatchNorm running statistics are neither
    bypassed nor updated with test data.
    """
    was_training = model.training
    model.eval()

    all_preds = []
    all_labels = []
    try:
        with torch.inference_mode():
            for test_images, test_labels in test_loader:
                test_images = test_images.to(device)
                test_labels = test_labels.to(device)

                preds = torch.argmax(model(test_images), 1)

                all_preds.append(preds)
                all_labels.append(test_labels)
    finally:
        # Restore the caller's mode even if evaluation fails midway.
        model.train(was_training)

    return compute_accuracy(torch.cat(all_preds), torch.cat(all_labels))


def main():
    """Train ResNet-18 on CIFAR-10 and log progress to Weights & Biases.

    Every 100 training steps the model is evaluated on the whole test split
    and ``test_acc`` / ``train_loss`` are logged. When training finishes the
    weights are saved to ``model.pt`` and the W&B run id to ``run_id.txt``.
    """
    wandb.init(config=config, project="effdl_example", name="baseline")

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
        transforms.Resize((224, 224)),
    ])

    train_dataset = CIFAR10(
        root='CIFAR10/train',
        train=True,
        transform=transform,
        download=False,
    )

    test_dataset = CIFAR10(
        root='CIFAR10/test',
        train=False,
        transform=transform,
        download=False,
    )

    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=config["batch_size"],
        shuffle=True,
    )

    test_loader = torch.utils.data.DataLoader(
        dataset=test_dataset,
        batch_size=config["batch_size"],
    )

    # Fall back to CPU so the script still runs on machines without a GPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = resnet18(pretrained=False, num_classes=10, zero_init_residual=config["zero_init_residual"])
    model.to(device)
    wandb.watch(model)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=config["learning_rate"],
        weight_decay=config["weight_decay"],
    )

    model.train()
    for epoch in trange(config["epochs"]):
        for i, (images, labels) in enumerate(tqdm(train_loader)):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            if i % 100 == 0:
                accuracy = _evaluate(model, test_loader, device)

                # Log plain Python floats rather than device tensors.
                metrics = {'test_acc': float(accuracy), 'train_loss': loss.item()}
                # The step counts training samples processed so far.
                wandb.log(metrics, step=epoch * len(train_dataset) + (i + 1) * config["batch_size"])

    torch.save(model.state_dict(), "model.pt")

    with open("run_id.txt", "w+") as f:
        print(wandb.run.id, file=f)
88+
89+
90+
if __name__ == "__main__":
    # Script entry point: run the training pipeline.
    main()

0 commit comments

Comments
 (0)