mryab
diff --git a/‎README.md
+3-1 b/‎README.md
+3-1
diff --git a/‎week02_management_and_testing/README.md
+17 b/‎week02_management_and_testing/README.md
+17
diff --git a/‎week02_management_and_testing/example_project/compute_metrics.py
+54 b/‎week02_management_and_testing/example_project/compute_metrics.py
+54
diff --git a/‎week02_management_and_testing/example_project/dvc.yaml
+24 b/‎week02_management_and_testing/example_project/dvc.yaml
+24
diff --git a/‎week02_management_and_testing/example_project/hparams.py
+7 b/‎week02_management_and_testing/example_project/hparams.py
+7
diff --git a/‎week02_management_and_testing/example_project/prepare_data.py
+5 b/‎week02_management_and_testing/example_project/prepare_data.py
+5
diff --git a/‎week02_management_and_testing/example_project/test_basic.py
+38 b/‎week02_management_and_testing/example_project/test_basic.py
+38
diff --git a/‎week02_management_and_testing/example_project/train.py
+91 b/‎week02_management_and_testing/example_project/train.py
+91
diff --git a/‎week02_management_and_testing/homework/README.md
+92 b/‎week02_management_and_testing/homework/README.md
+92
diff --git a/‎week02_management_and_testing/homework/main.py
+40 b/‎week02_management_and_testing/homework/main.py
+40
diff --git a/‎week02_management_and_testing/homework/modeling/__init__.py b/‎week02_management_and_testing/homework/modeling/__init__.py
@@ -7,7 +7,9 @@ __This branch corresponds to the ongoing 2024 course. If you want to see full ma
 - [__Week 1:__](./week01_intro) __Introduction__
   - Lecture: Course overview and organizational details. Core concepts of the GPU architecture and CUDA API.
   - Seminar: CUDA operations in PyTorch. Introduction to benchmarking.
-- __Week 2:__ __Experiment tracking, model and data versioning, testing DL code in Python__
+- [__Week 2:__](./week02_management_and_testing) __Experiment tracking, model and data versioning, testing DL code in Python__
+  - Lecture: Experiment management basics and pipeline versioning. Configuring Python applications. Intro to regular and property-based testing.
+  - Seminar: Example DVC+Weights & Biases project walkthrough. Intro to testing with pytest.
 - __Week 3:__ __Training optimizations, profiling DL code__
 - __Week 4:__ __Basics of distributed ML__
 - __Week 5:__ __Data-parallel training and All-Reduce__
 
@@ -0,0 +1,17 @@
+# Week 2: Experiment tracking and testing
+
+* Lecture: [slides](./lecture.pdf)
+* Seminar: see the [example_project](./example_project) directory
+* Homework: see [homework/README.md](homework/README.md)
+
+## Further reading
+* Tools for experiment tracking: [Aim](https://github.com/aimhubio/aim), [Comet](https://www.comet.ml/site/), [Neptune](https://neptune.ai/), [Sacred](https://github.com/IDSIA/sacred), [Weights and Biases](https://wandb.ai/), [ClearML](https://clear.ml/)
+* [DVC](https://dvc.org/) and [Pachyderm](https://www.pachyderm.com/) for artifact versioning
+* [Hydra documentation](https://hydra.cc/docs/intro/)
+* [Unittest](https://docs.python.org/3/library/unittest.html) built-in module
+* [Doctest](https://docs.python.org/3/library/doctest.html) built-in module (useful for testing docstrings!)
+* [Pytest](https://github.com/pytest-dev/pytest/) repository
+* Pytest plugins: [pytest-xdist](https://pypi.org/project/pytest-xdist/) for parallel execution, [pytest-cov](https://pytest-cov.readthedocs.io/en/latest/readme.html) for coverage reports.
+* [Hypothesis quick start guide](https://hypothesis.readthedocs.io/en/latest/quickstart.html) and [integration with pytest](https://hypothesis.readthedocs.io/en/latest/details.html#the-hypothesis-pytest-plugin)
+* [Full Stack Deep Learning "Troubleshooting & Testing" lecture](https://fullstackdeeplearning.com/course/2022/lecture-3-troubleshooting-and-testing/#4-resources)
+* [Made With ML MLOps Course, "Testing Machine Learning Systems: Code, Data and Models"](https://madewithml.com/courses/mlops/testing/)
@@ -0,0 +1,54 @@
+import json
+from argparse import ArgumentParser
+
+import torch
+import torchvision.transforms as transforms
+from torchvision.datasets import CIFAR10
+from torchvision.models import resnet18
+
+from hparams import config
+
+
+def main(args):
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
+    ])
+
+    test_dataset = CIFAR10(root='CIFAR10/test',
+                           train=False,
+                           transform=transform,
+                           download=False,
+                           )
+
+    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
+                                              batch_size=config["batch_size"])
+
+    device = torch.device("cuda")
+
+    model = resnet18(pretrained=False, num_classes=10)
+    model.load_state_dict(torch.load("model.pt"))
+    model.to(device)
+
+    correct = 0.0
+
+    for test_images, test_labels in test_loader:
+        test_images = test_images.to(device)
+        test_labels = test_labels.to(device)
+
+        with torch.inference_mode():
+            outputs = model(test_images)
+            preds = torch.argmax(outputs, 1)
+            correct += (preds == test_labels).sum()
+
+    accuracy = correct / len(test_dataset)
+
+    with open("final_metrics.json", "w+") as f:
+        json.dump({"accuracy": accuracy.item()}, f)
+        print("\n", file=f)
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser()
+    args = parser.parse_args()
+    main(args)
@@ -0,0 +1,24 @@
+# DVC pipeline: download the data, train the model, then compute metrics.
+stages:
+  prepare_data:
+    cmd: python prepare_data.py
+    deps:
+    - prepare_data.py
+    outs:
+    - CIFAR10
+  train:
+    cmd: python train.py
+    deps:
+    - CIFAR10
+    - hparams.py
+    - train.py
+    outs:
+    - model.pt
+  compute_metrics:
+    cmd: python compute_metrics.py
+    deps:
+    - CIFAR10
+    - compute_metrics.py
+    # compute_metrics.py imports `config` from hparams.py (batch size), so a
+    # change there must invalidate this stage too.
+    - hparams.py
+    - model.pt
+    metrics:
+    - final_metrics.json:
+        cache: false
@@ -0,0 +1,7 @@
+config = dict(
+    batch_size=64,
+    learning_rate=1e-5,
+    weight_decay=0.01,
+    epochs=2,
+    zero_init_residual=False,
+)
@@ -0,0 +1,5 @@
+from torchvision.datasets import CIFAR10
+
+if __name__ == "__main__":
+    train_dataset = CIFAR10("CIFAR10/train", download=True)
+    test_dataset = CIFAR10("CIFAR10/test", download=True)
@@ -0,0 +1,38 @@
+import torch
+import pytest
+
+from train import compute_accuracy
+
+def test_arange_elems():
+    arr = torch.arange(0, 10, dtype=torch.float)
+    assert torch.allclose(arr[-1], torch.tensor([9]).float())
+
+def test_div_zero():
+    a = torch.zeros(1,dtype=torch.long)
+    b = torch.ones(1,dtype=torch.long)
+
+    assert not torch.isfinite(b/a)
+
+
+def test_div_zero_python():
+    with pytest.raises(ZeroDivisionError):
+        1/0
+
+def test_accuracy():
+    preds = torch.randint(0,2,size=(100,))
+    targets = preds.clone()
+
+    assert compute_accuracy(preds, targets) == 1.0
+
+    preds = torch.tensor([1,2,3,0,0,0])
+    targets = torch.tensor([1,2,3,4,5,6])
+
+    assert compute_accuracy(preds, targets) == 0.5
+
+@pytest.mark.parametrize("preds,targets,result",[
+    (torch.tensor([1,2,3]),torch.tensor([1,2,3]), 1.0),
+    (torch.tensor([1,2,3]),torch.tensor([0,0,0]), 0.0),
+    (torch.tensor([1,2,3]),torch.tensor([1,2,0]), 2/3),
+    ])
+def test_accuracy_parametrized(preds, targets, result):
+    assert torch.allclose(compute_accuracy(preds, targets), torch.tensor([result]), rtol=0, atol=1e-5)
@@ -0,0 +1,91 @@
+import torch
+import torch.nn as nn
+import torchvision.transforms as transforms
+import wandb
+from torchvision.datasets import CIFAR10
+from torchvision.models import resnet18
+from tqdm import tqdm, trange
+
+from hparams import config
+
+wandb.init(config=config, project="effdl_example", name="baseline")
+
+def compute_accuracy(preds, targets):
+    result = (targets == preds).float().sum()
+    return result
+
+
+def main():
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
+        transforms.Resize((224, 224)),
+    ])
+
+    train_dataset = CIFAR10(root='CIFAR10/train',
+                            train=True,
+                            transform=transform,
+                            download=False,
+                            )
+
+    test_dataset = CIFAR10(root='CIFAR10/test',
+                           train=False,
+                           transform=transform,
+                           download=False,
+                           )
+
+    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
+                                               batch_size=config["batch_size"],
+                                               shuffle=True)
+
+    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
+                                              batch_size=config["batch_size"])
+
+    device = torch.device("cuda")
+
+    model = resnet18(pretrained=False, num_classes=10, zero_init_residual=config["zero_init_residual"])
+    model.to(device)
+    wandb.watch(model)
+
+    criterion = nn.CrossEntropyLoss()
+    optimizer = torch.optim.AdamW(model.parameters(), lr=config["learning_rate"], weight_decay=config["weight_decay"])
+
+    for epoch in trange(config["epochs"]):
+        for i, (images, labels) in enumerate(tqdm(train_loader)):
+            images = images.to(device)
+            labels = labels.to(device)
+
+            outputs = model(images)
+            loss = criterion(outputs, labels)
+
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()
+
+            if i % 100 == 0:
+                all_preds = []
+                all_labels = []
+
+                for test_images, test_labels in test_loader:
+                    test_images = test_images.to(device)
+                    test_labels = test_labels.to(device)
+
+                    with torch.inference_mode():
+                        outputs = model(test_images)
+                        preds = torch.argmax(outputs, 1)
+
+                        all_preds.append(preds)
+                        all_labels.append(test_labels)
+
+                accuracy = compute_accuracy(torch.cat(all_preds), torch.cat(all_labels))
+
+                metrics = {'test_acc': accuracy, 'train_loss': loss}
+                wandb.log(metrics, step=epoch * len(train_dataset) + (i + 1) * config["batch_size"])
+    torch.save(model.state_dict(), "model.pt")
+
+    with open("run_id.txt", "w+") as f:
+        print(wandb.run.id, file=f)
+
+
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,92 @@
+# Week 2 home assignment
+
+This assignment consists of 4 parts: you can earn the full amount of points by completing the first two and either of 
+tasks 3 and 4 (or both of them for bonus points).
+However, completing tasks 3 or 4 without the first two will not give you any points.
+
+# Problem statement
+You are given a small codebase that should train an **unconditional** [Denoising Diffusion Probabilistic Model](https://arxiv.org/abs/2006.11239)
+on the CIFAR-10 dataset.
+However, this project contains several bugs of different severity, and even some of the tests are written incorrectly.
+A correct implementation will achieve *somewhat* decent results after training for 100 epochs (~2 hours on an average GPU),
+but you should not expect much in terms of quality.
+In this homework, we are going to have a deeper look at the training pipeline, try to fix any errors we find and make 
+the code more reliable and reproducible.
+
+# Task 1 (6.5 points)
+Implement *correct* tests for the training pipeline.
+Specifically, have a look at the current [tests](./tests) folder: it contains several files with tests, 
+some of which fail, fail sometimes or are plainly incorrect.
+Your task is to identify the bugs and make the test suite pass deterministically: this will involve changes 
+both to `modeling` and to `tests`, as some parts of the testing code need to be modified as well.
+
+In your report, please tell us how you found the bugs in all parts of the code.
+You may look up the original implementation of DDPM that this assignment is based on, but citing it as the explanation for 
+your fixes will give you no points.
+Obviously, "solving" the assignment by removing all tests or setting unreasonably high thresholds will not earn
+you a good grade either.
+
+After that, implement the `test_training` function in `test_pipeline.py` that runs an integration test for the
+entire training procedure with different hyperparameters and expects different outcomes.
+This test should increase the coverage of the `modeling.training` file (measured by [pytest-cov](https://github.com/pytest-dev/pytest-cov)) to **>80%**.
+
+Importantly, you should ensure that your test code running the actual model can run both on CPU and GPU.
+Since training on CPU even for 1 epoch might take too long, you need to implement training on a subset of data.
+
+
+# Task 2 (1.5 points)
+Implement logging of the metrics and artifacts during training with [Weights and Biases](https://wandb.ai/site).
+You should log the following values:
+* Training loss and the learning rate
+* All training hyperparameters (including batch size, number of epochs etc., as well as all model and diffusion hyperparameters)
+* Inputs to the model (1 batch is enough) and samples from it after each epoch
+
+However, you should **NOT** log the training code for the model.
+
+Logging the hyperparameters and metrics will likely involve some refactoring of the original codebase.
+You can either place the necessary hyperparameters in a config file or simply have them as constants/argparse defaults 
+defined somewhere reasonable in the training code.
+
+After finishing this task, train the model for at least 100 epochs with default hyperparameters and attach the link to
+your W&B project containing this run to the final report.
+
+# Task 3 (2 points)
+Improve the configuration process of this pipeline using the [Hydra](https://hydra.cc/) library.
+You should create a config that allows adjusting at least the following attributes:
+* Peak learning rate and optimizer momentum
+* Optimizer (Adam by default, at least SGD should be supported)
+* Training batch size and the number of epochs
+* Number of workers in the dataloader
+* Existence of random flip augmentations
+
+Demonstrate that your integration works by running at least three *complete* runs (fewer than 100 epochs is OK) 
+with hyperparameters changed via the config file.
+From these runs, it should be evident that changing hyperparameters affects the training procedure.
+Here, you should log the config using [run.log_artifact](https://docs.wandb.ai/ref/python/run#log_artifact)
+and show that this changes the hyperparameters of the run in W&B.
+
+# Task 4 (2 points)
+Make the pipeline reproducible using [Data Version Control](https://dvc.org/). 
+You should end up with a `dvc.yaml` that represents two stages of your experiment with corresponding inputs and outputs: 
+getting the data (yes, you need to refactor that part of the code) and training the model itself.
+Also, you should specify the relevant code and configuration as dependencies of the corresponding pipeline stages.
+Lastly, after running your code, you should have a `dvc.lock` that stores hashes of all artifacts in your pipeline.
+Submit both `dvc.yaml` and `dvc.lock` as parts of your solution.
+
+Importantly, modifying any of the relevant modules or hyperparameters should trigger an invalidation of the
+corresponding pipeline stages: that is, `dvc repro` should do nothing if and only if `dvc.lock` is consistent with
+hashes of all dependencies in the pipeline.
+
+If you have also done the Hydra configuration assignment, make sure to check out [this guide](https://dvc.org/doc/user-guide/experiment-management/hydra-composition)
+on integrating Hydra with DVC experiment management.
+
+# Submission format
+When submitting this assignment, you should attach a .zip archive that contains:
+- The source code with all your fixes and improvements
+- A Markdown/PDF report in the root of the project folder that:
+  1. Details the changes you made to the original code (we will run `diff` and see if everything is explained)
+  2. Tells how to run the modified code (i.e., which command line arguments you have added and how to use them)
+  3. Describes your process of fixing and adding new tests for Task 1 and reports the test coverage
+  4. Gives a link to the Weights and Biases project with all necessary logs for tasks 2 and 3
+- If you solved Tasks 3 or 4, please ensure that the archived project contains the corresponding configuration/lock files as well.
+- An updated `requirements.txt` file, if your solution requires new dependencies such as `wandb`, `hydra-core` or `dvc`.
@@ -0,0 +1,40 @@
+import torch
+from torch.utils.data import DataLoader
+from torchvision import transforms
+from torchvision.datasets import CIFAR10
+
+from modeling.diffusion import DiffusionModel
+from modeling.training import generate_samples, train_epoch
+from modeling.unet import UnetModel
+
+
+# Train the DDPM on the CIFAR-10 training split for `num_epochs` epochs,
+# calling generate_samples after every epoch.
+# NOTE(review): the homework README states this codebase deliberately contains
+# bugs for students to find, so the code below is intentionally left as-is.
+def main(device: str, num_epochs: int = 100):
+    ddpm = DiffusionModel(
+        eps_model=UnetModel(3, 3, hidden_size=128),
+        betas=(1e-4, 0.02),
+        num_timesteps=1000,
+    )
+    ddpm.to(device)
+
+    train_transforms = transforms.Compose(
+        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
+    )
+
+    # download=True fetches the dataset into ./cifar10 when it is not present.
+    dataset = CIFAR10(
+        "cifar10",
+        train=True,
+        download=True,
+        transform=train_transforms,
+    )
+
+    dataloader = DataLoader(dataset, batch_size=128, num_workers=4, shuffle=True)
+    optim = torch.optim.Adam(ddpm.parameters(), lr=1e-5)
+
+    for i in range(num_epochs):
+        train_epoch(ddpm, dataloader, optim, device)
+        # One sample image per epoch, named by the zero-padded epoch index.
+        # presumably the samples/ directory must already exist — TODO confirm
+        # against generate_samples.
+        generate_samples(ddpm, device, f"samples/{i:02d}.png")
+
+
+if __name__ == "__main__":
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    main(device=device)