Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
1ee6f9c
init branch
sebffischer Sep 5, 2025
c5c3d47
.Rprofile
sebffischer Sep 12, 2025
cdc9e06
update
sebffischer Sep 12, 2025
0109749
...
sebffischer Sep 15, 2025
957182f
fix code
sebffischer Sep 15, 2025
82b610e
rerun paper code
sebffischer Sep 16, 2025
a5ab20c
rerun paper code
sebffischer Sep 17, 2025
e10f6e2
...
sebffischer Sep 18, 2025
24db489
cleanup
sebffischer Sep 17, 2025
9bd6d80
some progress
sebffischer Sep 18, 2025
a888226
Merge branch 'main' into jss-submission
sebffischer Sep 18, 2025
226e308
update dockerfile
sebffischer Sep 18, 2025
c4bdcf5
update cuda version
sebffischer Sep 18, 2025
3c21fdc
Merge branch 'main' into jss-submission
sebffischer Sep 18, 2025
30ebeb7
update dockerfiles
sebffischer Sep 18, 2025
1fd3499
proper run
sebffischer Sep 18, 2025
c9b330a
add cheap code
sebffischer Sep 19, 2025
30eb50d
update
sebffischer Sep 19, 2025
f28b804
update dockerfile
sebffischer Sep 19, 2025
762d1bc
cheap run
sebffischer Sep 19, 2025
4686b4c
update
sebffischer Sep 19, 2025
c5875fa
some fixes
sebffischer Sep 19, 2025
3a5fb4e
plotting code
sebffischer Sep 19, 2025
6996d25
update plot
sebffischer Sep 19, 2025
71d6a99
rerun code
sebffischer Sep 19, 2025
23c595f
Update
sebffischer Sep 22, 2025
f41807e
cleanup
sebffischer Sep 22, 2025
9988e4d
Merge commit 'f49909a' into jss-submission
sebffischer Sep 22, 2025
1eaa277
more cleanup
sebffischer Sep 22, 2025
7e9e4ac
remove script to check for non-ascii
sebffischer Sep 22, 2025
82d236f
update README
sebffischer Sep 22, 2025
c9122c5
make cpu cheap version more realistic
sebffischer Sep 23, 2025
1d05c51
...
sebffischer Sep 23, 2025
70cfea5
fix cheap script
sebffischer Sep 23, 2025
8ac1ee8
readme
sebffischer Sep 23, 2025
0be8f8a
run cheap cpu
sebffischer Sep 23, 2025
5a59e7c
update readme
sebffischer Sep 23, 2025
cfb56fc
address unavailable data
sebffischer Sep 23, 2025
b080bfa
Merge branch 'main' into jss-submission
sebffischer Oct 24, 2025
5d17a2a
torch new version
sebffischer Oct 24, 2025
67e2628
update dockerfile, desc
sebffischer Nov 3, 2025
38e7363
...
sebffischer Nov 6, 2025
09fee04
Update
sebffischer Nov 6, 2025
a0a23c4
...
sebffischer Nov 6, 2025
8724cf4
markdown in docker
sebffischer Nov 6, 2025
7275241
update
sebffischer Nov 6, 2025
7c60376
...
sebffischer Nov 6, 2025
6943bb1
update [skip ci]
sebffischer Nov 6, 2025
8ab885b
[skip ci]
sebffischer Nov 6, 2025
79193f0
[skip ci]
sebffischer Nov 6, 2025
1c3997b
...
sebffischer Nov 6, 2025
6c0a7f7
update [skip ci]
sebffischer Nov 6, 2025
dc47cf8
update [skip ci]
sebffischer Nov 6, 2025
4b85df7
another run
sebffischer Nov 6, 2025
9a14a7c
[ski ci]
sebffischer Nov 6, 2025
ce7d45f
update [skip ci]
sebffischer Nov 7, 2025
cdef414
cpu benchmark results
sebffischer Nov 7, 2025
cf41067
[skip ci]
sebffischer Nov 7, 2025
3e8e70a
[skip ci]
sebffischer Nov 7, 2025
eac0e2a
[skip ci]
sebffischer Nov 7, 2025
6aaedda
[skip ci]
sebffischer Nov 7, 2025
b25166b
paper results
sebffischer Nov 7, 2025
306388b
linux cpu results
sebffischer Nov 10, 2025
7d1d516
update
sebffischer Nov 10, 2025
1333e64
fix benchmark script
sebffischer Nov 10, 2025
c43fa30
gpu results
sebffischer Nov 11, 2025
959cc5d
...
sebffischer Nov 10, 2025
dd694a7
update
sebffischer Nov 11, 2025
34bcbad
gpu results optimizer
sebffischer Nov 11, 2025
9f60910
results gpu
sebffischer Nov 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,24 @@ mlr3torch*.tgz
*~
docs
inst/doc
*.html
**/.DS_Store
/doc/
/Meta/
CRAN-SUBMISSION
paper/data
.idea/
.vsc/
paper/data
paper/data/
paper/benchmark/registry
.vscode/
paper/benchmark/registry-linux-cpu/
paper/benchmark/registry-macos/
paper/benchmark/registry-linux-gpu/
paper/benchmark/registry-linux-gpu/**
paper/benchmark/registry-linux-gpu-optimizer/
paper/benchmark/registry-linux-gpu-old/
paper/paper.aux
paper/paper.fdb_latexmk
paper/paper.fls
paper/paper.log
paper/envs/renv/data/
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: mlr3torch
Title: Deep Learning with 'mlr3'
Version: 0.3.1.9000
Version: 0.3.2
Authors@R:
c(person(given = "Sebastian",
family = "Fischer",
Expand Down Expand Up @@ -42,7 +42,7 @@ URL: https://mlr3torch.mlr-org.com/, https://github.com/mlr-org/mlr3torch/
Depends:
mlr3 (>= 1.0.1),
mlr3pipelines (>= 0.6.0),
torch (>= 0.15.0),
torch (>= 0.16.2),
R (>= 3.5.0)
Imports:
backports,
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# mlr3torch (development version)
# mlr3torch 0.3.2

## Bug Fixes

Expand Down
18 changes: 18 additions & 0 deletions paper/.Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Set the HTTP User-Agent to identify R version and OS, so that Posit Package
# Manager (P3M) can serve compatible (binary) packages.
options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), paste(getRversion(), R.version["platform"], R.version["arch"], R.version["os"])))

# Ensure curl is used for downloading packages (supports the extra flags below).
options(download.file.method = "curl")

# Enable verbose output for curl and again set the HTTP user agent
options(download.file.extra = paste(
# Follow redirects, show errors, and display the HTTP status and URL
'-fsSL -w "%{stderr}curl: HTTP %{http_code} %{url_effective}\n"',
# Configure the R user agent header to install Linux binary packages
sprintf('--header "User-Agent: R (%s)"', paste(getRversion(), R.version["platform"], R.version["arch"], R.version["os"]))
))

# Use the P3M binary repository for Ubuntu 22.04 ("jammy"):
options(
repos = c(CRAN = "https://packagemanager.posit.co/cran/__linux__/jammy/latest")
)
197 changes: 197 additions & 0 deletions paper/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
# Reproducing the Results

## Computational Environment

In order to reproduce the results, you can either use the provided docker images or recreate the `renv` environment that is located in `./paper/envs/renv`.

You can recreate the `renv` environment by going into the `./paper/envs/renv` directory and running:

```r
renv::init()
```

We are providing two docker images, one for CPU and one for CUDA GPU, that have the same packages from the `renv.lock` file installed.
The images can be downloaded from Zenodo: https://doi.org/10.5281/zenodo.17130368.
You can, for example, use the [zenodo-client](https://pypi.org/project/zenodo-client/) library to download the images:

```bash
# pip install zenodo-client
export ZENODO_API_TOKEN=<your-token>
# for CPU:
zenodo-client download 17130368 IMAGE_CPU.tar.gz
```

By default, the downloaded files are stored in `~/.data/zenodo`.

At the time of writing, the images are also hosted on dockerhub, but this is not a permanent storage:
https://hub.docker.com/repository/docker/sebffischer/mlr3torch-jss/general

The `Dockerfile`s used to create the images are available in the `paper/envs` directory.

After downloading the images, you can load them into Docker, e.g. via:

```bash
docker load -i IMAGE_CPU.tar.gz
```

When using another container manager such as `enroot`, a workaround is to import the image using `Docker` on a system that has it installed and then push it to a dockerhub repository and then pull it from there using `enroot`, along the lines of:

```bash
enroot import docker://sebffischer/mlr3torch-jss:cpu
enroot create --name mlr3torch-jss:cpu sebffischer+mlr3torch-jss+cpu.sqsh
```

To start the container using `Docker`, run:

```bash
docker run -it --rm -v <parent-dir-to-mlr3torch>:/mnt/data/mlr3torch sebffischer/mlr3torch-jss:cpu
# go into the mlr3torch directory
cd /mnt/data/mlr3torch
```

To start the container using `enroot`, run:

```bash
enroot start \
--mount <parent-dir-to-mlr3torch>:/mnt/data \
mlr3torch-jss:cpu bash
# go into the mlr3torch directory
cd /mnt/data/mlr3torch
```

## Running the Benchmark

Note that while the benchmark uses `batchtools` for experiment definition, we don't use it for job submission in order to ensure that all GPU and CPU benchmarks respectively are run on the same machine.

For running the benchmarks, we strongly recommend using the docker images, because we need both PyTorch and (R-)torch, which can be somewhat tricky to setup, especially when using CUDA.

If you want to run it without the docker image, you need to adjust the `PYTHON_PATH` variable in the benchmarking scripts to point to your Python installation, ensure that `pytorch` is installed, and verify that the `"pytorch"` algorithm in `./paper/benchmark/benchmark.R` initializes the correct Python environment.
But again, we strongly recommend using the provided docker images for the benchmarks.

You can still reproduce the results that compare (R) `torch` with `mlr3torch` without the python environment.
To do so, you can subset the experiments that are run to not include the `"pytorch"` algorithm.
This has to be done in the benchmarking scripts, e.g. `./paper/benchmark/linux-gpu.R`.
We show further down how to run only a subset of the jobs.

### Running the Benchmarks

Note that it's important to have enough RAM, otherwise the benchmarks will be non-comparable.
However, there are many other factors, such as the exact hardware that make it generally difficult to reproduce the exact same results.

To run the benchmarks locally, go into `./paper`:

To run the GPU benchmarks (using the CUDA docker image) on linux, run:

```bash
Rscript benchmark/linux-gpu.R
```

To run the CPU benchmarks (using the CPU docker image) on linux, run:

```bash
Rscript benchmark/linux-cpu.R
```

To run the benchmark that compares "ignite" with standard optimizers (using the CUDA docker image) on linux, run:

```bash
Rscript benchmark/linux-gpu-optimizer.R
```

The results are stored in:

* `paper/benchmark/result-linux-gpu.rds`
* `paper/benchmark/result-linux-cpu.rds`
* `paper/benchmark/result-linux-gpu-optimizer.rds`

There are also some exemplary slurm scripts that need to be adapted to the specific cluster and job submission system.

* `paper/benchmark/benchmark_gpu.sh`
* `paper/benchmark/benchmark_gpu_optimizer.sh`

### Running a subset of the Jobs

To run a subset of the jobs, modify the table `tbl` in scripts such as `./paper/benchmark/linux-gpu.R` to only include the jobs that you want to run.
For example:

```r
ids = tbl[device == "cpu" & n_layers == 10 & latent == 250 & jit & optimizer == "adamw" & repl == 1, ]$job.id
for (id in sample(ids)) {
submitJobs(id)
Sys.sleep(0.1)
}
```

### Generating the Benchmark Plots

For the main benchmark shown in the paper, run:

```r
Rscript paper/benchmark/plot_benchmark.R
```

For the comparison of "ignite" with standard optimizers, run:

```r
Rscript paper/benchmark/plot_optimizer.R
```

These commands generate the files:

* `paper/benchmark/plot_benchmark.png`
* `paper/benchmark/plot_benchmark_relative.png`
* `paper/benchmark/plot_optimizer.png`

## Recreating the Paper Code

The file `./paper/paper_code.R` contains the code from the paper.

You can reproduce it by running:

```r
knitr::spin("paper_code.R")
```

We provide the results of running this in `./paper/paper_results`.

The results in the paper are those from the CPU docker image and they were fully reproducible when we re-ran them on the same machine.
There were some minor differences in results when re-running the code on a different machine (macOS with M1 CPU vs Linux with Intel CPU).

The file `paper_code.R` differs in some very minor ways from the code shown in the paper, which were omitted there for brevity:
it was extracted from the TeX manuscript fully programmatically, but adjusted with the following modifications:

* Time measurements (`Sys.time()`)
* Deactivate knitr caching
* Activating caching for `mlr3torch`
* Changing the `mlr3` logging level to `warn` for cleaner output
* Saving the ROC plot for postprocessing
* Adding a `sessionInfo()` call at the end

The results are stored in `./paper/paper_results/`.
The ROC plot is postprocessed using the `roc.R` script, which results in the file `paper/paper_results/roc.png`.

### Possible Data Unavailability

The code shown in the paper downloads various datasets from standard resources.
In the unlikely, but possible event that these datasets are not available anymore, we include:

1. the cache directory for `torch` (MNIST, ResNet-18) and `mlr3torch` (postprocessed MNIST, Melanoma)
2. the dogs-vs-cats dataset

in the Zenodo data.

If one of the downloads (1) fails, download the `cache.tar.gz` file from zenodo, untar it and put it in the location where the cache is (put it as `/root/.cache/` when using the docker images).

If (2) fails, download `dogs-vs-cats.tar.gz` from Zenodo, untar it and put it into the directory where you are running the `paper_code.R`.

### Other errors

When reproducing the results with `knitr` in the docker container, we sometimes encountered issues with the weight downloads for the ResNet-18 model.
This was not an issue when reproducing without `knitr`.
If you also encounter this, delete the problematic model file (you can determine the torch cache directory via `rappdirs::user_cache_dir("torch")`) and re-download it by running:

```r
torchvision::model_resnet18(pretrained = TRUE)
```

Then, re-run the paper code.
Binary file added paper/Rplots.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions paper/batchtools.conf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Run all batchtools jobs sequentially in the current R session
# (no external scheduler); keeps benchmark runs on a single machine.
cluster.functions = batchtools::makeClusterFunctionsInteractive()
95 changes: 95 additions & 0 deletions paper/benchmark/benchmark.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
library(batchtools)
library(mlr3misc)

# Create and populate a batchtools experiment registry for the runtime benchmark.
#
# @param reg_path (character) directory where the registry is created.
# @param python_path (character) path to the Python binary used by the
#   "pytorch" algorithm (captured by the algorithm closures below).
# @param work_dir (character) working directory for the registry's jobs.
# @return The function is called for its side effects (registry creation,
#   problem/algorithm registration); the registry object is not returned.
setup = function(reg_path, python_path, work_dir) {
  reg = makeExperimentRegistry(
    file.dir = reg_path,
    work.dir = work_dir,
    packages = "checkmate"
  )
  # Jobs are submitted interactively (sequentially) to keep timings comparable.
  reg$cluster.functions = makeClusterFunctionsInteractive()

  source(here::here("paper/benchmark/time_rtorch.R"))

  # Make the timing function available on the workers.
  batchExport(list(
    time_rtorch = time_rtorch # nolint
  ))

  # The "problem" only validates and bundles the experiment parameters;
  # the actual work happens in the algorithms.
  addProblem(
    "runtime_train",
    data = NULL,
    fun = function(
      epochs,
      batch_size,
      n_layers,
      latent,
      n,
      p,
      optimizer,
      device,
      ...
    ) {
      problem = list(
        epochs = assert_int(epochs),
        batch_size = assert_int(batch_size),
        n_layers = assert_int(n_layers),
        latent = assert_int(latent),
        n = assert_int(n),
        p = assert_int(p),
        optimizer = assert_choice(
          optimizer,
          c("ignite_adamw", "adamw", "sgd", "ignite_sgd")
        ),
        device = assert_choice(device, c("cuda", "cpu", "mps"))
      )

      problem
    }
  )

  # PyTorch baseline: runs in a fresh R subprocess (callr::r) so that each
  # job gets a clean reticulate/Python state.
  addAlgorithm("pytorch", fun = function(instance, job, data, jit, ...) {
    f = function(..., python_path) {
      library(reticulate)
      x = try(
        {
          reticulate::use_python(python_path, required = TRUE)
          reticulate::source_python(here::here("paper/benchmark/time_pytorch.py"))
          print(reticulate::py_config())
          time_pytorch(...) # nolint
        },
        silent = TRUE
      )
      print(x)
    }
    # python_path is captured from the enclosing setup() call.
    args = c(instance, list(seed = job$seed, jit = jit, python_path = python_path))
    callr::r(f, args = args)
  })

  # (R-)torch baseline: subprocess isolation for comparable timings.
  addAlgorithm("rtorch", fun = function(instance, job, opt_type, jit, ...) {
    assert_choice(opt_type, c("standard", "ignite"))
    if (opt_type == "ignite") {
      instance$optimizer = paste0("ignite_", instance$optimizer)
    }
    callr::r(time_rtorch, args = c(instance, list(seed = job$seed, jit = jit))) # nolint
  })

  # mlr3torch variant: same timing function with mlr3torch = TRUE.
  addAlgorithm("mlr3torch", fun = function(instance, job, opt_type, jit, ...) {
    # Validate opt_type here as well, consistent with the "rtorch" algorithm.
    assert_choice(opt_type, c("standard", "ignite"))
    if (opt_type == "ignite") {
      instance$optimizer = paste0("ignite_", instance$optimizer)
    }
    callr::r(
      time_rtorch, # nolint
      args = c(instance, list(seed = job$seed, mlr3torch = TRUE, jit = jit))
    )
  })
}

# global config:
# REPLS: number of replications per experiment configuration.
REPLS = 10L
# EPOCHS: training epochs per timed run.
EPOCHS = 20L
# N / P: presumably number of observations and number of features of the
# synthetic data -- confirm against time_rtorch.R / time_pytorch.py.
N = 2000L
P = 1000L
21 changes: 21 additions & 0 deletions paper/benchmark/benchmark_gpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# SLURM submission script for the GPU benchmark (exemplary; adapt the
# partition, paths, and qos to your own cluster).
#SBATCH --job-name=mlr3torch-benchmark
#SBATCH --partition=mcml-hgx-a100-80x4
#SBATCH --gres=gpu:4
#SBATCH --qos=mcml
#SBATCH --ntasks=1
#SBATCH --time=48:00:00
#SBATCH --exclusive
#SBATCH --output=mlr3torch-benchmark-%j.out

# Cluster-specific storage location containing the image and the repo.
cd /dss/dssmcmlfs01/pr74ze/pr74ze-dss-0001/ru48nas2/
# (Re-)create the enroot container from the imported GPU image.
enroot create --force --name mlr3torch-jss sebffischer+mlr3torch-jss+gpu.sqsh

# Start the container with the repo mounted and run the GPU benchmark script.
enroot start \
--mount /dss/dssmcmlfs01/pr74ze/pr74ze-dss-0001/ru48nas2/:/mnt/data \
mlr3torch-jss bash -c "
cd /mnt/data/mlr3torch/paper
Rscript -e \"
source('benchmark/linux-gpu.R')
\"
"
Loading
Loading